New syntax allowing the use of code in parameters

This commit is contained in:
Solene Rapenne 2018-01-11 15:03:46 +01:00
parent 01a3f1cc34
commit 3f03224030
4 changed files with 77 additions and 70 deletions

44
README
View File

@ -144,7 +144,7 @@ Check if the actual number of processes of the system exceeds a specific limit.
> Set the limit that will trigger an alert when exceeded.
:limit INTEGER
Example : `(=> alert number-of-processes (:limit 200))`
Example : `(=> alert number-of-processes :limit 200)`
pid-running
@ -154,7 +154,7 @@ Check if the PID number found in a .pid file is alive.
> Set the path of the pid file. If $USER doesn't have permission to open it, return "file not found".
:path "STRING"
Example : `(=> alert pid-running (:path "/var/run/nginx.pid"))`
Example : `(=> alert pid-running :path "/var/run/nginx.pid")`
disk-usage
@ -167,7 +167,7 @@ Check if the disk-usage of a chosen partition does exceed a specific limit.
> Set the limit that will trigger an alert when exceeded.
:limit INTEGER
Example : `(=> alert disk-usage (:path "/tmp" :limit 50))`
Example : `(=> alert disk-usage :path "/tmp" :limit 50)`
file-exists
@ -177,7 +177,7 @@ Check if a file exists.
> Set the path of the file to check.
:path "STRING"
Example : `(=> alert file-exists (:path "/var/postgresql/standby"))`
Example : `(=> alert file-exists :path "/var/postgresql/standby")`
file-updated
@ -190,7 +190,7 @@ Check if a file exists and has been updated since a defined time.
> Set the limit in minutes since the last modification time before triggering an alert.
:limit INTEGER
Example : `(=> alert file-updated (:path "/var/log/nginx/access.log" :limit 60))`
Example : `(=> alert file-updated :path "/var/log/nginx/access.log" :limit 60)`
load-average-1
@ -200,7 +200,7 @@ Check if the load average during the last minute exceeds a specific limit.
> Set the limit not to exceed.
:limit INTEGER
Example : `(=> alert load-average-1 (:limit 2))`
Example : `(=> alert load-average-1 :limit 2)`
load-average-5
@ -210,7 +210,7 @@ Check if the load average during the last five minutes exceeds a specific limit.
> Set the limit not to exceed.
:limit INTEGER
Example : `(=> alert load-average-5 (:limit 2))`
Example : `(=> alert load-average-5 :limit 2)`
load-average-15
@ -220,7 +220,7 @@ Check if the load average during the last fifteen minutes exceeds a specific lim
> Set the limit not to exceed.
:limit INTEGER
Example : `(=> alert load-average-15 (:limit 2))`
Example : `(=> alert load-average-15 :limit 2)`
ping
@ -230,7 +230,7 @@ Check if a remote host answers 2 ICMP pings.
> Set the host to ping. Return an error if ping command returns non-zero.
:host "STRING" (can be IP or hostname)
Example : `(=> alert ping (:host "8.8.8.8"))`
Example : `(=> alert ping :host "8.8.8.8")`
command
@ -241,7 +241,7 @@ This may be the most useful probe because it lets the user do any check needed.
> Command to execute, accept commands with pipes.
:command "STRING"
Example : `(=> alert command (:command "tail -n 10 /var/log/messages | grep -v CRITICAL"))`
Example : `(=> alert command :command "tail -n 10 /var/log/messages | grep -v CRITICAL")`
service
-------
@ -250,7 +250,7 @@ Check if a service is started on the system.
> Set the name of the service to test
:name "STRING"
Example : `(=> alert service (:name "mysql-server"))`
Example : `(=> alert service :name "mysql-server")`
file-less-than
--------------
@ -262,7 +262,7 @@ Check if a file has a size less than a specified limit.
> Set the limit in bytes before triggering an alert.
:limit INTEGER
Example : `(=> alert file-less-than (:path "/var/log/nginx.log" :limit 60))`
Example : `(=> alert file-less-than :path "/var/log/nginx.log" :limit 60)`
The configuration file
@ -278,13 +278,13 @@ It's possible to write loops if you don't want to repeat code
(loop for host in '("bitreich.org" "dataswamp.org" "floodgap.com")
do
(=> mail ping (:host host)))
(=> mail ping :host host))
or another example
(loop for service in '("smtpd" "nginx" "mysqld" "postgresql")
do
(=> mail service (:name service)))
(=> mail service :name service))
and another example using rows from a file to check remote hosts
@ -292,7 +292,7 @@ and another example using rows from a file to check remote hosts
(loop for line = (read-line stream nil)
while line
do
(=> mail ping (:host line))))
(=> mail ping :host line)))
Conditional
@ -310,9 +310,9 @@ router, probes requiring the router to work will trigger errors so we
should skip them.
(stop-if-error
(=> mail ping (:host "192.168.1.1" :desc "My local router"))
(=> mail ping (:host "89.89.89.89" :desc "My ISP DNS server"))
(=> mail ping (:host "kernel.org" :desc "Remote website")))
(=> mail ping :host "192.168.1.1" :desc "My local router")
(=> mail ping :host "89.89.89.89" :desc "My ISP DNS server")
(=> mail ping :host "kernel.org" :desc "Remote website"))
Note : stop-if-error is an alias for the **and** function.
@ -326,10 +326,10 @@ the detection and fixing it. You could want to receive a mail when
things need to be fixed in your spare time, but mail other people if
things still aren't fixed once they reach a certain level.
(escalation
(=> mail-me disk-usage (:path "/" :limit 70))
(=> sms-me disk-usage (:path "/" :limit 90))
(=> buzzer disk-usage (:path "/" :limit 98)))
(escalation
(=> mail-me disk-usage :path "/" :limit 70)
(=> sms-me disk-usage :path "/" :limit 90)
(=> buzzer disk-usage :path "/" :limit 98))
In this example, we check the disk usage. I will get a mail through the
"mail-me" alert if the disk usage goes above 70%. Once it goes

View File

@ -3,17 +3,17 @@
(alert mail "echo -n 'Problem with %function% %date% %params%' | mail -s alarm mail@isp.net")
(alert sms "/home/user/sms.sh '%date% %function% %params% %hostname%")
(alert available-variables "REMINDER : %function% %params% %date% %hostname% %desc% %level% %os% %newline% %result%")
(alert void "")
(alert empty "")
;; this is a comment
; this is also a comment
(=> mail disk-usage (:path "/" :limit 90))
(=> mail disk-usage :path "/" :limit 90)
(=> mail service (:name "dovecot"))
(=> mail service (:name "httpd"))
(=> sms service (:name "smtpd"))
(=> mail number-of-processes (:limit 100))
(=> mail service :name "dovecot")
(=> mail service :name "httpd")
(=> sms service :name "smtpd")
(=> mail number-of-processes :limit 100)
(=> mail ping (:host "bitreich.org" :desc "Ping Bitreich"))
(=> mail ping (:host "openbsd.org" :desc "Ping OpenBSD.org"))
(=> mail ping :host "bitreich.org" :desc "Ping Bitreich")
(=> mail ping :host "openbsd.org" :desc "Ping OpenBSD.org")

View File

@ -1,58 +1,62 @@
(load "functions.lisp")
(alert dont-use-it "REMINDER %function% %params% %date% %hostname% %desc% %level% %os% %newline% _ %space% %result%")
(alert void "")
(alert empty "")
(alert mail "")
(alert peroket "echo 'problem at %date% with %function% %params%'")
(alert sms "echo -n '%date% %function% CRITICAL on %hostname%' | curl http://somewebservice")
;(alert mail "echo -n '%date% %hostname% had problem on %function% %newline% %params% values %result% %newline%
; %desc%' | mail -s '[Error] %function% - %hostname%' foo@bar.com")
;; check if used percent :path partition is more than :limit
(=> mail disk-usage (:path "/" :limit 90))
(=> mail disk-usage (:path "/usr" :limit 85))
(=> mail disk-usage (:path "/tmp" :limit 1)) ;; failure
(=> peroket disk-usage :path "/" :limit 90)
(=> peroket disk-usage :path "/usr" :limit 85)
(=> peroket disk-usage :path "/tmp" :limit 0) ;; failure
;; check if :path file exists
(=> mail file-exists (:path "/bsd.rd" :desc "OpenBSD kernel /bsd.rd"))
(=> void file-exists (:path "/non-existant-file")) ;; failure file not found
(=> mail file-exists :path "/bsd.rd" :desc "OpenBSD kernel /bsd.rd")
(=> empty file-exists :path "/non-existant-file") ;; failure file not found
;; check if :path file exists and has been updated since :limit minutes
(=> void file-updated (:path "/var/log/messages" :limit 400))
(=> mail file-updated (:path "/bsd.rd" :limit 1 :desc "OpenBSD kernel")) ;; failure
(=> empty file-updated :path "/var/log/messages" :limit 400)
(=> mail file-updated :path "/bsd.rd" :limit 1 :desc "OpenBSD kernel") ;; failure
;; check if :path pid file process is running
(=> mail pid-running (:path "/var/run/xdm.pid" :desc "XDM pid"))
(=> mail pid-running (:path "/home/user/test.pid")) ;; failure
(=> mail pid-running :path "/var/run/xdm.pid" :desc "XDM pid")
(=> mail pid-running :path "/home/user/test.pid") ;; failure
;; check if number of processes on the system is more than :limit
(=> mail number-of-processes (:limit 200))
(=> mail number-of-processes (:limit 1)) ;; failure
(=> mail number-of-processes :limit 200)
(=> mail number-of-processes :limit 1) ;; failure
;; check if service is running
(=> mail service (:name "httpd"))
(=> mail service (:name "ospfd")) ;; failure : not started
(=> mail service (:name "unknown")) ;; failure : not known
(=> mail service :name "httpd")
(=> mail service :name "ospfd") ;; failure : not started
(=> mail service :name "unknown") ;; failure : not known
;; check if load average on (1/5/15) minutes is more than :limit
(=> mail load-average-1 (:limit 4))
;;(=> mail load-average-5 (:limit 2))
;;(=> mail load-average-15 (:limit 1))
(=> mail load-average-1 (:limit 0.2)) ;; should trigger error
(=> mail load-average-1 :limit 4)
;;(=> mail load-average-5 :limit 2)
;;(=> mail load-average-15 :limit 1)
(=> mail load-average-1 :limit 0.2) ;; should trigger error
;; check if :host host is reachable
;;(=> mail ping (:host "8.8.8.8" :desc "Google DNS"))
;;(=> void ping (:host "127.40.30.21" :desc "Certainly not used address")) ;; fail time out
;;(=> mail ping :host "8.8.8.8" :desc "Google DNS")
;;(=> empty ping :host "127.40.30.21" :desc "Certainly not used address") ;; fail time out
(loop for host in (list "8.8.8.8" "8.8.4.4" "127.0.0.1")
do
(=> empty ping :host host))
;; check if :command command return 0 (success) or something else (error)
(=> void command (:command "echo hello")) ;; success
(=> void command (:command "ls /non-existent-file")) ;; fail
(=> empty command :command "echo hello") ;; success
(=> empty command :command "ls /non-existent-file") ;; fail
;; check if web page :url answer under :limit
(=> void command (:command "curl -m 10 http://google.fr/"))
(=> empty command :command "curl -m 10 http://google.fr/")
;; check if the web page :url contains the text regex :pattern
(=> void command (:command "curl http://google.fr/ | grep html"))
(=> void command (:command "curl http://google.fr/ | grep hello")) ;; error
(=> empty command :command "curl http://google.fr/ | grep html")
(=> empty command :command "curl http://google.fr/ | grep hello") ;; error
(quit)

View File

@ -25,7 +25,9 @@
while pos)))
(defmacro create-probe(name &body code)
`(progn (defun ,name(params) ,@code)))
`(progn
(defparameter ,name ',name)
(defun ,name(params) ,@code)))
(defun get-file-size(path)
(with-open-file (stream path)
@ -38,7 +40,9 @@
(list nil (format nil "return code = ~a" code)))))
(defmacro alert(name string)
`(progn (push (list ',name ,string)
`(progn
(defparameter ,name ',name)
(push (list ',name ,string)
*alerts*)))
(defun trigger-alert(level function params result)
@ -67,17 +71,16 @@
`(progn
(or ,@body)))
;; Macro variant of the probe runner: (=> LEVEL FONCTION (PARAMS...)).
;; LEVEL, FONCTION and PARAMS are all inserted into the expansion quoted,
;; so none of them is evaluated: PARAMS is used as a literal property list
;; and any variable or expression written inside it stays an unevaluated
;; symbol/form.
;; The expansion prints a status line, calls FONCTION on PARAMS, and yields
;; T when the result is not a list, NIL otherwise (after running the
;; command produced by TRIGGER-ALERT).
(defmacro =>(level fonction params)
`(progn
;; Show the :desc entry of PARAMS, or the whole PARAMS list when no :desc is given.
(format t "[~a~a ~20A~a] ~35A" *yellow* ',level ',fonction *white* (getf ',params :desc ',params))
(let ((result (funcall ',fonction ',params)))
;; Any list result (NIL included, since NIL is a list) is reported as an error.
(if (not (listp result))
(progn
(format t " => ~asuccess~a~%" *green* *white*)
t)
(progn
(format t " => ~aerror~a~%" *red* *white*)
;; NOTE(review): TRIGGER-ALERT presumably returns the shell command of
;; the alert named by LEVEL; its body is not visible here -- confirm.
(uiop:run-program (trigger-alert ',level ',fonction ',params (cadr result)) :output t)
nil)))))
;; Run one probe and report its outcome: (=> LEVEL FONCTION :key value ...).
;; Being an ordinary function, its arguments are evaluated by the caller, so
;; the keyword values may be variables or arbitrary expressions.
;;   LEVEL    - alert designator, passed through to TRIGGER-ALERT on failure.
;;   FONCTION - function designator of the probe; it is called with the
;;              collected PARAMS list as its single argument.
;;   PARAMS   - the remaining arguments, gathered by &rest and read as a
;;              property list (GETF), so they must come in key/value pairs.
;; Returns T when the probe result is not a list, NIL otherwise.
(defun =>(level fonction &rest params)
;; Show the :desc entry of PARAMS, or the whole PARAMS list when no :desc is given.
(format t "[~a~a ~20A~a] ~35A" *yellow* level fonction *white* (getf params :desc params))
(let ((result (funcall fonction params)))
;; Any list result (NIL included, since NIL is a list) is reported as an error.
(if (not (listp result))
(progn
(format t " => ~asuccess~a~%" *green* *white*)
t)
(progn
(format t " => ~aerror~a~%" *red* *white*)
;; The second element of RESULT is handed to TRIGGER-ALERT as the failure detail.
;; NOTE(review): TRIGGER-ALERT presumably returns the shell command of
;; the alert named by LEVEL; its body is not visible here -- confirm.
(uiop:run-program (trigger-alert level fonction params (cadr result)) :output t)
nil))))
(load "probes.lisp")