mirror of git://bitreich.org/reed-alert
New syntax allowing the use of code in parameters
This commit is contained in:
parent
01a3f1cc34
commit
3f03224030
44
README
44
README
|
@ -144,7 +144,7 @@ Check if the actual number of processes of the system exceeds a specific limit.
|
|||
> Set the limit that will trigger an alert when exceeded.
|
||||
:limit INTEGER
|
||||
|
||||
Example : `(=> alert number-of-processes (:limit 200))`
|
||||
Example : `(=> alert number-of-processes :limit 200)`
|
||||
|
||||
|
||||
pid-running
|
||||
|
@ -154,7 +154,7 @@ Check if the PID number found in a .pid file is alive.
|
|||
> Set the path of the pid file. If $USER doesn't have permission to open it, return "file not found".
|
||||
:path "STRING"
|
||||
|
||||
Example : `(=> alert pid-running (:path "/var/run/nginx.pid"))`
|
||||
Example : `(=> alert pid-running :path "/var/run/nginx.pid")`
|
||||
|
||||
|
||||
disk-usage
|
||||
|
@ -167,7 +167,7 @@ Check if the disk-usage of a chosen partition does exceed a specific limit.
|
|||
> Set the limit that will trigger an alert when exceeded.
|
||||
:limit INTEGER
|
||||
|
||||
Example : `(=> alert disk-usage (:path "/tmp" :limit 50))`
|
||||
Example : `(=> alert disk-usage :path "/tmp" :limit 50)`
|
||||
|
||||
|
||||
file-exists
|
||||
|
@ -177,7 +177,7 @@ Check if a file exists.
|
|||
> Set the path of the file to check.
|
||||
:path "STRING"
|
||||
|
||||
Example : `(=> alert file-exists (:path "/var/postgresql/standby"))`
|
||||
Example : `(=> alert file-exists :path "/var/postgresql/standby")`
|
||||
|
||||
|
||||
file-updated
|
||||
|
@ -190,7 +190,7 @@ Check if a file exists and has been updated since a defined time.
|
|||
> Set the limit in minutes since the last modification time before triggering an alert.
|
||||
:limit INTEGER
|
||||
|
||||
Example : `(=> alert file-updated (:path "/var/log/nginx/access.log" :limit 60))`
|
||||
Example : `(=> alert file-updated :path "/var/log/nginx/access.log" :limit 60)`
|
||||
|
||||
|
||||
load-average-1
|
||||
|
@ -200,7 +200,7 @@ Check if the load average during the last minute exceeds a specific limit.
|
|||
> Set the limit not to exceed.
|
||||
:limit INTEGER
|
||||
|
||||
Example : `(=> alert load-average-1 (:limit 2))`
|
||||
Example : `(=> alert load-average-1 :limit 2)`
|
||||
|
||||
|
||||
load-average-5
|
||||
|
@ -210,7 +210,7 @@ Check if the load average during the last five minutes exceeds a specific limit.
|
|||
> Set the limit not to exceed.
|
||||
:limit INTEGER
|
||||
|
||||
Example : `(=> alert load-average-5 (:limit 2))`
|
||||
Example : `(=> alert load-average-5 :limit 2)`
|
||||
|
||||
|
||||
load-average-15
|
||||
|
@ -220,7 +220,7 @@ Check if the load average during the last fifteen minutes exceeds a specific lim
|
|||
> Set the limit not to exceed.
|
||||
:limit INTEGER
|
||||
|
||||
Example : `(=> alert load-average-15 (:limit 2))`
|
||||
Example : `(=> alert load-average-15 :limit 2)`
|
||||
|
||||
|
||||
ping
|
||||
|
@ -230,7 +230,7 @@ Check if a remote host answers 2 ICMP pings.
|
|||
> Set the host to ping. Return an error if ping command returns non-zero.
|
||||
:host "STRING" (can be IP or hostname)
|
||||
|
||||
Example : `(=> alert ping (:host "8.8.8.8"))`
|
||||
Example : `(=> alert ping :host "8.8.8.8")`
|
||||
|
||||
|
||||
command
|
||||
|
@ -241,7 +241,7 @@ This may be the most useful probe because it lets the user do any check needed.
|
|||
> Command to execute, accept commands with pipes.
|
||||
:command "STRING"
|
||||
|
||||
Example : `(=> alert command (:command "tail -n 10 /var/log/messages | grep -v CRITICAL"))`
|
||||
Example : `(=> alert command :command "tail -n 10 /var/log/messages | grep -v CRITICAL")`
|
||||
|
||||
service
|
||||
-------
|
||||
|
@ -250,7 +250,7 @@ Check if a service is started on the system.
|
|||
> Set the name of the service to test
|
||||
:name STRING
|
||||
|
||||
Example : `(=> alert service (:name "mysql-server"))`
|
||||
Example : `(=> alert service :name "mysql-server")`
|
||||
|
||||
file-less-than
|
||||
--------------
|
||||
|
@ -262,7 +262,7 @@ Check if a file has a size less than a specified limit.
|
|||
> Set the limit in bytes before triggering an alert.
|
||||
:limit INTEGER
|
||||
|
||||
Example : `(=> alert file-less-than (:path "/var/log/nginx.log" :limit 60))`
|
||||
Example : `(=> alert file-less-than :path "/var/log/nginx.log" :limit 60)`
|
||||
|
||||
|
||||
The configuration file
|
||||
|
@ -278,13 +278,13 @@ It's possible to write loops if you don't want to repeat code
|
|||
|
||||
(loop for host in '("bitreich.org" "dataswamp.org" "floodgap.com")
|
||||
do
|
||||
(=> mail ping (:host host)))
|
||||
(=> mail ping :host host))
|
||||
|
||||
or another example
|
||||
|
||||
(loop for service in '("smtpd" "nginx" "mysqld" "postgresql")
|
||||
do
|
||||
(=> mail service (:name service)))
|
||||
(=> mail service :name service))
|
||||
|
||||
and another example using rows from a file to check remote hosts
|
||||
|
||||
|
@ -292,7 +292,7 @@ and another example using rows from a file to check remote hosts
|
|||
(loop for line = (read-line stream nil)
|
||||
while line
|
||||
do
|
||||
(=> mail ping (:host line))))
|
||||
(=> mail ping :host line)))
|
||||
|
||||
|
||||
Conditional
|
||||
|
@ -310,9 +310,9 @@ router, probes requiring the router to work will trigger errors so we
|
|||
should skip them.
|
||||
|
||||
(stop-if-error
|
||||
(=> mail ping (:host "192.168.1.1" :desc "My local router"))
|
||||
(=> mail ping (:host "89.89.89.89" :desc "My ISP DNS server"))
|
||||
(=> mail ping (:host "kernel.org" :desc "Remote website")))
|
||||
(=> mail ping :host "192.168.1.1" :desc "My local router")
|
||||
(=> mail ping :host "89.89.89.89" :desc "My ISP DNS server")
|
||||
(=> mail ping :host "kernel.org" :desc "Remote website"))
|
||||
|
||||
Note : stop-if-error is an alias for the **and** function.
|
||||
|
||||
|
@ -326,10 +326,10 @@ the detection and fixing it. You could want to receive a mail when
|
|||
things need to be fixed in spare time, but mail other people if
|
||||
things aren't fixed after some level.
|
||||
|
||||
(escalation
|
||||
(=> mail-me disk-usage (:path "/" :limit 70))
|
||||
(=> sms-me disk-usage (:path "/" :limit 90))
|
||||
(=> buzzer disk-usage (:path "/" :limit 98)))
|
||||
(escalation
|
||||
(=> mail-me disk-usage :path "/" :limit 70)
|
||||
(=> sms-me disk-usage :path "/" :limit 90)
|
||||
(=> buzzer disk-usage :path "/" :limit 98))
|
||||
|
||||
In this example, we check the disk usage. I will get a mail through the
|
||||
"mail-me" alert if the disk usage goes above 70%. Once it goes
|
||||
|
|
|
@ -3,17 +3,17 @@
|
|||
(alert mail "echo -n 'Problem with %function% %date% %params%' | mail -s alarm mail@isp.net")
|
||||
(alert sms "/home/user/sms.sh '%date% %function% %params% %hostname%")
|
||||
(alert available-variables "REMINDER : %function% %params% %date% %hostname% %desc% %level% %os% %newline% %result%")
|
||||
(alert void "")
|
||||
(alert empty "")
|
||||
|
||||
|
||||
;; this is a comment
|
||||
; this is also a comment
|
||||
(=> mail disk-usage (:path "/" :limit 90))
|
||||
(=> mail disk-usage :path "/" :limit 90)
|
||||
|
||||
(=> mail service (:name "dovecot"))
|
||||
(=> mail service (:name "httpd"))
|
||||
(=> sms service (:name "smtpd"))
|
||||
(=> mail number-of-processes (:limit 100))
|
||||
(=> mail service :name "dovecot")
|
||||
(=> mail service :name "httpd")
|
||||
(=> sms service :name "smtpd")
|
||||
(=> mail number-of-processes :limit 100)
|
||||
|
||||
(=> mail ping (:host "bitreich.org" :desc "Ping Bitreich"))
|
||||
(=> mail ping (:host "openbsd.org" :desc "Ping OpenBSD.org"))
|
||||
(=> mail ping :host "bitreich.org" :desc "Ping Bitreich")
|
||||
(=> mail ping :host "openbsd.org" :desc "Ping OpenBSD.org")
|
||||
|
|
56
example.lisp
56
example.lisp
|
@ -1,58 +1,62 @@
|
|||
(load "functions.lisp")
|
||||
|
||||
(alert dont-use-it "REMINDER %function% %params% %date% %hostname% %desc% %level% %os% %newline% _ %space% %result%")
|
||||
(alert void "")
|
||||
(alert empty "")
|
||||
(alert mail "")
|
||||
(alert peroket "echo 'problem at %date% with %function% %params%'")
|
||||
(alert sms "echo -n '%date% %function% CRITICAL on %hostname%' | curl http://somewebservice")
|
||||
;(alert mail "echo -n '%date% %hostname% had problem on %function% %newline% %params% values %result% %newline%
|
||||
; %desc%' | mail -s '[Error] %function% - %hostname%' foo@bar.com")
|
||||
|
||||
|
||||
;; check if used percent :path partition is more than :limit
|
||||
(=> mail disk-usage (:path "/" :limit 90))
|
||||
(=> mail disk-usage (:path "/usr" :limit 85))
|
||||
(=> mail disk-usage (:path "/tmp" :limit 1)) ;; failure
|
||||
(=> peroket disk-usage :path "/" :limit 90)
|
||||
(=> peroket disk-usage :path "/usr" :limit 85)
|
||||
(=> peroket disk-usage :path "/tmp" :limit 0) ;; failure
|
||||
|
||||
;; check if :path file exists
|
||||
(=> mail file-exists (:path "/bsd.rd" :desc "OpenBSD kernel /bsd.rd"))
|
||||
(=> void file-exists (:path "/non-existant-file")) ;; failure file not found
|
||||
(=> mail file-exists :path "/bsd.rd" :desc "OpenBSD kernel /bsd.rd")
|
||||
(=> empty file-exists :path "/non-existant-file") ;; failure file not found
|
||||
|
||||
;; check if :path file exists and has been updated since :limit minutes
|
||||
(=> void file-updated (:path "/var/log/messages" :limit 400))
|
||||
(=> mail file-updated (:path "/bsd.rd" :limit 1 :desc "OpenBSD kernel")) ;; failure
|
||||
(=> empty file-updated :path "/var/log/messages" :limit 400)
|
||||
(=> mail file-updated :path "/bsd.rd" :limit 1 :desc "OpenBSD kernel") ;; failure
|
||||
|
||||
;; check if :path pid file process is running
|
||||
(=> mail pid-running (:path "/var/run/xdm.pid" :desc "XDM pid"))
|
||||
(=> mail pid-running (:path "/home/user/test.pid")) ;; failure
|
||||
(=> mail pid-running :path "/var/run/xdm.pid" :desc "XDM pid")
|
||||
(=> mail pid-running :path "/home/user/test.pid") ;; failure
|
||||
|
||||
;; check if number of processes on the system is more than :limit
|
||||
(=> mail number-of-processes (:limit 200))
|
||||
(=> mail number-of-processes (:limit 1)) ;; failure
|
||||
(=> mail number-of-processes :limit 200)
|
||||
(=> mail number-of-processes :limit 1) ;; failure
|
||||
|
||||
;; check if service is running
|
||||
(=> mail service (:name "httpd"))
|
||||
(=> mail service (:name "ospfd")) ;; failure : not started
|
||||
(=> mail service (:name "unknown")) ;; failure : not known
|
||||
(=> mail service :name "httpd")
|
||||
(=> mail service :name "ospfd") ;; failure : not started
|
||||
(=> mail service :name "unknown") ;; failure : not known
|
||||
|
||||
;; check if load average on (1/5/15) minutes is more than :limit
|
||||
(=> mail load-average-1 (:limit 4))
|
||||
;;(=> mail load-average-5 (:limit 2))
|
||||
;;(=> mail load-average-15 (:limit 1))
|
||||
(=> mail load-average-1 (:limit 0.2)) ;; should trigger error
|
||||
(=> mail load-average-1 :limit 4)
|
||||
;;(=> mail load-average-5 :limit 2)
|
||||
;;(=> mail load-average-15 :limit 1)
|
||||
(=> mail load-average-1 :limit 0.2) ;; should trigger error
|
||||
|
||||
;; check if :host host is reachable
|
||||
;;(=> mail ping (:host "8.8.8.8" :desc "Google DNS"))
|
||||
;;(=> void ping (:host "127.40.30.21" :desc "Certainly not used address")) ;; fail time out
|
||||
;;(=> mail ping :host "8.8.8.8" :desc "Google DNS")
|
||||
;;(=> empty ping :host "127.40.30.21" :desc "Certainly not used address") ;; fail time out
|
||||
(loop for host in (list "8.8.8.8" "8.8.4.4" "127.0.0.1")
|
||||
do
|
||||
(=> empty ping :host host))
|
||||
|
||||
;; check if :command command return 0 (success) or something else (error)
|
||||
(=> void command (:command "echo hello")) ;; success
|
||||
(=> void command (:command "ls /non-existent-file")) ;; fail
|
||||
(=> empty command :command "echo hello") ;; success
|
||||
(=> empty command :command "ls /non-existent-file") ;; fail
|
||||
|
||||
;; check if web page :url answer under :limit
|
||||
(=> void command (:command "curl -m 10 http://google.fr/"))
|
||||
(=> empty command :command "curl -m 10 http://google.fr/")
|
||||
|
||||
;; check if the web page :url contains the text regex :pattern
|
||||
(=> void command (:command "curl http://google.fr/ | grep html"))
|
||||
(=> void command (:command "curl http://google.fr/ | grep hello")) ;; error
|
||||
(=> empty command :command "curl http://google.fr/ | grep html")
|
||||
(=> empty command :command "curl http://google.fr/ | grep hello") ;; error
|
||||
|
||||
(quit)
|
||||
|
|
|
@ -25,7 +25,9 @@
|
|||
while pos)))
|
||||
|
||||
(defmacro create-probe(name &body code)
|
||||
`(progn (defun ,name(params) ,@code)))
|
||||
`(progn
|
||||
(defparameter ,name ',name)
|
||||
(defun ,name(params) ,@code)))
|
||||
|
||||
(defun get-file-size(path)
|
||||
(with-open-file (stream path)
|
||||
|
@ -38,7 +40,9 @@
|
|||
(list nil (format nil "return code = ~a" code)))))
|
||||
|
||||
(defmacro alert(name string)
|
||||
`(progn (push (list ',name ,string)
|
||||
`(progn
|
||||
(defparameter ,name ',name)
|
||||
(push (list ',name ,string)
|
||||
*alerts*)))
|
||||
|
||||
(defun trigger-alert(level function params result)
|
||||
|
@ -67,17 +71,16 @@
|
|||
`(progn
|
||||
(or ,@body)))
|
||||
|
||||
(defmacro =>(level fonction params)
|
||||
`(progn
|
||||
(format t "[~a~a ~20A~a] ~35A" *yellow* ',level ',fonction *white* (getf ',params :desc ',params))
|
||||
(let ((result (funcall ',fonction ',params)))
|
||||
(if (not (listp result))
|
||||
(progn
|
||||
(format t " => ~asuccess~a~%" *green* *white*)
|
||||
t)
|
||||
(progn
|
||||
(format t " => ~aerror~a~%" *red* *white*)
|
||||
(uiop:run-program (trigger-alert ',level ',fonction ',params (cadr result)) :output t)
|
||||
nil)))))
|
||||
(defun =>(level fonction &rest params)
|
||||
(format t "[~a~a ~20A~a] ~35A" *yellow* level fonction *white* (getf params :desc params))
|
||||
(let ((result (funcall fonction params)))
|
||||
(if (not (listp result))
|
||||
(progn
|
||||
(format t " => ~asuccess~a~%" *green* *white*)
|
||||
t)
|
||||
(progn
|
||||
(format t " => ~aerror~a~%" *red* *white*)
|
||||
(uiop:run-program (trigger-alert level fonction params (cadr result)) :output t)
|
||||
nil))))
|
||||
|
||||
(load "probes.lisp")
|
||||
|
|
Loading…
Reference in New Issue