This commit is contained in:
solene rapenne 2016-10-07 12:25:49 +02:00
commit 9410e05e37
3 changed files with 191 additions and 0 deletions

50
example.lisp Normal file
View File

@ -0,0 +1,50 @@
;; Alert table: each entry is (NAME TEMPLATE).  TEMPLATE is a list of strings
;; and tokens that trigger-alert concatenates into one shell command; a nil
;; template yields an empty command, i.e. the alert does nothing.
;; Tokens understood by trigger-alert: function params date hostname desc
;; level os newline _ space result, plus + which expands to nothing.
;; The description token is spelled desc: that is the name trigger-alert
;; matches, any unknown symbol is rendered as the text NIL.
(defvar *alerts*
  (list
   '(dont-use-it ("REMINDER" function params date hostname desc level os newline _ space result))
   '(void nil)
   ;; NOTE: alerts are looked up with assoc, which returns the first match,
   ;; so this entry disables the real mail template further down.
   ;; Remove this line to actually send mails.
   '(mail nil)
   '(sms ("echo -n '" date _ function " CRITICAL " hostname "' | curl http://somewebservice"))
   '(mail ("echo -n '" date _ hostname " had problem on " function newline params _ " values " result newline
           desc "' | mail -s '[Error] " function " - " hostname "' foo@bar.com"))
   '(with-plus ("echo -n '" + date + _ + hostname + " had problem on " + function + newline + params + newline
                + desc + "' | mail -s '[Error] " + function + " - " + hostname + "' foo@bar.com"))))
;; Load the => macro (functions.lisp, which in turn loads probes.lisp).
;; Each check below has the form (=> ALERT PROBE PARAMS): PROBE is called with
;; the PARAMS plist and, when it reports a failure, the ALERT template from
;; *alerts* is expanded and run as a shell command.
(load "functions.lisp")
;; disk-usage: passes while the used percentage of the :path partition
;; is below :limit
(=> mail disk-usage (:path "/" :limit 90))
(=> mail disk-usage (:path "/usr" :limit 85))
(=> mail disk-usage (:path "/tmp" :limit 1)) ;; expected to fail
;; file-exists: passes when the :path file exists
(=> mail file-exists (:path "/bsd.rd" :desc "OpenBSD kernel /bsd.rd"))
(=> void file-exists (:path "/non-existant-file")) ;; expected to fail: file not found
;; file-updated: passes when the :path file exists and was modified
;; less than :limit minutes ago
(=> void file-updated (:path "/var/log/messages" :limit 400))
(=> mail file-updated (:path "/bsd.rd" :limit 1 :desc "OpenBSD kernel")) ;; expected to fail
;; pid-running: passes when the pid stored in the :path file is reported by ps
(=> mail pid-running (:path "/var/run/xdm.pid" :desc "XDM pid"))
(=> mail pid-running (:path "/home/user/test.pid")) ;; expected to fail
;; number-of-processes: passes while the process count is below :limit
(=> mail number-of-processes (:limit 200))
(=> mail number-of-processes (:limit 1)) ;; expected to fail
;; load-average-N: passes while the load average over the last
;; 1, 5 or 15 minutes is below :limit
(=> mail load-average-1 (:limit 4))
(=> mail load-average-5 (:limit 2))
(=> mail load-average-15 (:limit 1))
(=> mail load-average-1 (:limit 0.2)) ;; should trigger an error
;; ping: passes when the :host host answers ping
(=> mail ping (:host "8.8.8.8" :desc "Google DNS"))
(=> void ping (:host "2.3.4.256" :desc "Not valid ipv4 address")) ;; expected to fail: invalid address
(=> void ping (:host "127.40.30.21" :desc "Certainly not used address")) ;; expected to fail: timeout
;; command: passes when the :command command returns 0, fails otherwise
(=> void command (:command "echo hello")) ;; success
(=> void command (:command "ls /non-existent-file")) ;; expected to fail
;; Leave the Lisp image once every check has run.
;; NOTE(review): quit is not standard Common Lisp, it relies on the implementation.
(quit)

49
functions.lisp Normal file
View File

@ -0,0 +1,49 @@
(load "probes.lisp")
;; Return the terminal escape sequence ESC [ NUM1 ; NUM2 m as a string.
;; NUM1 and NUM2 are printed as-is between the bracket and the final m.
(defun color (num1 num2)
  (let ((escape #\Escape))
    (with-output-to-string (out)
      (format out "~a[~a;~am" escape num1 num2))))
;; Escape sequences used by the => macro when printing check results.
(defparameter *red* (color 1 31)
  "Escape sequence printed before the word error in check output.")
(defparameter *white* (color 0 70)
  "Escape sequence printed after each colored span in check output.
NOTE(review): 70 does not look like a standard color code, so this presumably
acts as a plain attribute reset -- confirm on the target terminal.")
(defparameter *green* (color 1 32)
  "Escape sequence printed before the word success in check output.")
(defparameter *yellow* (color 0 33)
  "Escape sequence printed before the alert and probe names in check output.")
;; Build the shell command for the alert named LEVEL.
;; The alert template is the second element of the LEVEL entry in *alerts*
;; (first match wins); a missing entry or a nil template gives "".
(defun trigger-alert (level function params result)
  "Expand the *alerts* template registered under LEVEL into a single string.

LEVEL    -- alert name, looked up in *alerts* with assoc.
FUNCTION -- name of the probe that failed.
PARAMS   -- plist the probe was called with; :desc is read from it.
RESULT   -- failure detail reported by the probe.

Strings in the template are copied verbatim.  Symbols are replaced as
follows: + by nothing, space and _ by one space, newline by a newline,
result, function, params and level by the matching argument, hostname by
(machine-instance), os by (software-type), date by the current local time
as YYYY/MM/DD HH:MM:SS, desc or description by the :desc entry of PARAMS
(empty when absent).  Any other symbol is rendered as NIL."
  (format nil "~{~a~}"
          (mapcar #'(lambda (x)
                      (if (symbolp x)
                          (case x
                            (+ "")
                            (result result)
                            (hostname (machine-instance))
                            (date (multiple-value-bind
                                        (second minute hour day month year)
                                      (get-decoded-time)
                                    ;; zero-pad so that 12:05:09 is not printed as 12:5:9
                                    (format nil "~d/~2,'0d/~2,'0d ~2,'0d:~2,'0d:~2,'0d"
                                            year month day hour minute second)))
                            (os (software-type))
                            (function function)
                            (space " ")
                            (_ " ")
                            (params params)
                            ;; both spellings are accepted: alert templates
                            ;; use the long one
                            ((desc description) (getf params :desc ""))
                            (newline #\Newline)
                            (level level))
                          x))
                  (cadr (assoc level *alerts*)))))
;; Evaluate the BODY forms in order and stop at the first one returning nil.
;; The value is nil in that case, otherwise the value of the last form
;; (t when BODY is empty).
(defmacro stop-if-error (&body body)
  `(and ,@body))
;; Run one check and report it on standard output.
;; LEVEL is the alert name (key of *alerts*), FONCTION the probe function name
;; and PARAMS the plist handed to the probe; none of them is evaluated.
(defmacro => (level fonction params)
  "Call probe FONCTION with PARAMS, print the outcome, and fire alert LEVEL on failure.

A probe result that is not a list means success: the form prints success and
returns t.  A list result means failure: the form prints error, expands the
LEVEL alert with trigger-alert (the second element of the result is passed as
the failure detail), runs the resulting shell command and returns nil.

An empty alert command is not run at all, and a failing alert command only
produces a warning, so one broken alert cannot abort the remaining checks."
  (let ((result (gensym "RESULT"))
        (alert-command (gensym "COMMAND"))
        (exit-code (gensym "CODE")))
    `(progn
       (format t "[~a~a ~20A~a] ~35A" *yellow* ',level ',fonction *white* (getf ',params :desc ',params))
       (let ((,result (funcall ',fonction ',params)))
         (if (not (listp ,result))
             (progn
               (format t " => ~asuccess~a~%" *green* *white*)
               t)
             (progn
               (format t " => ~aerror~a~%" *red* *white*)
               (let ((,alert-command (trigger-alert ',level ',fonction ',params (cadr ,result))))
                 ;; alerts with a nil template expand to "": nothing to run
                 (unless (string= ,alert-command "")
                   (let ((,exit-code (nth-value 2 (uiop:run-program ,alert-command
                                                                    :output t
                                                                    :ignore-error-status t))))
                     (unless (eql ,exit-code 0)
                       (warn "alert command for ~a exited with code ~a" ',level ,exit-code)))))
               nil))))))

92
probes.lisp Normal file
View File

@ -0,0 +1,92 @@
;; Define a probe: a function called NAME taking one argument.
;; The argument is deliberately named params so that the CODE forms can read
;; the check's plist through that variable.
(defmacro create-probe (name &body code)
  (list 'progn
        (list* 'defun name '(params) code)))
;; Run COMMAND with uiop:run-program and turn its exit code into a probe result:
;; t when the code is 0, otherwise a list (nil "return code = N").
;; A non-zero exit status does not signal an error.
(defun command-return-code (command)
  (multiple-value-bind (stdout stderr code)
      (uiop:run-program command :ignore-error-status t)
    (declare (ignore stdout stderr))
    (cond ((zerop code) t)
          (t (list nil (format nil "return code = ~a" code))))))
(create-probe
 file-exists
 ;; t when probe-file finds the :path entry, otherwise a failure list
 ;; carrying the message "file not found".
 (cond ((probe-file (getf params :path)) t)
       (t (list nil "file not found"))))
(create-probe
 file-updated
 ;; t when the :path file was modified less than :limit minutes ago.
 ;; Failure results: (nil AGE-IN-MINUTES) when the file is too old,
 ;; (nil "file not found") when it does not exist, and
 ;; (nil "unable to read modification time") when the date is unavailable.
 (let ((path (getf params :path)))
   (if (probe-file path)
       ;; ask for the write date by pathname: opening the file is not needed
       ;; and would signal an error on unreadable files
       (let ((write-date (file-write-date path)))
         (if (null write-date)
             (list nil "unable to read modification time")
             (let ((result (floor (- (get-universal-time) write-date) 60)))
               (if (> (getf params :limit) result)
                   t
                   (list nil result)))))
       (list nil "file not found"))))
(create-probe
 pid-running
 ;; Read a pid from the first line of the :path file and ask ps -p whether
 ;; that process exists.  Result is t when ps returns 0, otherwise a failure
 ;; list: ps return code, "pid file is empty" or "file not found".
 (let ((pid-file (getf params :path)))
   (if (probe-file pid-file)
       (let ((pid-number (string-trim
                          '(#\Space #\Tab #\Return #\Newline)
                          ;; an empty file yields "" instead of an end-of-file error
                          (or (with-open-file (stream pid-file)
                                (read-line stream nil nil))
                              ""))))
         (if (string= pid-number "")
             (list nil "pid file is empty")
             (command-return-code (list "ps" "-p" pid-number))))
       (list nil "file not found"))))
(create-probe
 disk-usage
 ;; Run df on :path and read the used percentage from the second output line
 ;; (the integer that ends at the first % sign).
 ;; Result is t while that percentage is below :limit, otherwise
 ;; (nil USED-PERCENT); (nil "unable to parse df output") when the line
 ;; holds no % sign.
 (let* ((output (uiop:run-program (list "df" (getf params :path)) :output :lines))
        (line (second output))
        (percent-character-pos (and line (position #\% line))))
   (if (null percent-character-pos)
       (list nil "unable to parse df output")
       (let* ((space-pos (position #\Space line :end percent-character-pos :from-end t))
              (used-disk (parse-integer line
                                        :start (if space-pos (1+ space-pos) 0)
                                        :end percent-character-pos)))
         (if (< used-disk (getf params :limit))
             t
             ;; report the measured value, not a fixed label
             (list nil used-disk))))))
;; Return one load average read from the output of uptime.
;; TIME is the awk field number used by the probes: 10, 11 and 12 select the
;; 1, 5 and 15 minute averages.  Those three are read relative to the end of
;; the line, because the number of fields before them changes with the uptime
;; layout (for instance when the machine has been up for less than a day).
;; Any other TIME value is still used as an absolute awk field number.
(defun system-load (time)
  (let* ((field (case time
                  (10 "$(NF-2)")
                  (11 "$(NF-1)")
                  (12 "$NF")
                  (otherwise (concatenate 'string "$" (princ-to-string time)))))
         (command (concatenate 'string
                               "uptime | awk '{ print "
                               field
                               " }'"))
         ;; the text comes from an external program: never evaluate it
         (*read-eval* nil))
    (read-from-string (uiop:run-program command :output :string))))
(create-probe
 load-average-1
 ;; Compare the value of (system-load 10) with :limit:
 ;; t while it is below the limit, otherwise (nil VALUE).
 (let ((current (system-load 10)))
   (cond ((< current (getf params :limit)) t)
         (t (list nil current)))))
(create-probe
 load-average-5
 ;; Compare the value of (system-load 11) with :limit:
 ;; t while it is below the limit, otherwise (nil VALUE).
 (let ((current (system-load 11)))
   (cond ((< current (getf params :limit)) t)
         (t (list nil current)))))
(create-probe
 load-average-15
 ;; Compare the value of (system-load 12) with :limit:
 ;; t while it is below the limit, otherwise (nil VALUE).
 (let ((current (system-load 12)))
   (cond ((< current (getf params :limit)) t)
         (t (list nil current)))))
(create-probe
 command
 ;; Run the :command entry and report its exit code through
 ;; command-return-code (t for 0, failure list otherwise).
 (let ((shell-command (getf params :command)))
   (command-return-code shell-command)))
(create-probe
 ping
 ;; Run ping -c2 against the :host entry and report its exit code through
 ;; command-return-code (t for 0, failure list otherwise).
 (let* ((host (getf params :host))
        (argv (list "ping" "-c2" host)))
   (command-return-code argv)))
(create-probe
 number-of-processes
 ;; Count the processes listed by ps aux.
 ;; Result is t while the count is below :limit, otherwise (nil COUNT).
 (let* ((output (uiop:run-program (list "ps" "aux") :output :lines))
        ;; the first line printed by ps is the column header, not a process
        (result (max 0 (1- (length output)))))
   (if (> (getf params :limit) result)
       t
       (list nil result))))