Reminder feature

This commit is contained in:
Solene Rapenne 2019-01-15 17:21:18 +01:00
parent a98fb0c772
commit bd55683140
2 changed files with 26 additions and 5 deletions

10
README
View File

@ -95,7 +95,8 @@ The Notification System
When a check returns a failure, a previously defined notifier will be
called. This will be triggered only after reed-alert finds **3**
failures (not more or less) in a row for this check, this is a default
failures (not more or less, but this can be changed globally by
modifying the *tries* variable) in a row for this check, this is a default
value that can be changed per probe with the :try parameter as
explained later in this document. This is to prevent reed-alert from
spamming notifications for a long time (number of failures very high, like
@ -108,6 +109,13 @@ reed-alert will use the notifier system when it reach its try number
and when the problem is fixed, so you know when it begins and when it
ends.
It is possible to be reminded about a failure every n tries by setting
the keyword :reminder to a number. This is useful if you want
to be reminded from time to time that a problem is not fixed, because some
alerts, like mails, can be easily overlooked or lost in a large volume
of mail. The :reminder is a per-check setting. For a global reminder
setting, one can set the *reminder* variable.
reed-alert keeps track of the count of failures with one file per
failing probe in the "states" folder. To ensure unique filenames, the
following format is used (+ means it's concatenated):

View File

@ -3,6 +3,7 @@
(require 'asdf))
;; Global default for the number of consecutive failures needed before a
;; notification is sent; used when a check does not supply :try.
(defparameter *tries* 3)
;; Global default for the reminder interval, used when a check does not
;; supply :reminder. Reminders are only considered when the value is > 0,
;; so the default of 0 disables them.
(defparameter *reminder* 0)
;; Association list searched by alert level in trigger-alert; the second
;; element of the matching entry is used as the notifier command string.
;; Starts empty.
(defparameter *alerts* '())
;; Directory holding the state files.
(defparameter *states-dir* "~/.reed-alert/states/")
;; Create the states directory (and any missing parents) at load time.
(ensure-directories-exist *states-dir*)
@ -69,7 +70,10 @@
(defun trigger-alert(level function params result state)
(let* ((notifier-command (assoc level *alerts*))
(command-string (cadr notifier-command)))
(setf command-string (replace-all command-string "%state%" (if (eql 'error state) "Start" "End")))
(setf command-string (replace-all command-string "%state%" (cond
((eql state 'START) "Begin")
((eql state 'REMINDER) "Reminder")
(t "End"))))
(setf command-string (replace-all command-string "%result%" (format nil "~a" result)))
(setf command-string (replace-all command-string "%hostname%" (machine-instance)))
(setf command-string (replace-all command-string "%os%" (software-type)))
@ -84,7 +88,7 @@
(get-decoded-time)
(format nil "~a/~a/~a ~a:~a:~a" year month day hour minute second))))
command-string))
(defmacro stop-if-error (&body checks)
  "Evaluate the CHECKS forms in order, stopping at the first one that
returns NIL. Returns NIL in that case; otherwise returns the value of
the last form (or T when no forms are given)."
  ;; AND provides the short-circuit: forms after a NIL result are never
  ;; evaluated.
  `(progn
     (and ,@checks)))
@ -129,12 +133,21 @@
t)
;; failure handling
(let ((trigger-now? (= (+ 1 tries) (getf params :try *tries*)))) ; we add +1 because it's failing right now
(let ((trigger-now? (or
;; we add +1 to tries because it's failing right now
(and (= (+ 1 tries) (getf params :try *tries*))
'START) ;; it starts failing
;; if reminder is set and a valid value (> 0)
(when (< 0 (getf params :reminder *reminder*))
(and (= 0 (mod (+ 1 tries) (getf params :reminder *reminder*)))
'REMINDER))))) ;; do we need to remind it's failing?
(format t " => ~aerror (~a failure(s) before)~a~a~%" *red* tries *white* (if trigger-now? " NOTIFIED" ""))
;; more error than limit, send alert once
(when trigger-now?
(uiop:run-program (trigger-alert level fonction params (cadr result) 'error) :output t))
(uiop:run-program (trigger-alert level fonction params (cadr result) trigger-now?) :output t))
;; increment the number of tries by 1
(with-open-file (stream-out filepath :direction :output
:if-exists :supersede)