From 731b8db5cbec48d0be44b22ee953386b0af9a7b8 Mon Sep 17 00:00:00 2001 From: Son NK <> Date: Sat, 3 Oct 2020 10:34:07 +0200 Subject: [PATCH] monitoring: alert after 10 minutes of consecutive failures --- monitoring.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/monitoring.py b/monitoring.py index e30498b9..4f1f1053 100644 --- a/monitoring.py +++ b/monitoring.py @@ -8,11 +8,13 @@ from app.models import Monitoring from server import create_app # the number of consecutive fails -# if more than 3 fails, alert +# if more than _max_nb_fails, alert # reset whenever the system comes back to normal # a system is considered fail if incoming_queue + active_queue > 50 _nb_failed = 0 +_max_nb_fails = 10 + def get_stats(): """Look at different metrics and alert appropriately""" @@ -35,7 +37,7 @@ def get_stats(): if incoming_queue + active_queue > 50: _nb_failed += 1 - if _nb_failed > 3: + if _nb_failed > _max_nb_fails: # reset _nb_failed = 0 @@ -59,5 +61,5 @@ if __name__ == "__main__": with app.app_context(): get_stats() - # 2 min - sleep(120) + # 1 min + sleep(60)