fix: alerting
This commit is contained in:
@@ -9,10 +9,10 @@ let
|
||||
{ name = "OpenLDAP"; service = "openldap.service"; instance = "mail:9100"; }
|
||||
{ name = "Gitea"; service = "container@git.service"; instance = "fw:9100"; }
|
||||
{ name = "Gitea Runner"; service = "microvm@git-runner-1.service"; instance = "fw:9100"; }
|
||||
{ name = "WireGuard"; service = "wireguard-wg_cloonar.service"; instance = "mail:9100"; }
|
||||
{ name = "WireGuard"; service = "wireguard-wg_cloonar.service"; instance = "fw:9100"; }
|
||||
{ name = "MySQL"; service = "mysql.service"; instance = "amzebs-01:9100"; }
|
||||
{ name = "Nginx"; service = "nginx.service"; instance = "amzebs-01:9100"; }
|
||||
{ name = "PHP-FPM"; service = "phpfpm-.*\\.service"; instance = "amzebs-01:9100"; }
|
||||
{ name = "PHP-FPM"; service = "phpfpm-.*[.]service"; instance = "amzebs-01:9100"; }
|
||||
];
|
||||
|
||||
# Extract host from instance (e.g., "fw:9100" -> "fw")
|
||||
@@ -25,12 +25,17 @@ let
|
||||
# True when the service string should be treated as a regex pattern, i.e. it
# contains either ".*" or a backslash.
isRegex = svc:
  builtins.any (marker: lib.hasInfix marker svc) [ ".*" "\\" ];
|
||||
|
||||
# Build the PromQL expression for one service entry.
#
# svc: attribute set providing `service` (a systemd unit name or a regex
#      pattern) and `instance` (the scrape target, e.g. "fw:9100").
#
# For regex patterns: use min() to alert if ANY matching service is down.
# For single services: use OR vector(0) to handle missing metrics.
#
# NOTE(review): the min() branch has no vector(0) fallback, so a pattern that
# matches no unit at all presumably yields no data rather than 0 — confirm
# that the alert rule's no-data handling covers this case.
mkExpr = svc:
  let
    # Regex patterns need the =~ matcher; plain unit names use exact equality.
    nameMatch = if isRegex svc.service
      then "name=~\"${svc.service}\""
      else "name=\"${svc.service}\"";
    # Selector shared by both branches below.
    baseQuery = "node_systemd_unit_state{state=\"active\", ${nameMatch}, instance=\"${svc.instance}\"}";
  in if isRegex svc.service
    then "min(${baseQuery})"
    else "${baseQuery} OR on() vector(0)";
|
||||
|
||||
mkServiceAlert = svc: {
|
||||
uid = mkUid svc.name;
|
||||
|
||||
Reference in New Issue
Block a user