fix: alerting

This commit is contained in:
2026-01-05 10:45:38 +01:00
parent ed451e3b95
commit 21c5c6dbd5
6 changed files with 32 additions and 31 deletions

View File

@@ -9,10 +9,10 @@ let
{ name = "OpenLDAP"; service = "openldap.service"; instance = "mail:9100"; }
{ name = "Gitea"; service = "container@git.service"; instance = "fw:9100"; }
{ name = "Gitea Runner"; service = "microvm@git-runner-1.service"; instance = "fw:9100"; }
{ name = "WireGuard"; service = "wireguard-wg_cloonar.service"; instance = "mail:9100"; }
{ name = "WireGuard"; service = "wireguard-wg_cloonar.service"; instance = "fw:9100"; }
{ name = "MySQL"; service = "mysql.service"; instance = "amzebs-01:9100"; }
{ name = "Nginx"; service = "nginx.service"; instance = "amzebs-01:9100"; }
{ name = "PHP-FPM"; service = "phpfpm-.*\\.service"; instance = "amzebs-01:9100"; }
{ name = "PHP-FPM"; service = "phpfpm-.*[.]service"; instance = "amzebs-01:9100"; }
];
# Extract host from instance (e.g., "fw:9100" -> "fw")
@@ -25,12 +25,17 @@ let
# A service name counts as a regex pattern when it contains either of these
# markers: the wildcard sequence ".*" or a backslash.
isRegex = svc: builtins.any (marker: lib.hasInfix marker svc) [ ".*" "\\" ];
# Build the PromQL expression for one service entry.
#
# svc.service is matched against the `name` label and svc.instance against the
# `instance` label of node_systemd_unit_state{state="active"}.
#   - Regex patterns (as decided by isRegex) are matched with `=~` and wrapped
#     in min(), so the alert fires if ANY matching service is down.
#   - Single services are matched with `=` and get `OR on() vector(0)` so that
#     a missing metric evaluates to 0 instead of producing no result.
# NOTE(review): the min() branch has no vector(0) fallback, so a pattern that
# matches no series yields an empty result — confirm that is intended.
mkExpr = svc:
  let
    regex = isRegex svc.service;
    # `=~` is the PromQL regex matcher, `=` the exact matcher.
    matchOp = if regex then "=~" else "=";
    nameMatch = "name${matchOp}\"${svc.service}\"";
    baseQuery = "node_systemd_unit_state{state=\"active\", ${nameMatch}, instance=\"${svc.instance}\"}";
  in
    if regex
    then "min(${baseQuery})"
    else "${baseQuery} OR on() vector(0)";
mkServiceAlert = svc: {
uid = mkUid svc.name;