From 21c5c6dbd5d674c5147cd4964731fd168c757863 Mon Sep 17 00:00:00 2001 From: Dominik Polakovics Date: Mon, 5 Jan 2026 10:45:38 +0100 Subject: [PATCH 1/2] fix: alerting --- hosts/amzebs-01/configuration.nix | 3 ++ hosts/fw/configuration.nix | 3 ++ hosts/fw/modules/fwmetrics.nix | 29 ++----------------- hosts/mail/modules/metrics/default.nix | 3 ++ .../alerting/service/services_down.nix | 11 +++++-- utils/modules/victoriametrics/default.nix | 14 +++++++-- 6 files changed, 32 insertions(+), 31 deletions(-) diff --git a/hosts/amzebs-01/configuration.nix b/hosts/amzebs-01/configuration.nix index 700cc30..1485f94 100644 --- a/hosts/amzebs-01/configuration.nix +++ b/hosts/amzebs-01/configuration.nix @@ -60,6 +60,9 @@ }; }; + # Systemd services to monitor + services.victoriametrics.monitoredServices = [ "mysql" "nginx" "phpfpm-.*" ]; + # backups - adjust repo for this host borgbackup.repo = "u149513-sub10@u149513-sub10.your-backup.de:borg"; diff --git a/hosts/fw/configuration.nix b/hosts/fw/configuration.nix index e3f8115..493d50f 100644 --- a/hosts/fw/configuration.nix +++ b/hosts/fw/configuration.nix @@ -76,6 +76,9 @@ networkPrefix = "10.42"; + # Systemd services to monitor (must include every unit alerted on as fw:9100) + services.victoriametrics.monitoredServices = [ "ai-mailer" "container@git" "microvm@git-runner-" "wireguard-wg_cloonar" ]; + nixpkgs.overlays = [ (import ./utils/overlays/packages.nix) ]; diff --git a/hosts/fw/modules/fwmetrics.nix b/hosts/fw/modules/fwmetrics.nix index 665775b..7ee2e9e 100644 --- a/hosts/fw/modules/fwmetrics.nix +++ b/hosts/fw/modules/fwmetrics.nix @@ -2,42 +2,19 @@ let configure_prom = builtins.toFile "prometheus.yml" '' scrape_configs: - # System metrics - - job_name: 'node' + - job_name: 'server' stream_parse: true static_configs: - targets: - ${config.networking.hostName}:9100 - - # Systemd service monitoring - - job_name: 'systemd' - metrics_path: /metrics - params: - collect[]: - - 'systemd.service.state' - - 'systemd.service.start_time_seconds' - - 'systemd.unit_file.state' - static_configs: - 
targets: - ${config.networking.hostName}:9100 - relabel_configs: - - source_labels: [__name__] - regex: 'node_systemd_unit_state' - action: keep - - source_labels: [name] - regex: '(ai-mailer|container@git|microvm@git-runner-).*\.service' - action: keep ''; in { sops.secrets.victoria-agent-env = { sopsFile = ../utils/modules/victoriametrics/secrets.yaml; }; - services.prometheus.exporters.node = { - enable = true; - enabledCollectors = [ "systemd" ]; - }; - + services.prometheus.exporters.node.enable = true; + systemd.services.export-fw-to-prometheus = { path = with pkgs; [victoriametrics]; enable = true; diff --git a/hosts/mail/modules/metrics/default.nix b/hosts/mail/modules/metrics/default.nix index 998283a..c355986 100644 --- a/hosts/mail/modules/metrics/default.nix +++ b/hosts/mail/modules/metrics/default.nix @@ -5,4 +5,7 @@ ./postfix-exporter.nix ./dovecot-exporter.nix ]; + + # Systemd services to monitor + services.victoriametrics.monitoredServices = [ "postfix" "dovecot" "openldap" "wireguard-wg_cloonar" ]; } \ No newline at end of file diff --git a/hosts/web-arm/modules/grafana/alerting/service/services_down.nix b/hosts/web-arm/modules/grafana/alerting/service/services_down.nix index bc2df22..8d04832 100644 --- a/hosts/web-arm/modules/grafana/alerting/service/services_down.nix +++ b/hosts/web-arm/modules/grafana/alerting/service/services_down.nix @@ -9,10 +9,10 @@ let { name = "OpenLDAP"; service = "openldap.service"; instance = "mail:9100"; } { name = "Gitea"; service = "container@git.service"; instance = "fw:9100"; } { name = "Gitea Runner"; service = "microvm@git-runner-1.service"; instance = "fw:9100"; } - { name = "WireGuard"; service = "wireguard-wg_cloonar.service"; instance = "mail:9100"; } + { name = "WireGuard"; service = "wireguard-wg_cloonar.service"; instance = "fw:9100"; } { name = "MySQL"; service = "mysql.service"; instance = "amzebs-01:9100"; } { name = "Nginx"; service = "nginx.service"; instance = "amzebs-01:9100"; } - { name = 
"PHP-FPM"; service = "phpfpm-.*\\.service"; instance = "amzebs-01:9100"; } + { name = "PHP-FPM"; service = "phpfpm-.*[.]service"; instance = "amzebs-01:9100"; } ]; # Extract host from instance (e.g., "fw:9100" -> "fw") @@ -25,12 +25,17 @@ let isRegex = svc: lib.hasInfix ".*" svc || lib.hasInfix "\\" svc; # Build the PromQL expression + # For regex patterns: use min() to alert if ANY matching service is down + # In both cases: append OR vector(0) so a missing metric evaluates to 0 mkExpr = svc: let nameMatch = if isRegex svc.service then "name=~\"${svc.service}\"" else "name=\"${svc.service}\""; - in "node_systemd_unit_state{state=\"active\", ${nameMatch}, instance=\"${svc.instance}\"} OR on() vector(0)"; + baseQuery = "node_systemd_unit_state{state=\"active\", ${nameMatch}, instance=\"${svc.instance}\"}"; + in if isRegex svc.service + then "min(${baseQuery}) OR on() vector(0)" + else "${baseQuery} OR on() vector(0)"; mkServiceAlert = svc: { uid = mkUid svc.name; diff --git a/utils/modules/victoriametrics/default.nix b/utils/modules/victoriametrics/default.nix index a323699..bf04e4f 100644 --- a/utils/modules/victoriametrics/default.nix +++ b/utils/modules/victoriametrics/default.nix @@ -1,6 +1,9 @@ { config, lib, pkgs, ... 
}: with lib; let + cfg = config.services.victoriametrics; + serviceRegex = concatStringsSep "|" cfg.monitoredServices; + configure_prom = builtins.toFile "prometheus.yml" '' scrape_configs: # System metrics @@ -27,13 +30,20 @@ let regex: 'node_systemd_unit_state' action: keep - source_labels: [name] - regex: '(container@git|microvm@git-runner-|postfix|dovecot|openldap|wireguard-wg_cloonar).*\.service' + regex: '(${serviceRegex}).*\.service' action: keep - ${concatStringsSep "\n" config.services.victoriametrics.extraScrapeConfigs} + ${concatStringsSep "\n" cfg.extraScrapeConfigs} ''; in { options.services.victoriametrics = { + monitoredServices = mkOption { + type = types.listOf types.str; + default = []; + description = "List of systemd service name patterns to monitor (without .service suffix)"; + example = [ "mysql" "nginx" "phpfpm-.*" ]; + }; + extraScrapeConfigs = mkOption { type = types.listOf types.str; default = []; From 025adf414235b0515e0dd7d14cb62eef8fbf5699 Mon Sep 17 00:00:00 2001 From: Dominik Polakovics Date: Mon, 5 Jan 2026 10:45:45 +0100 Subject: [PATCH 2/2] feat: add project --- hosts/nb/users/configs/project_history | 1 + hosts/nb/users/dominik.nix | 2 ++ 2 files changed, 3 insertions(+) diff --git a/hosts/nb/users/configs/project_history b/hosts/nb/users/configs/project_history index 16b2b75..b38e27f 100644 --- a/hosts/nb/users/configs/project_history +++ b/hosts/nb/users/configs/project_history @@ -13,6 +13,7 @@ /home/dominik/projects/scana11y/sa-core /home/dominik/projects/cloonar/cloonar-fit /home/dominik/projects/cloonar/ai-image-alt +/home/dominik/projects/cloonar/bookmap /home/dominik/projects/home-automation/lego-hetzner-bridge /home/dominik/projects/home-automation/ghetto-nixos diff --git a/hosts/nb/users/dominik.nix b/hosts/nb/users/dominik.nix index e91c9a0..42f58d8 100644 --- a/hosts/nb/users/dominik.nix +++ b/hosts/nb/users/dominik.nix @@ -619,6 +619,8 @@ in git clone gitea@git.cloonar.com:Cloonar/ldap2vcard.git 
${persistHome}/projects/cloonar/ldap2vcard 2>/dev/null git clone gitea@git.cloonar.com:ScanA11y/sa-core.git ${persistHome}/projects/scana11y/sa-core 2>/dev/null git clone gitea@git.cloonar.com:Cloonar/ai-image-alt.git ${persistHome}/projects/cloonar/ai-image-alt 2>/dev/null + git clone gitea@git.cloonar.com:Cloonar/bookmap.git ${persistHome}/projects/cloonar/bookmap 2>/dev/null + git clone gitea@git.cloonar.com:dominik.polakovics/typo3-basic.git ${persistHome}/cloonar/typo3-basic 2>/dev/null git clone gitea@git.cloonar.com:renovate/renovate-config.git ${persistHome}/cloonar/renovate-config 2>/dev/null