Replace the per-host `services.victoriametrics.monitoredServices` option with
static systemd scrape filters and update the service-down alerts.

Review notes (text before the first "diff --git" header is not part of any hunk):
- The fw systemd job filters scraped series with metric_relabel_configs;
  `__name__` and `name` are sample labels and are not set during target
  relabeling, so `keep` rules under relabel_configs would drop the target.
- `collect[]` takes node_exporter collector names, hence `systemd`.
- The PHP-FPM alert keeps `[.]` so the PromQL string contains no backslash escape.
- The shared keep regex also lists the amzebs-01 services that are still alerted on.

diff --git a/hosts/amzebs-01/configuration.nix b/hosts/amzebs-01/configuration.nix
index 1485f94..700cc30 100644
--- a/hosts/amzebs-01/configuration.nix
+++ b/hosts/amzebs-01/configuration.nix
@@ -60,9 +60,6 @@
     };
   };
 
-  # Systemd services to monitor
-  services.victoriametrics.monitoredServices = [ "mysql" "nginx" "phpfpm-.*" ];
-
   # backups - adjust repo for this host
   borgbackup.repo = "u149513-sub10@u149513-sub10.your-backup.de:borg";
 
diff --git a/hosts/fw/configuration.nix b/hosts/fw/configuration.nix
index 493d50f..e3f8115 100644
--- a/hosts/fw/configuration.nix
+++ b/hosts/fw/configuration.nix
@@ -76,9 +76,6 @@
 
   networkPrefix = "10.42";
 
-  # Systemd services to monitor
-  services.victoriametrics.monitoredServices = [ "ai-mailer" "container@git" "microvm@git-runner-" ];
-
   nixpkgs.overlays = [
     (import ./utils/overlays/packages.nix)
   ];
diff --git a/hosts/fw/modules/fwmetrics.nix b/hosts/fw/modules/fwmetrics.nix
index 7ee2e9e..665775b 100644
--- a/hosts/fw/modules/fwmetrics.nix
+++ b/hosts/fw/modules/fwmetrics.nix
@@ -2,19 +2,40 @@
 let
   configure_prom = builtins.toFile "prometheus.yml" ''
     scrape_configs:
-    - job_name: 'server'
+    # System metrics
+    - job_name: 'node'
       stream_parse: true
       static_configs:
       - targets:
         - ${config.networking.hostName}:9100
+
+    # Systemd service monitoring
+    - job_name: 'systemd'
+      metrics_path: /metrics
+      params:
+        collect[]:
+          - 'systemd'
+      static_configs:
+      - targets:
+        - ${config.networking.hostName}:9100
+      metric_relabel_configs:
+      - source_labels: [__name__]
+        regex: 'node_systemd_unit_state'
+        action: keep
+      - source_labels: [name]
+        regex: '(ai-mailer|container@git|microvm@git-runner-).*\.service'
+        action: keep
   '';
 in {
   sops.secrets.victoria-agent-env = {
     sopsFile = ../utils/modules/victoriametrics/secrets.yaml;
   };
 
-  services.prometheus.exporters.node.enable = true;
+  services.prometheus.exporters.node = {
+    enable = true;
+    enabledCollectors = [ "systemd" ];
+  };
 
   systemd.services.export-fw-to-prometheus = {
     path = with pkgs; [victoriametrics];
     enable = true;
diff --git a/hosts/mail/modules/metrics/default.nix b/hosts/mail/modules/metrics/default.nix
index c355986..998283a 100644
--- a/hosts/mail/modules/metrics/default.nix
+++ b/hosts/mail/modules/metrics/default.nix
@@ -5,7 +5,4 @@
     ./postfix-exporter.nix
     ./dovecot-exporter.nix
   ];
-
-  # Systemd services to monitor
-  services.victoriametrics.monitoredServices = [ "postfix" "dovecot" "openldap" "wireguard-wg_cloonar" ];
 }
\ No newline at end of file
diff --git a/hosts/nb/users/configs/project_history b/hosts/nb/users/configs/project_history
index b38e27f..16b2b75 100644
--- a/hosts/nb/users/configs/project_history
+++ b/hosts/nb/users/configs/project_history
@@ -13,7 +13,6 @@
 /home/dominik/projects/scana11y/sa-core
 /home/dominik/projects/cloonar/cloonar-fit
 /home/dominik/projects/cloonar/ai-image-alt
-/home/dominik/projects/cloonar/bookmap
 /home/dominik/projects/home-automation/lego-hetzner-bridge
 /home/dominik/projects/home-automation/ghetto-nixos
 
diff --git a/hosts/nb/users/dominik.nix b/hosts/nb/users/dominik.nix
index 42f58d8..e91c9a0 100644
--- a/hosts/nb/users/dominik.nix
+++ b/hosts/nb/users/dominik.nix
@@ -619,8 +619,6 @@ in
       git clone gitea@git.cloonar.com:Cloonar/ldap2vcard.git ${persistHome}/projects/cloonar/ldap2vcard 2>/dev/null
       git clone gitea@git.cloonar.com:ScanA11y/sa-core.git ${persistHome}/projects/scana11y/sa-core 2>/dev/null
       git clone gitea@git.cloonar.com:Cloonar/ai-image-alt.git ${persistHome}/projects/cloonar/ai-image-alt 2>/dev/null
-      git clone gitea@git.cloonar.com:Cloonar/bookmap.git ${persistHome}/projects/cloonar/bookmap 2>/dev/null
-
       git clone gitea@git.cloonar.com:dominik.polakovics/typo3-basic.git ${persistHome}/cloonar/typo3-basic 2>/dev/null
       git clone gitea@git.cloonar.com:renovate/renovate-config.git ${persistHome}/cloonar/renovate-config 2>/dev/null
 
diff --git a/hosts/web-arm/modules/grafana/alerting/service/services_down.nix b/hosts/web-arm/modules/grafana/alerting/service/services_down.nix
index 8d04832..bc2df22 100644
--- a/hosts/web-arm/modules/grafana/alerting/service/services_down.nix
+++ b/hosts/web-arm/modules/grafana/alerting/service/services_down.nix
@@ -9,10 +9,10 @@ let
     { name = "OpenLDAP"; service = "openldap.service"; instance = "mail:9100"; }
     { name = "Gitea"; service = "container@git.service"; instance = "fw:9100"; }
     { name = "Gitea Runner"; service = "microvm@git-runner-1.service"; instance = "fw:9100"; }
-    { name = "WireGuard"; service = "wireguard-wg_cloonar.service"; instance = "fw:9100"; }
+    { name = "WireGuard"; service = "wireguard-wg_cloonar.service"; instance = "mail:9100"; }
     { name = "MySQL"; service = "mysql.service"; instance = "amzebs-01:9100"; }
     { name = "Nginx"; service = "nginx.service"; instance = "amzebs-01:9100"; }
     { name = "PHP-FPM"; service = "phpfpm-.*[.]service"; instance = "amzebs-01:9100"; }
   ];
 
   # Extract host from instance (e.g., "fw:9100" -> "fw")
@@ -25,17 +25,12 @@ let
   isRegex = svc: lib.hasInfix ".*" svc || lib.hasInfix "\\" svc;
 
   # Build the PromQL expression
-  # For regex patterns: use min() to alert if ANY matching service is down
-  # For single services: use OR vector(0) to handle missing metrics
   mkExpr = svc:
     let
       nameMatch = if isRegex svc.service
         then "name=~\"${svc.service}\""
         else "name=\"${svc.service}\"";
-      baseQuery = "node_systemd_unit_state{state=\"active\", ${nameMatch}, instance=\"${svc.instance}\"}";
-    in if isRegex svc.service
-      then "min(${baseQuery})"
-      else "${baseQuery} OR on() vector(0)";
+    in "node_systemd_unit_state{state=\"active\", ${nameMatch}, instance=\"${svc.instance}\"} OR on() vector(0)";
 
   mkServiceAlert = svc: {
     uid = mkUid svc.name;
diff --git a/utils/modules/victoriametrics/default.nix b/utils/modules/victoriametrics/default.nix
index bf04e4f..a323699 100644
--- a/utils/modules/victoriametrics/default.nix
+++ b/utils/modules/victoriametrics/default.nix
@@ -1,9 +1,6 @@
 { config, lib, pkgs, ... }:
 with lib;
 let
-  cfg = config.services.victoriametrics;
-  serviceRegex = concatStringsSep "|" cfg.monitoredServices;
-
   configure_prom = builtins.toFile "prometheus.yml" ''
     scrape_configs:
     # System metrics
@@ -30,20 +27,13 @@ let
         regex: 'node_systemd_unit_state'
         action: keep
       - source_labels: [name]
-        regex: '(${serviceRegex}).*\.service'
+        regex: '(container@git|microvm@git-runner-|postfix|dovecot|openldap|wireguard-wg_cloonar|mysql|nginx|phpfpm-).*\.service'
         action: keep
 
-    ${concatStringsSep "\n" cfg.extraScrapeConfigs}
+    ${concatStringsSep "\n" config.services.victoriametrics.extraScrapeConfigs}
   '';
 in {
   options.services.victoriametrics = {
-    monitoredServices = mkOption {
-      type = types.listOf types.str;
-      default = [];
-      description = "List of systemd service name patterns to monitor (without .service suffix)";
-      example = [ "mysql" "nginx" "phpfpm-.*" ];
-    };
-
     extraScrapeConfigs = mkOption {
       type = types.listOf types.str;
       default = [];