From f3cecd422f6d7d13eda1e4d52e1631eed04b2d56 Mon Sep 17 00:00:00 2001
From: Dominik Polakovics
Date: Sat, 11 Apr 2026 11:59:32 +0200
Subject: [PATCH] fix: remove arp wake up for nas, and make a limit how often wol packages are sent

---
 hosts/fw/modules/nas-wake-on-access.nix | 97 +++++++++++++++----------
 1 file changed, 60 insertions(+), 37 deletions(-)

diff --git a/hosts/fw/modules/nas-wake-on-access.nix b/hosts/fw/modules/nas-wake-on-access.nix
index c44eef8..7b6cea9 100644
--- a/hosts/fw/modules/nas-wake-on-access.nix
+++ b/hosts/fw/modules/nas-wake-on-access.nix
@@ -4,27 +4,32 @@
 # packet so the machine comes back up on demand after it has powered itself
 # off (see hosts/nas/modules/auto-shutdown.nix).
 #
-# Traffic reaches the NAS via two paths, so we need two detectors that feed
-# the same wake script:
+# Shape:
 #
-# 1. Cross-VLAN traffic is routed through fw and hits nftables' forward
-#    chain. A logging rule tags these packets and a journal follower
-#    translates the log line into a wake invocation.
+# 1. Detector: nftables' forward chain logs packets headed to the NAS with
+#    a "nas-wake: " prefix. A journal follower translates each log line
+#    into a wake invocation. Only cross-VLAN traffic hits this path, which
+#    is exactly what we want — same-VLAN ARP bursts from stale neighbor
+#    entries are not user intent and must not wake the NAS.
 #
-# 2. Same-VLAN (server) traffic stays on the bridge and never reaches
-#    nftables. A tcpdump follower watches ARP-who-has for 10.42.97.11 on
-#    the server interface and triggers the wake from there.
+# 2. Reachability probe: a systemd timer pings the NAS every 5s and
+#    writes the current epoch to last-seen-up on success. The wake script
+#    consults this timestamp and skips the WOL if the NAS was seen up
+#    recently — this both saves redundant WOLs and closes the UGREEN N100
+#    PSU-ramp-down race where a magic packet arriving right after
+#    poweroff makes the board boot into BIOS setup instead of the OS.
 { config, lib, pkgs, ... }:
 
 let
   nasIp = "${config.networkPrefix}.97.11";
   nasMac = "6c:1f:f7:8e:a9:86";
   serverBroadcast = "${config.networkPrefix}.97.255";
-  serverIface = "server";
 
   stateDir = "/run/nas-wake-on-access";
   lastWakeFile = "${stateDir}/last-wake";
+  lastSeenFile = "${stateDir}/last-seen-up";
   cooldownSeconds = 30;
+  holdoffSeconds = 60;
 
   wakeScript = pkgs.writeShellScript "nas-wake" ''
     set -euo pipefail
@@ -33,19 +38,29 @@ let
     now=$(date +%s)
 
     # Cooldown gate: at most one WOL every ${toString cooldownSeconds}s.
+    # Second line of defense against burst triggers; a bad stamp reads as 0.
     if [[ -f "${lastWakeFile}" ]]; then
-      last=$(cat "${lastWakeFile}" 2>/dev/null || echo 0)
-      if (( now - last < ${toString cooldownSeconds} )); then
+      last_wake=$(${pkgs.gnugrep}/bin/grep -m1 -xE '[1-9][0-9]*' "${lastWakeFile}" 2>/dev/null || echo 0)
+      if (( now - last_wake < ${toString cooldownSeconds} )); then
+        echo "nas-wake: cooldown active ($((now - last_wake))s < ${toString cooldownSeconds}s), skipping WOL"
         exit 0
       fi
     fi
 
-    # If the NAS answers ping it is already up; skip WOL but refresh
-    # the cooldown so repeated probes don't spin the CPU.
-    if ${pkgs.iputils}/bin/ping -c1 -W1 -n ${nasIp} >/dev/null 2>&1; then
-      echo "nas-wake: NAS already up, not sending WOL"
-      echo "$now" > "${lastWakeFile}"
-      exit 0
+    # Hold-off gate: if the NAS was probed up within the last
+    # ${toString holdoffSeconds}s, skip WOL. Two cases covered:
+    #   a) NAS is still up — WOL would be wasted (harmless but noisy).
+    #   b) NAS just started powering off — PSU is ramping down and a magic
+    #      packet arriving now is the UGREEN BIOS-cold-boot race window.
+    # Missing file => first boot of fw or probe has never run; unparsable
+    # content reads as 0. Either way fall through and send the WOL.
+    if [[ -f "${lastSeenFile}" ]]; then
+      last_seen=$(${pkgs.gnugrep}/bin/grep -m1 -xE '[1-9][0-9]*' "${lastSeenFile}" 2>/dev/null || echo 0)
+      age=$(( now - last_seen ))
+      if (( age < ${toString holdoffSeconds} )); then
+        echo "nas-wake: NAS seen up ''${age}s ago (< ${toString holdoffSeconds}s), skipping WOL"
+        exit 0
+      fi
     fi
 
     echo "nas-wake: sending WOL to ${nasMac} via ${serverBroadcast}"
@@ -65,17 +80,14 @@ let
         done
   '';
 
-  # ARP follower for same-VLAN traffic. Clients on the server VLAN talk to
-  # the NAS directly via the bridge, so their packets never hit nftables.
-  # An ARP "who-has 10.42.97.11" is the reliable early signal that someone
-  # wants to reach the NAS.
-  arpFollowerScript = pkgs.writeShellScript "nas-wake-arp-follower" ''
+  # Periodic reachability probe. One-shot: ping the NAS, on success publish
+  # the current epoch to lastSeenFile via tmp file + rename so the wake script
+  # never reads a half-written stamp. On failure the old stamp just ages out.
+  nasProbeScript = pkgs.writeShellScript "nas-probe" ''
     set -euo pipefail
-    ${pkgs.tcpdump}/bin/tcpdump -i ${serverIface} -l -n -p -Q in \
-      'arp and host ${nasIp}' \
-      | while IFS= read -r _line; do
-          ${wakeScript} || true
-        done
+    if ${pkgs.iputils}/bin/ping -c1 -W1 -n ${nasIp} >/dev/null 2>&1; then
+      date +%s > "${lastSeenFile}.tmp" && mv -f "${lastSeenFile}.tmp" "${lastSeenFile}"
+    fi
   '';
 in
 {
@@ -84,27 +96,38 @@ in
     after = [ "nftables.service" "systemd-journald.service" ];
     requires = [ "systemd-journald.service" ];
     wantedBy = [ "multi-user.target" ];
-    path = with pkgs; [ coreutils iputils wol systemd gnugrep ];
+    path = with pkgs; [ coreutils wol systemd gnugrep ];
     serviceConfig = {
       Type = "simple";
       ExecStart = "${journalFollowerScript}";
       Restart = "always";
       RestartSec = "5s";
+      RuntimeDirectory = "nas-wake-on-access";
+      RuntimeDirectoryPreserve = "yes";
     };
   };
 
-  systemd.services.nas-wake-arp = {
-    description = "Wake NAS on same-VLAN ARP (server bridge)";
+  systemd.services.nas-probe = {
+    description = "Probe NAS reachability for wake-on-access hold-off";
     after = [ "network-online.target" ];
     wants = [ "network-online.target" ];
-    wantedBy = [ "multi-user.target" ];
-    path = with pkgs; [ coreutils iputils wol tcpdump ];
+    path = with pkgs; [ coreutils iputils ];
     serviceConfig = {
-      Type = "simple";
-      ExecStart = "${arpFollowerScript}";
-      Restart = "always";
-      RestartSec = "5s";
-      AmbientCapabilities = [ "CAP_NET_RAW" "CAP_NET_ADMIN" ];
+      Type = "oneshot";
+      ExecStart = "${nasProbeScript}";
+      TimeoutStartSec = "3s";
+      RuntimeDirectory = "nas-wake-on-access";
+      RuntimeDirectoryPreserve = "yes";
+    };
+  };
+
+  systemd.timers.nas-probe = {
+    description = "Run NAS reachability probe every 5s";
+    wantedBy = [ "timers.target" ];
+    timerConfig = {
+      OnBootSec = "10s";
+      OnUnitActiveSec = "5s";
+      AccuracySec = "1s";
     };
   };
 }