fix: remove ARP-based wake-up for the NAS and rate-limit how often WOL packets are sent

This commit is contained in:
Dominik Polakovics Polakovics 2026-04-11 11:59:32 +02:00
parent 46f42dab4b
commit f3cecd422f

View file

@ -4,27 +4,32 @@
# packet so the machine comes back up on demand after it has powered itself # packet so the machine comes back up on demand after it has powered itself
# off (see hosts/nas/modules/auto-shutdown.nix). # off (see hosts/nas/modules/auto-shutdown.nix).
# #
# Traffic reaches the NAS via two paths, so we need two detectors that feed # Shape:
# the same wake script:
# #
# 1. Cross-VLAN traffic is routed through fw and hits nftables' forward # 1. Detector: nftables' forward chain logs packets headed to the NAS with
# chain. A logging rule tags these packets and a journal follower # a "nas-wake: " prefix. A journal follower translates each log line
# translates the log line into a wake invocation. # into a wake invocation. Only cross-VLAN traffic hits this path, which
# is exactly what we want — same-VLAN ARP bursts from stale neighbor
# entries are not user intent and must not wake the NAS.
# #
# 2. Same-VLAN (server) traffic stays on the bridge and never reaches # 2. Reachability probe: a systemd timer pings the NAS every 5s and
# nftables. A tcpdump follower watches ARP-who-has for 10.42.97.11 on # writes the current epoch to last-seen-up on success. The wake script
# the server interface and triggers the wake from there. # consults this timestamp and skips the WOL if the NAS was seen up
# recently — this both saves redundant WOLs and closes the UGREEN N100
# PSU-ramp-down race where a magic packet arriving right after
# poweroff makes the board boot into BIOS setup instead of the OS.
{ config, lib, pkgs, ... }: { config, lib, pkgs, ... }:
let let
nasIp = "${config.networkPrefix}.97.11"; nasIp = "${config.networkPrefix}.97.11";
nasMac = "6c:1f:f7:8e:a9:86"; nasMac = "6c:1f:f7:8e:a9:86";
serverBroadcast = "${config.networkPrefix}.97.255"; serverBroadcast = "${config.networkPrefix}.97.255";
serverIface = "server";
stateDir = "/run/nas-wake-on-access"; stateDir = "/run/nas-wake-on-access";
lastWakeFile = "${stateDir}/last-wake"; lastWakeFile = "${stateDir}/last-wake";
lastSeenFile = "${stateDir}/last-seen-up";
cooldownSeconds = 30; cooldownSeconds = 30;
holdoffSeconds = 60;
wakeScript = pkgs.writeShellScript "nas-wake" '' wakeScript = pkgs.writeShellScript "nas-wake" ''
set -euo pipefail set -euo pipefail
@ -33,20 +38,30 @@ let
now=$(date +%s) now=$(date +%s)
# Cooldown gate: at most one WOL every ${toString cooldownSeconds}s. # Cooldown gate: at most one WOL every ${toString cooldownSeconds}s.
# Second line of defense against burst triggers.
if [[ -f "${lastWakeFile}" ]]; then if [[ -f "${lastWakeFile}" ]]; then
last=$(cat "${lastWakeFile}" 2>/dev/null || echo 0) last_wake=$(cat "${lastWakeFile}" 2>/dev/null || echo 0)
if (( now - last < ${toString cooldownSeconds} )); then if (( now - last_wake < ${toString cooldownSeconds} )); then
echo "nas-wake: cooldown active ($((now - last_wake))s < ${toString cooldownSeconds}s), skipping WOL"
exit 0 exit 0
fi fi
fi fi
# If the NAS answers ping it is already up; skip WOL but refresh # Hold-off gate: if the NAS was probed up within the last
# the cooldown so repeated probes don't spin the CPU. # ${toString holdoffSeconds}s, skip WOL. Two cases covered:
if ${pkgs.iputils}/bin/ping -c1 -W1 -n ${nasIp} >/dev/null 2>&1; then # a) NAS is still up — WOL would be wasted (harmless but noisy).
echo "nas-wake: NAS already up, not sending WOL" # b) NAS just started powering off — PSU is ramping down and a magic
echo "$now" > "${lastWakeFile}" # packet arriving now is the UGREEN BIOS-cold-boot race window.
# Missing file => first boot of fw or probe has never run; fall through
# and send the WOL (state unknown, default to waking).
if [[ -f "${lastSeenFile}" ]]; then
last_seen=$(cat "${lastSeenFile}" 2>/dev/null || echo 0)
age=$(( now - last_seen ))
if (( age < ${toString holdoffSeconds} )); then
echo "nas-wake: NAS seen up ''${age}s ago (< ${toString holdoffSeconds}s), skipping WOL"
exit 0 exit 0
fi fi
fi
echo "nas-wake: sending WOL to ${nasMac} via ${serverBroadcast}" echo "nas-wake: sending WOL to ${nasMac} via ${serverBroadcast}"
${pkgs.wol}/bin/wol -i ${serverBroadcast} ${nasMac} || true ${pkgs.wol}/bin/wol -i ${serverBroadcast} ${nasMac} || true
@ -65,17 +80,14 @@ let
done done
''; '';
# ARP follower for same-VLAN traffic. Clients on the server VLAN talk to # Periodic reachability probe. One-shot: ping the NAS, on success write
# the NAS directly via the bridge, so their packets never hit nftables. # the current epoch to lastSeenFile. On failure, leave the file alone so
# An ARP "who-has 10.42.97.11" is the reliable early signal that someone # the timestamp ages out naturally past holdoffSeconds.
# wants to reach the NAS. nasProbeScript = pkgs.writeShellScript "nas-probe" ''
arpFollowerScript = pkgs.writeShellScript "nas-wake-arp-follower" ''
set -euo pipefail set -euo pipefail
${pkgs.tcpdump}/bin/tcpdump -i ${serverIface} -l -n -p -Q in \ if ${pkgs.iputils}/bin/ping -c1 -W1 -n ${nasIp} >/dev/null 2>&1; then
'arp and host ${nasIp}' \ date +%s > "${lastSeenFile}"
| while IFS= read -r _line; do fi
${wakeScript} || true
done
''; '';
in in
{ {
@ -84,27 +96,38 @@ in
after = [ "nftables.service" "systemd-journald.service" ]; after = [ "nftables.service" "systemd-journald.service" ];
requires = [ "systemd-journald.service" ]; requires = [ "systemd-journald.service" ];
wantedBy = [ "multi-user.target" ]; wantedBy = [ "multi-user.target" ];
path = with pkgs; [ coreutils iputils wol systemd gnugrep ]; path = with pkgs; [ coreutils wol systemd gnugrep ];
serviceConfig = { serviceConfig = {
Type = "simple"; Type = "simple";
ExecStart = "${journalFollowerScript}"; ExecStart = "${journalFollowerScript}";
Restart = "always"; Restart = "always";
RestartSec = "5s"; RestartSec = "5s";
RuntimeDirectory = "nas-wake-on-access";
RuntimeDirectoryPreserve = "yes";
}; };
}; };
systemd.services.nas-wake-arp = { systemd.services.nas-probe = {
description = "Wake NAS on same-VLAN ARP (server bridge)"; description = "Probe NAS reachability for wake-on-access hold-off";
after = [ "network-online.target" ]; after = [ "network-online.target" ];
wants = [ "network-online.target" ]; wants = [ "network-online.target" ];
wantedBy = [ "multi-user.target" ]; path = with pkgs; [ coreutils iputils ];
path = with pkgs; [ coreutils iputils wol tcpdump ];
serviceConfig = { serviceConfig = {
Type = "simple"; Type = "oneshot";
ExecStart = "${arpFollowerScript}"; ExecStart = "${nasProbeScript}";
Restart = "always"; TimeoutStartSec = "3s";
RestartSec = "5s"; RuntimeDirectory = "nas-wake-on-access";
AmbientCapabilities = [ "CAP_NET_RAW" "CAP_NET_ADMIN" ]; RuntimeDirectoryPreserve = "yes";
};
};
systemd.timers.nas-probe = {
description = "Run NAS reachability probe every 5s";
wantedBy = [ "timers.target" ];
timerConfig = {
OnBootSec = "10s";
OnUnitActiveSec = "5s";
AccuracySec = "1s";
}; };
}; };
} }