feat: power management for nas

This commit is contained in:
Dominik Polakovics 2026-04-11 09:31:43 +02:00
parent b02acb5b60
commit 46f42dab4b
5 changed files with 235 additions and 1 deletions

View file

@ -27,6 +27,7 @@
./modules/podman.nix
./modules/omada.nix
./modules/ddclient.nix
./modules/nas-wake-on-access.nix
# ./modules/wol.nix
@ -94,7 +95,6 @@
nixpkgs.config.allowUnfreePredicate = pkg: builtins.elem (lib.getName pkg) [
"mongodb"
"ai-mailer"
"filebot"
"claude-code"
];

View file

@ -85,6 +85,12 @@
chain forward {
type filter hook forward priority filter; policy drop;
# Wake-on-access: flag new traffic aimed at the NAS so
# nas-wake-journal.service can fire a WOL. No verdict => falls
# through, does not broaden policy. ct state new skips ongoing
# flows; rate limit caps journal spam before it leaves the kernel.
ip daddr ${config.networkPrefix}.97.11 ct state new limit rate 30/minute log prefix "nas-wake: " comment "trigger wake-on-access"
iifname "wg_cloonar" counter accept comment "test wireguard"
iifname "wg_cloonar" oifname lo counter accept comment "wireguard to server"

View file

@ -0,0 +1,110 @@
# NAS wake-on-access (fw side)
#
# Detects traffic aimed at the NAS (10.42.97.11) and sends a WOL magic
# packet so the machine comes back up on demand after it has powered itself
# off (see hosts/nas/modules/auto-shutdown.nix).
#
# Traffic reaches the NAS via two paths, so we need two detectors that feed
# the same wake script:
#
#   1. Cross-VLAN traffic is routed through fw and hits nftables' forward
#      chain. A logging rule tags these packets and a journal follower
#      translates the log line into a wake invocation.
#
#   2. Same-VLAN (server) traffic stays on the bridge and never reaches
#      nftables. A tcpdump follower watches ARP requests (who-has) for
#      10.42.97.11 on the server interface and triggers the wake from there.
{ config, lib, pkgs, ... }:
let
  nasIp = "${config.networkPrefix}.97.11";
  nasMac = "6c:1f:f7:8e:a9:86";
  serverBroadcast = "${config.networkPrefix}.97.255";
  serverIface = "server";
  stateDir = "/run/nas-wake-on-access";
  lastWakeFile = "${stateDir}/last-wake";
  cooldownSeconds = 30;

  # Shared wake path: cooldown gate -> liveness probe -> WOL. Both followers
  # funnel into this script so rate limiting lives in exactly one place.
  wakeScript = pkgs.writeShellScript "nas-wake" ''
    set -euo pipefail
    mkdir -p "${stateDir}"
    now=$(date +%s)
    # Cooldown gate: at most one WOL every ${toString cooldownSeconds}s.
    # /run is tmpfs, so the timestamp resets cleanly on every fw reboot.
    if [[ -f "${lastWakeFile}" ]]; then
      last=$(cat "${lastWakeFile}" 2>/dev/null || echo 0)
      if (( now - last < ${toString cooldownSeconds} )); then
        exit 0
      fi
    fi
    # If the NAS answers ping it is already up; skip WOL but refresh
    # the cooldown so repeated probes don't spin the CPU.
    if ${pkgs.iputils}/bin/ping -c1 -W1 -n ${nasIp} >/dev/null 2>&1; then
      echo "nas-wake: NAS already up, not sending WOL"
      echo "$now" > "${lastWakeFile}"
      exit 0
    fi
    echo "nas-wake: sending WOL to ${nasMac} via ${serverBroadcast}"
    ${pkgs.wol}/bin/wol -i ${serverBroadcast} ${nasMac} || true
    echo "$now" > "${lastWakeFile}"
  '';

  # Journal follower for cross-VLAN (routed) traffic. nftables logs a line
  # prefixed with "nas-wake: " into the kernel ring buffer for every new
  # packet headed to the NAS (rate-limited kernel-side). "--since now"
  # suppresses journalctl's default tail replay, so a restart of this
  # service cannot fire a spurious wake from stale log lines.
  journalFollowerScript = pkgs.writeShellScript "nas-wake-journal-follower" ''
    set -euo pipefail
    ${pkgs.systemd}/bin/journalctl -kf -o cat --since now \
      | ${pkgs.gnugrep}/bin/grep --line-buffered -F "nas-wake:" \
      | while IFS= read -r _line; do
          ${wakeScript} || true
        done
  '';

  # ARP follower for same-VLAN traffic. Clients on the server VLAN talk to
  # the NAS directly via the bridge, so their packets never hit nftables.
  # An ARP "who-has 10.42.97.11" is the reliable early signal that someone
  # wants to reach the NAS. "arp[6:2] = 1" restricts the capture to ARP
  # requests (opcode 1 at offset 6 of the ARP header): without it, replies
  # and gratuitous announcements — notably the one the NAS itself emits
  # right after waking — would pointlessly re-trigger the wake script.
  arpFollowerScript = pkgs.writeShellScript "nas-wake-arp-follower" ''
    set -euo pipefail
    ${pkgs.tcpdump}/bin/tcpdump -i ${serverIface} -l -n -p -Q in \
        'arp and arp[6:2] = 1 and host ${nasIp}' \
      | while IFS= read -r _line; do
          ${wakeScript} || true
        done
  '';
in
{
  systemd.services.nas-wake-journal = {
    description = "Wake NAS on cross-VLAN traffic (nftables log follower)";
    # Ordered after network-online (like nas-wake-arp below) so the ping
    # probe and WOL broadcast in wakeScript have a usable interface as soon
    # as the first tagged log line arrives.
    after = [ "nftables.service" "systemd-journald.service" "network-online.target" ];
    requires = [ "systemd-journald.service" ];
    wants = [ "network-online.target" ];
    wantedBy = [ "multi-user.target" ];
    path = with pkgs; [ coreutils iputils wol systemd gnugrep ];
    serviceConfig = {
      Type = "simple";
      ExecStart = "${journalFollowerScript}";
      Restart = "always";
      RestartSec = "5s";
    };
  };

  systemd.services.nas-wake-arp = {
    description = "Wake NAS on same-VLAN ARP (server bridge)";
    after = [ "network-online.target" ];
    wants = [ "network-online.target" ];
    wantedBy = [ "multi-user.target" ];
    path = with pkgs; [ coreutils iputils wol tcpdump ];
    serviceConfig = {
      Type = "simple";
      ExecStart = "${arpFollowerScript}";
      Restart = "always";
      RestartSec = "5s";
      # tcpdump needs raw-socket capture rights even when run as root
      # under systemd's sandboxing defaults.
      AmbientCapabilities = [ "CAP_NET_RAW" "CAP_NET_ADMIN" ];
    };
  };
}

View file

@ -21,6 +21,7 @@ in
./modules/audiobookshelf.nix
./modules/power-management.nix
./modules/disk-monitoring.nix
./modules/auto-shutdown.nix
./modules/ugreen-leds.nix
./hardware-configuration.nix
@ -45,6 +46,10 @@ in
networking.firewall.enable = true;
networking.firewall.allowedTCPPorts = [ 22 ];
# Wake-on-LAN: fw re-wakes the NAS on demand after auto-shutdown.
# Assumes WOL is enabled in BIOS; translates to `ethtool -s enp2s0 wol g`.
networking.interfaces.enp2s0.wakeOnLan.enable = true;
# SOPS configuration
sops.age.sshKeyPaths = [ "/etc/ssh/ssh_host_ed25519_key" ];
sops.defaultSopsFile = ./secrets.yaml;

View file

@ -0,0 +1,113 @@
# NAS auto-shutdown
# Powers the machine off when all of the following are true:
#   1. No active SSH session
#   2. pyload is not downloading (no non-local TCP peers)
#   3. pyload has no hook children (extraction, filebot, unrar, ...)
#   4. Both spinning HDDs are in standby
#   5. At least 15 minutes have passed since boot (via OnBootSec)
# The fw host re-wakes the NAS on demand via WOL
# (see hosts/fw/modules/nas-wake-on-access.nix).
{ config, lib, pkgs, ... }:
let
  # Spinning disks whose power state gates the shutdown decision.
  # Only the Toshiba HDDs; NVMe drives do not spin down.
  hdds = [
    "/dev/disk/by-id/ata-TOSHIBA_MG10ACA20TE_8582A01SF4MJ"
    "/dev/disk/by-id/ata-TOSHIBA_MG10ACA20TE_75V2A0H3F4MJ"
  ];

  # Peer addresses that do not count as "real download traffic":
  # loopback, internal VLAN (10.42.0.0/16), and IPv6 loopback / link-local.
  # NOTE(review): the 10.42 prefix is hardcoded here while other modules
  # derive it from config.networkPrefix — keep in sync if it ever changes.
  localPeerRegex = "^(127\\.|10\\.42\\.|\\[::1\\]|\\[fe80)";

  autoShutdownScript = pkgs.writeShellScript "nas-auto-shutdown" ''
    set -euo pipefail
    log() { echo "auto-shutdown: $*"; }

    # 1. SSH sessions. Cheapest check and it prevents shutting down while an
    #    admin is logged in. Uses ss at the socket layer so it catches
    #    forwarding-only sessions and `ssh host 'cmd'` runs that utmp misses.
    #    NOTE: SSH port hardcoded to 22 — keep in sync with configuration.nix.
    ssh_sessions=$(${pkgs.iproute2}/bin/ss -H -t -n state established '( sport = :22 )' || true)
    if [[ -n "$ssh_sessions" ]]; then
      log "active SSH session present, staying up"
      exit 0
    fi

    # 2. pyload active downloads: any established/half-open pyload-owned
    #    socket to a non-local peer means a download is in flight.
    #    Do NOT use ss's `state` filter here: with a state filter ss drops
    #    the State column, shifting the peer address to field 4 — the old
    #    `awk '{print $5}'` then matched the Process column against the peer
    #    regex, so local peers were never exempted. Listing all non-listening
    #    sockets and filtering the State column in awk keeps the peer address
    #    deterministically in field 5.
    pyload_conns=$(${pkgs.iproute2}/bin/ss -H -t -n -p 2>/dev/null \
      | grep -F '"pyload"' \
      | awk '$1 == "ESTAB" || $1 == "SYN-SENT" || $1 == "SYN-RECV" { print $5 }' \
      | grep -Ev '${localPeerRegex}' || true)
    if [[ -n "$pyload_conns" ]]; then
      log "pyload has active non-local connections, staying up"
      exit 0
    fi

    # 3. pyload hook children (extraction, filebot, unrar, 7z, java, ...).
    #    The package_finished hook is launched by pyload's ExternalScripts
    #    plugin, so every child lives in pyload.service's cgroup. cgroup.procs
    #    contains PIDs/TGIDs only, not TIDs, so pyload's internal thread pool
    #    cannot false-positive. Fail-safe: if the file is unreadable, stay up.
    cgroup_procs=/sys/fs/cgroup/system.slice/pyload.service/cgroup.procs
    if [[ ! -r "$cgroup_procs" ]]; then
      log "pyload cgroup procs file unreadable ($cgroup_procs), staying up"
      exit 0
    fi
    main_pid=$(${pkgs.systemd}/bin/systemctl show -p MainPID --value pyload.service)
    children=$(grep -v -x -F "$main_pid" "$cgroup_procs" || true)
    if [[ -n "$children" ]]; then
      log "pyload hook children running ($(echo "$children" | tr '\n' ' ')), staying up"
      exit 0
    fi

    # 4. Both spinning HDDs must be in standby. hdparm -C is non-disturbing
    #    (does not wake the disk). The udev rule in power-management.nix runs
    #    hdparm -S 180, so standby implies >= 15 min of firmware-level idle —
    #    no extra shell-level debounce needed.
    for disk in ${lib.concatStringsSep " " hdds}; do
      if [[ ! -e "$disk" ]]; then
        log "disk $disk missing, staying up"
        exit 0
      fi
      device=$(readlink -f "$disk")
      # Alternation order matters: "active/idle" must precede "active" and
      # "idle" so grep -o captures the full compound state string.
      power_state=$(${pkgs.hdparm}/bin/hdparm -C "$device" 2>/dev/null \
        | grep -oP '(standby|active/idle|active|idle)' | head -1 || echo "unknown")
      if [[ "$power_state" != "standby" ]]; then
        log "$disk is $power_state, staying up"
        exit 0
      fi
    done

    # 5. All clear.
    log "all checks clear, powering off"
    ${pkgs.systemd}/bin/systemctl poweroff
  '';
in
{
  systemd.services.nas-auto-shutdown = {
    description = "Power off NAS when idle (HDD standby + pyload/filebot quiet + no SSH)";
    path = with pkgs; [ coreutils gawk gnugrep iproute2 hdparm systemd ];
    serviceConfig = {
      Type = "oneshot";
      User = "root";
      ExecStart = "${autoShutdownScript}";
    };
  };

  systemd.timers.nas-auto-shutdown = {
    description = "Run NAS idle check every minute";
    wantedBy = [ "timers.target" ];
    timerConfig = {
      # Requirement: earliest shutdown is 15 min after boot, so a
      # WOL-triggered wake is not immediately followed by another poweroff.
      OnBootSec = "15min";
      OnUnitActiveSec = "1min";
      AccuracySec = "10s";
      # Persistent makes no sense for an idle check — never fire a missed
      # run at boot.
      Persistent = false;
    };
  };
}