feat: power management for nas
This commit is contained in:
parent
b02acb5b60
commit
46f42dab4b
5 changed files with 235 additions and 1 deletions
|
|
@ -27,6 +27,7 @@
|
|||
./modules/podman.nix
|
||||
./modules/omada.nix
|
||||
./modules/ddclient.nix
|
||||
./modules/nas-wake-on-access.nix
|
||||
# ./modules/wol.nix
|
||||
|
||||
|
||||
|
|
@ -94,7 +95,6 @@
|
|||
nixpkgs.config.allowUnfreePredicate = pkg: builtins.elem (lib.getName pkg) [
|
||||
"mongodb"
|
||||
"ai-mailer"
|
||||
"filebot"
|
||||
"claude-code"
|
||||
];
|
||||
|
||||
|
|
|
|||
|
|
@ -85,6 +85,12 @@
|
|||
chain forward {
|
||||
type filter hook forward priority filter; policy drop;
|
||||
|
||||
# Wake-on-access: flag new traffic aimed at the NAS so
|
||||
# nas-wake-journal.service can fire a WOL. No verdict => falls
|
||||
# through, does not broaden policy. ct state new skips ongoing
|
||||
# flows; rate limit caps journal spam before it leaves the kernel.
|
||||
ip daddr ${config.networkPrefix}.97.11 ct state new limit rate 30/minute log prefix "nas-wake: " comment "trigger wake-on-access"
|
||||
|
||||
iifname "wg_cloonar" counter accept comment "test wireguard"
|
||||
|
||||
iifname "wg_cloonar" oifname lo counter accept comment "wireguard to server"
|
||||
|
|
|
|||
110
hosts/fw/modules/nas-wake-on-access.nix
Normal file
110
hosts/fw/modules/nas-wake-on-access.nix
Normal file
|
|
@ -0,0 +1,110 @@
|
|||
# NAS wake-on-access (fw side)
#
# Detects traffic aimed at the NAS (10.42.97.11) and sends a WOL magic
# packet so the machine comes back up on demand after it has powered itself
# off (see hosts/nas/modules/auto-shutdown.nix).
#
# Traffic reaches the NAS via two paths, so we need two detectors that feed
# the same wake script:
#
# 1. Cross-VLAN traffic is routed through fw and hits nftables' forward
#    chain. A logging rule tags these packets and a journal follower
#    translates the log line into a wake invocation.
#
# 2. Same-VLAN (server) traffic stays on the bridge and never reaches
#    nftables. A tcpdump follower watches ARP-who-has for 10.42.97.11 on
#    the server interface and triggers the wake from there.
{ config, lib, pkgs, ... }:

let
  nasIp = "${config.networkPrefix}.97.11";
  nasMac = "6c:1f:f7:8e:a9:86";
  serverBroadcast = "${config.networkPrefix}.97.255";
  serverIface = "server";

  stateDir = "/run/nas-wake-on-access";
  lastWakeFile = "${stateDir}/last-wake";
  cooldownSeconds = 30;

  # Shared wake path: cooldown gate -> skip if NAS already answers ping ->
  # send the WOL magic packet and stamp the cooldown file.
  wakeScript = pkgs.writeShellScript "nas-wake" ''
    set -euo pipefail

    mkdir -p "${stateDir}"
    now=$(date +%s)

    # Cooldown gate: at most one WOL every ${toString cooldownSeconds}s.
    if [[ -f "${lastWakeFile}" ]]; then
      last=$(cat "${lastWakeFile}" 2>/dev/null || echo 0)
      # Fix: a corrupt or empty state file must not abort the wake --
      # bash arithmetic on a non-number fails, and under `set -e` that
      # would exit before the WOL is ever sent. Treat garbage as 0.
      [[ "$last" =~ ^[0-9]+$ ]] || last=0
      if (( now - last < ${toString cooldownSeconds} )); then
        exit 0
      fi
    fi

    # If the NAS answers ping it is already up; skip WOL but refresh
    # the cooldown so repeated probes don't spin the CPU.
    if ${pkgs.iputils}/bin/ping -c1 -W1 -n ${nasIp} >/dev/null 2>&1; then
      echo "nas-wake: NAS already up, not sending WOL"
      echo "$now" > "${lastWakeFile}"
      exit 0
    fi

    echo "nas-wake: sending WOL to ${nasMac} via ${serverBroadcast}"
    ${pkgs.wol}/bin/wol -i ${serverBroadcast} ${nasMac} || true
    echo "$now" > "${lastWakeFile}"
  '';

  # Journal follower for cross-VLAN (routed) traffic. nftables logs a line
  # prefixed with "nas-wake: " into the kernel ring buffer for every new
  # packet headed to the NAS (rate-limited kernel-side).
  journalFollowerScript = pkgs.writeShellScript "nas-wake-journal-follower" ''
    set -euo pipefail
    ${pkgs.systemd}/bin/journalctl -kf -o cat --since now \
      | ${pkgs.gnugrep}/bin/grep --line-buffered -F "nas-wake:" \
      | while IFS= read -r _line; do
          ${wakeScript} || true
        done
  '';

  # ARP follower for same-VLAN traffic. Clients on the server VLAN talk to
  # the NAS directly via the bridge, so their packets never hit nftables.
  # An ARP "who-has 10.42.97.11" is the reliable early signal that someone
  # wants to reach the NAS.
  arpFollowerScript = pkgs.writeShellScript "nas-wake-arp-follower" ''
    set -euo pipefail
    ${pkgs.tcpdump}/bin/tcpdump -i ${serverIface} -l -n -p -Q in \
        'arp and host ${nasIp}' \
      | while IFS= read -r _line; do
          ${wakeScript} || true
        done
  '';
in
{
  systemd.services.nas-wake-journal = {
    description = "Wake NAS on cross-VLAN traffic (nftables log follower)";
    after = [ "nftables.service" "systemd-journald.service" ];
    requires = [ "systemd-journald.service" ];
    wantedBy = [ "multi-user.target" ];
    path = with pkgs; [ coreutils iputils wol systemd gnugrep ];
    serviceConfig = {
      Type = "simple";
      ExecStart = "${journalFollowerScript}";
      Restart = "always";
      RestartSec = "5s";
    };
  };

  systemd.services.nas-wake-arp = {
    description = "Wake NAS on same-VLAN ARP (server bridge)";
    after = [ "network-online.target" ];
    wants = [ "network-online.target" ];
    wantedBy = [ "multi-user.target" ];
    path = with pkgs; [ coreutils iputils wol tcpdump ];
    serviceConfig = {
      Type = "simple";
      ExecStart = "${arpFollowerScript}";
      Restart = "always";
      RestartSec = "5s";
      # tcpdump needs raw-socket access; run with just these caps instead
      # of relying on full root privileges inside the sandbox.
      AmbientCapabilities = [ "CAP_NET_RAW" "CAP_NET_ADMIN" ];
    };
  };
}
|
||||
|
|
@ -21,6 +21,7 @@ in
|
|||
./modules/audiobookshelf.nix
|
||||
./modules/power-management.nix
|
||||
./modules/disk-monitoring.nix
|
||||
./modules/auto-shutdown.nix
|
||||
./modules/ugreen-leds.nix
|
||||
|
||||
./hardware-configuration.nix
|
||||
|
|
@ -45,6 +46,10 @@ in
|
|||
networking.firewall.enable = true;
|
||||
networking.firewall.allowedTCPPorts = [ 22 ];
|
||||
|
||||
# Wake-on-LAN: fw re-wakes the NAS on demand after auto-shutdown.
|
||||
# Assumes WOL is enabled in BIOS; translates to `ethtool -s enp2s0 wol g`.
|
||||
networking.interfaces.enp2s0.wakeOnLan.enable = true;
|
||||
|
||||
# SOPS configuration
|
||||
sops.age.sshKeyPaths = [ "/etc/ssh/ssh_host_ed25519_key" ];
|
||||
sops.defaultSopsFile = ./secrets.yaml;
|
||||
|
|
|
|||
113
hosts/nas/modules/auto-shutdown.nix
Normal file
113
hosts/nas/modules/auto-shutdown.nix
Normal file
|
|
@ -0,0 +1,113 @@
|
|||
# NAS auto-shutdown
# Powers the machine off when all of the following are true:
#   1. No active SSH session
#   2. pyload is not downloading (no non-local TCP peers)
#   3. pyload has no hook children (extraction, filebot, unrar, ...)
#   4. Both spinning HDDs are in standby
#   5. At least 15 minutes have passed since boot (via OnBootSec)
# The fw host re-wakes the NAS on demand via WOL
# (see hosts/fw/modules/nas-wake-on-access.nix).
{ config, lib, pkgs, ... }:

let
  # Spinning disks whose power state gates the shutdown decision.
  # Only the Toshiba HDDs; NVMe drives do not spin down.
  hdds = [
    "/dev/disk/by-id/ata-TOSHIBA_MG10ACA20TE_8582A01SF4MJ"
    "/dev/disk/by-id/ata-TOSHIBA_MG10ACA20TE_75V2A0H3F4MJ"
  ];

  # Peer addresses that do not count as "real download traffic":
  # loopback, the internal VLAN range, and IPv6 loopback / link-local.
  # Consistency fix: derive the VLAN part from config.networkPrefix
  # (regex-escaped) instead of hardcoding 10.42, so this module stays in
  # sync with nas-wake-on-access.nix and the rest of the config. For the
  # current prefix "10.42" the resulting regex is byte-identical.
  escapedPrefix = lib.replaceStrings [ "." ] [ "\\." ] config.networkPrefix;
  localPeerRegex = "^(127\\.|${escapedPrefix}\\.|\\[::1\\]|\\[fe80)";

  autoShutdownScript = pkgs.writeShellScript "nas-auto-shutdown" ''
    set -euo pipefail
    log() { echo "auto-shutdown: $*"; }

    # 1. SSH sessions. Cheapest check and it prevents shutting down while an
    #    admin is logged in. Uses ss at the socket layer so it catches
    #    forwarding-only sessions and `ssh host 'cmd'` runs that utmp misses.
    #    NOTE: SSH port hardcoded to 22 -- keep in sync with configuration.nix.
    ssh_sessions=$(${pkgs.iproute2}/bin/ss -H -t -n state established '( sport = :22 )' || true)
    if [[ -n "$ssh_sessions" ]]; then
      log "active SSH session present, staying up"
      exit 0
    fi

    # 2. pyload active downloads: any established/outgoing pyload-owned socket
    #    to a non-local peer means a download is in flight.
    pyload_conns=$(${pkgs.iproute2}/bin/ss -H -t -n -p \
        state established state syn-sent state syn-recv 2>/dev/null \
      | grep -F '"pyload"' \
      | awk '{print $5}' \
      | grep -Ev '${localPeerRegex}' || true)
    if [[ -n "$pyload_conns" ]]; then
      log "pyload has active non-local connections, staying up"
      exit 0
    fi

    # 3. pyload hook children (extraction, filebot, unrar, 7z, java, ...).
    #    The package_finished hook is launched by pyload's ExternalScripts
    #    plugin, so every child lives in pyload.service's cgroup. cgroup.procs
    #    contains PIDs/TGIDs only, not TIDs, so pyload's internal thread pool
    #    cannot false-positive. Fail-safe: if the file is unreadable, stay up.
    cgroup_procs=/sys/fs/cgroup/system.slice/pyload.service/cgroup.procs
    if [[ ! -r "$cgroup_procs" ]]; then
      log "pyload cgroup procs file unreadable ($cgroup_procs), staying up"
      exit 0
    fi
    main_pid=$(${pkgs.systemd}/bin/systemctl show -p MainPID --value pyload.service)
    children=$(grep -v -x -F "$main_pid" "$cgroup_procs" || true)
    if [[ -n "$children" ]]; then
      log "pyload hook children running ($(echo "$children" | tr '\n' ' ')), staying up"
      exit 0
    fi

    # 4. Both spinning HDDs must be in standby. hdparm -C is non-disturbing
    #    (does not wake the disk). The udev rule in power-management.nix runs
    #    hdparm -S 180, so standby implies >= 15 min of firmware-level idle --
    #    no extra shell-level debounce needed.
    for disk in ${lib.concatStringsSep " " hdds}; do
      if [[ ! -e "$disk" ]]; then
        log "disk $disk missing, staying up"
        exit 0
      fi
      device=$(readlink -f "$disk")
      power_state=$(${pkgs.hdparm}/bin/hdparm -C "$device" 2>/dev/null \
        | grep -oP '(standby|active/idle|active|idle)' | head -1 || echo "unknown")
      if [[ "$power_state" != "standby" ]]; then
        log "$disk is $power_state, staying up"
        exit 0
      fi
    done

    # 5. All clear.
    log "all checks clear, powering off"
    ${pkgs.systemd}/bin/systemctl poweroff
  '';
in
{
  systemd.services.nas-auto-shutdown = {
    description = "Power off NAS when idle (HDD standby + pyload/filebot quiet + no SSH)";
    path = with pkgs; [ coreutils gawk gnugrep iproute2 hdparm systemd ];
    serviceConfig = {
      Type = "oneshot";
      User = "root";
      ExecStart = "${autoShutdownScript}";
    };
  };

  systemd.timers.nas-auto-shutdown = {
    description = "Run NAS idle check every minute";
    wantedBy = [ "timers.target" ];
    timerConfig = {
      # Requirement: earliest shutdown is 15 min after boot, so a
      # WOL-triggered wake is not immediately followed by another poweroff.
      OnBootSec = "15min";
      OnUnitActiveSec = "1min";
      AccuracySec = "10s";
      Persistent = false;
    };
  };
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue