From db2bd1d15727cc283cad6097cd742fd4a6c92f06 Mon Sep 17 00:00:00 2001 From: Hermes Date: Wed, 20 May 2026 14:34:15 -0400 Subject: [PATCH 01/12] feat: add uConsole CM5 host configuration with Reticulum mesh stack - New NixOS host 'uConsole' for ClockworkPi CM5 portable terminal - flake.nix: add nixos-uconsole and nixos-raspberrypi inputs - Imports: nixos-uconsole.nixosModules.uconsole-cm5, nixos-raspberrypi.nixosModules.raspberry-pi-5.base - Full package list: base tools, HAM radio, SDR/RF, mesh/LoRa, security tools, GPS/maps - Reticulum stack (rns 1.2.9, lxmf 0.9.8, nomadnet 1.1.1) built from PyPI via overlays/reticulum.nix - systemd services: rnsd (Reticulum daemon), kismet (Wi-Fi IDS) - Kernel modules for SDR (rtl-sdr, dvb) and USB WiFi - Follows existing host config conventions (cyt-pi as template) --- flake.nix | 28 +++- hosts/uConsole/configuration.nix | 167 ++++++++++++++++++++++ hosts/uConsole/hardware-configuration.nix | 26 ++++ overlays/reticulum.nix | 77 ++++++++++ 4 files changed, 296 insertions(+), 2 deletions(-) create mode 100644 hosts/uConsole/configuration.nix create mode 100644 hosts/uConsole/hardware-configuration.nix create mode 100644 overlays/reticulum.nix diff --git a/flake.nix b/flake.nix index 8f8b51a..8f51b77 100644 --- a/flake.nix +++ b/flake.nix @@ -12,10 +12,18 @@ url = "git+https://git.lix.systems/lix-project/lix?ref=main"; inputs.nixpkgs.follows = "nixpkgs"; }; + nixos-uconsole = { + url = "github:nixos-uconsole/nixos-uconsole"; + inputs.nixpkgs.follows = "nixpkgs"; + }; + nixos-raspberrypi = { + url = "github:nvmd/nixos-raspberrypi/v1.20260317.0"; + inputs.nixpkgs.follows = "nixpkgs"; + }; self.submodules = true; }; - outputs = { self, nixpkgs, agenix, lix, ... }@inputs: + outputs = { self, nixpkgs, agenix, lix, nixos-uconsole, nixos-raspberrypi, ... 
}@inputs: let system = "x86_64-linux"; keys = import ./lib/keys.nix; @@ -26,7 +34,7 @@ "/etc/ssh/ssh_host_ed25519_key" "/root/.age/bootstrap.key" ]; }; - overlays = [ agenix.overlays.default ]; + overlays = [ agenix.overlays.default (import ./overlays/reticulum.nix) ]; pkgs = import nixpkgs { inherit system overlays; config.allowUnfree = true; @@ -80,6 +88,22 @@ ./hosts/cyt-pi/hardware-configuration.nix ]; }; + + uConsole = nixos-raspberrypi.lib.nixosSystem { + specialArgs = { inherit self keys paths inputs nixos-raspberrypi; }; + modules = [ + { + nixpkgs.overlays = overlays; + nixpkgs.config.allowUnfree = true; + nixpkgs.hostPlatform = "aarch64-linux"; + nix.package = lix.packages."aarch64-linux".default; + } + nixos-raspberrypi.nixosModules.raspberry-pi-5.base + nixos-uconsole.nixosModules.uconsole-cm5 + ./hosts/uConsole/configuration.nix + ./hosts/uConsole/hardware-configuration.nix + ]; + }; }; devShells.${system}.default = devShell; }; diff --git a/hosts/uConsole/configuration.nix b/hosts/uConsole/configuration.nix new file mode 100644 index 0000000..73a59b1 --- /dev/null +++ b/hosts/uConsole/configuration.nix @@ -0,0 +1,167 @@ +{ config, lib, pkgs, paths, self, keys, ... 
}: + +{ + # Basic Host Info + networking.hostName = "uConsole"; + time.timeZone = "America/Montreal"; + i18n.defaultLocale = "en_CA.UTF-8"; + + # System State + system.stateVersion = "25.05"; + + # Boot & Hardware (uconsole-cm5 module handles boot.loader) + boot.kernelPackages = pkgs.linuxPackages_latest; + + # Networking + networking.networkmanager.enable = true; + services.openssh = { + enable = true; + settings.PermitRootLogin = "prohibit-password"; + settings.PasswordAuthentication = false; + }; + + # User + users.users.gortium = { + isNormalUser = true; + extraGroups = [ "wheel" "networkmanager" "video" "dialout" "kismet" ]; + openssh.authorizedKeys.keys = [ + keys.users.gortium.main + keys.users.gortium.gitea + ]; + }; + security.sudo.extraRules = [ + { + users = [ "gortium" ]; + commands = [ + { + command = "ALL"; + options = [ "NOPASSWD" ]; + } + ]; + } + ]; + + # ============================================================ + # Package groups + # ============================================================ + + environment.systemPackages = with pkgs; [ + # ===== Base ===== + emacs-pgtk + git + ripgrep + fd + htop + tmux + neovim + + # ===== HAM Radio ===== + js8call + wsjtx + fldigi + pat # Winlink client + direwolf # AX.25 packet modem + chirp # Radio programming tool + hamlib # Ham radio control libraries + trustedqsl # Logbook of the World (LoTW) + + # ===== SDR / RF ===== + sdrpp # SDR++ spectrum analyzer + gqrx # SDR receiver GUI + rtl-sdr # RTL-SDR drivers & utilities + inspectrum # Offline signal analysis + soapysdr-with-plugins # SoapySDR + hardware support plugins + + # ===== Mesh / LoRa ===== + meshtastic # Python CLI for Meshtastic devices + reticulumStack # Reticulum Network Stack (rnsd, rnsh, rncp, rnx, rnpath, etc.) 
+ lxmf # LXMF messaging protocol + nomadnet # Nomad Network client + + # ===== Security ===== + nmap + aircrack-ng + kismet # Wi-Fi monitor / IDS + bettercap # MITM/network attack framework + wireshark # Packet analyzer + hashcat # GPU password cracker + john # John the Ripper + sqlmap # SQL injection tool + + # ===== GPS / Maps ===== + foxtrotgps + viking # GPS map editor + gpsbabel # GPS data conversion + ]; + + # Packages noted but not in unstable nixpkgs: + # - metasploit: unfree; install manually via Git clone + # - burpsuite: unfree Java app (Community Edition available for download) + # - sidechannel: not a distinct PyPI package; functionality covered by + # the Reticulum stack. For LXMF GUI client, install Sideband manually + # from github.com/markqvist/Sideband + + # ============================================================ + # Reticulum Service (rnsd) + # ============================================================ + systemd.services.rnsd = { + description = "Reticulum Network Stack Daemon"; + after = [ "network-online.target" ]; + wantedBy = [ "multi-user.target" ]; + serviceConfig = { + User = "gortium"; + Group = "users"; # isNormalUser puts gortium in "users"; no "gortium" group is declared in this host's config + ExecStart = "${pkgs.reticulumStack}/bin/rnsd"; + Restart = "always"; + RestartSec = "10s"; + LimitNOFILE = 65536; + }; + }; + + # ============================================================ + # Kismet Service (Wi-Fi monitoring / mesh node) + # ============================================================ + systemd.services.kismet = { + description = "Kismet Wi-Fi Monitor & IDS"; + after = [ "network-online.target" ]; + wantedBy = [ "multi-user.target" ]; + serviceConfig = { + User = "gortium"; + Group = "users"; # no "kismet" group is declared in this host's config; an unknown Group= stops the unit from starting + ExecStart = "${pkgs.kismet}/bin/kismet -c wlan0 --log-prefix=/home/gortium/kismet_logs --no-ncurses-wrapper"; + Restart = "always"; + RestartSec = "10s"; + }; + }; + + # ============================================================ + # Kernel modules for SDR and radio + # 
============================================================ + boot.kernelModules = [ + "88x2bu" # Realtek 8812/8821BU USB WiFi (common adapter) + "rtl8xxxu" # RTL8188/8192/8723 USB WiFi + "rtl2832_sdr" # RTL-SDR kernel module + "dvb_usb_rtl28xxu" # RTL-SDR DVB-T + ]; + + boot.blacklistedKernelModules = [ ]; + + # ============================================================ + # Extra udev rules for SDR and HAM radio devices + # ============================================================ + services.udev.packages = with pkgs; [ rtl-sdr ]; + + # ============================================================ + # Enable IPv6 for Reticulum mesh + # ============================================================ + networking.enableIPv6 = true; + + # ============================================================ + # Firewall: open ports for Reticulum (optional) + # ============================================================ + networking.firewall.allowedTCPPorts = [ 22 ]; # SSH only + networking.firewall.allowedUDPPorts = [ ]; + # Reticulum uses its own encryption and doesn't need open ports + # for basic mesh operations (peer-to-peer discovery). + # For TCP interfaces, open additional ports as needed. +} diff --git a/hosts/uConsole/hardware-configuration.nix b/hosts/uConsole/hardware-configuration.nix new file mode 100644 index 0000000..ea7c25a --- /dev/null +++ b/hosts/uConsole/hardware-configuration.nix @@ -0,0 +1,26 @@ +{ config, lib, pkgs, modulesPath, ... 
}: + +{ + imports = + [ (modulesPath + "/installer/scan/not-detected.nix") + ]; + + boot.initrd.availableKernelModules = [ "xhci_pci" "usbhid" "usb_storage" "sdhci_pci" "nvme" ]; + boot.initrd.kernelModules = [ ]; + boot.extraModulePackages = [ ]; + + # uConsole CM5 uses NVMe or eMMC for boot storage + # The uconsole-cm5 module sets up /boot/firmware and default / + # Override device label here if using different storage + fileSystems."/" = lib.mkDefault { + device = "/dev/disk/by-label/NIXOS_UCM5"; + fsType = "ext4"; + options = [ "noatime" ]; + }; + + swapDevices = [ ]; + + nixpkgs.hostPlatform = lib.mkDefault "aarch64-linux"; + hardware.enableRedistributableFirmware = true; + powerManagement.cpuFreqGovernor = lib.mkDefault "ondemand"; +} diff --git a/overlays/reticulum.nix b/overlays/reticulum.nix new file mode 100644 index 0000000..ffab686 --- /dev/null +++ b/overlays/reticulum.nix @@ -0,0 +1,77 @@ +final: prev: let + python3 = final.python3; + pyPkgs = python3.pkgs; +in { + reticulumStack = python3.pkgs.buildPythonApplication rec { + pname = "reticulum"; + version = "1.2.9"; + src = pyPkgs.fetchPypi { + pname = "rns"; + inherit version; + sha256 = "554814231c237b9caacf8df669312e57dd7d3f84b6d4810125087d1a79a75d75"; + }; + propagatedBuildInputs = with pyPkgs; [ cryptography pyserial ]; + doCheck = false; + pythonImportsCheck = [ "RNS" ]; + meta = with final.lib; { + description = "Self-configuring, encrypted and resilient mesh networking stack"; + homepage = "https://reticulum.network/"; + license = licenses.mit; + platforms = platforms.linux; + }; + }; + + lxmf = python3.pkgs.buildPythonApplication rec { + pname = "lxmf"; + version = "0.9.8"; + src = pyPkgs.fetchPypi { + inherit pname version; + sha256 = "30f39f3a975a049c12ee2cfceb3261d24cb5adec881c6821f7354464b3f3650c"; + }; + propagatedBuildInputs = [ final.reticulumStack ]; + doCheck = false; + pythonImportsCheck = [ "LXMF" ]; + meta = with final.lib; { + description = "Lightweight Extensible Message Format 
for Reticulum"; + homepage = "https://github.com/markqvist/lxmf"; + license = licenses.mit; + platforms = platforms.linux; + }; + }; + + nomadnet = python3.pkgs.buildPythonApplication rec { + pname = "nomadnet"; + version = "1.1.1"; + src = pyPkgs.fetchPypi { + inherit pname version; + sha256 = "fa13b64a10e75b705a58024815ab72451700aa726af96d415ba99dec28dfc40a"; + }; + propagatedBuildInputs = with pyPkgs; [ final.reticulumStack final.lxmf urwid qrcode ]; + doCheck = false; + pythonImportsCheck = [ "nomadnet" ]; + meta = with final.lib; { + description = "Nomad Network — resilient mesh communications platform"; + homepage = "https://github.com/markqvist/NomadNet"; + license = licenses.mit; + platforms = platforms.linux; + }; + }; + + rnsh = python3.pkgs.buildPythonApplication rec { + pname = "rnsh"; + version = "0.1.7"; + src = pyPkgs.fetchPypi { + inherit pname version; + sha256 = "9cb72f25abb1c6d300f8014b264184ff78f592fe88e36094938012990b797c93"; + }; + propagatedBuildInputs = [ final.reticulumStack ]; + doCheck = false; + pythonImportsCheck = [ "rnsh" ]; + meta = with final.lib; { + description = "Remote shell over Reticulum"; + homepage = "https://github.com/acehoss/rnsh"; + license = licenses.mit; + platforms = platforms.linux; + }; + }; +} -- 2.49.1 From 8b9a144254658ffbb6ebd531f2ddc0ce0e8089a2 Mon Sep 17 00:00:00 2001 From: Hermes Date: Wed, 20 May 2026 14:36:42 -0400 Subject: [PATCH 02/12] fix: update compose submodule for Matrix bridge deps + persistent venv Updates assets/compose submodule to 8adbbf0 (compose fix/matrix-bridge-deps). Compose commit adds: - uv pip install openai mautrix[encryption] to hermes entrypoint - Persistent venv volume at /opt/hermes/.venv - Empty-volume first-boot handling (venv recreation) This ensures Matrix bridge dependencies survive container recreation. 
--- assets/compose | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/compose b/assets/compose index d3f2e3b..8adbbf0 160000 --- a/assets/compose +++ b/assets/compose @@ -1 +1 @@ -Subproject commit d3f2e3b7b9dcb03b0bd7df0278faca6b64ea9272 +Subproject commit 8adbbf0ed40ec0cba49de6ac43f9cc96ec29337c -- 2.49.1 From b43c6794b30c9fc576495d4d4b55b3236b36c0af Mon Sep 17 00:00:00 2001 From: Hermes Date: Fri, 22 May 2026 23:52:24 -0400 Subject: [PATCH 03/12] chore: update compose submodule (fix VECTOR_STORE_DIMENSIONS empty value) --- assets/compose | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/compose b/assets/compose index 8adbbf0..231ce93 160000 --- a/assets/compose +++ b/assets/compose @@ -1 +1 @@ -Subproject commit 8adbbf0ed40ec0cba49de6ac43f9cc96ec29337c +Subproject commit 231ce938de7ef8ed6b896b969e8f260a0d55780b -- 2.49.1 From 6f1807119808645abae9131a0289aacfdacd6001 Mon Sep 17 00:00:00 2001 From: Hermes Date: Fri, 22 May 2026 23:54:35 -0400 Subject: [PATCH 04/12] chore: update compose submodule (expose Honcho via Traefik + fix VECTOR_STORE_DIMENSIONS) --- assets/compose | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/compose b/assets/compose index 231ce93..fcebd4f 160000 --- a/assets/compose +++ b/assets/compose @@ -1 +1 @@ -Subproject commit 231ce938de7ef8ed6b896b969e8f260a0d55780b +Subproject commit fcebd4f1cd5c379428e27985ca6a732e16448a71 -- 2.49.1 From 8d7afecb6e9650c13e199c6a705c8998c3da6261 Mon Sep 17 00:00:00 2001 From: Hermes Date: Sat, 23 May 2026 00:00:27 -0400 Subject: [PATCH 05/12] chore: update compose submodule (merge Honcho+OpenConcho into single container) --- assets/compose | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/compose b/assets/compose index fcebd4f..285351e 160000 --- a/assets/compose +++ b/assets/compose @@ -1 +1 @@ -Subproject commit fcebd4f1cd5c379428e27985ca6a732e16448a71 +Subproject commit 285351e82cac588a9c07dad9dd30a35f62f32901 -- 
2.49.1 From 33d1d860fb4a87e4c8ae0c90119264a92da0fd52 Mon Sep 17 00:00:00 2001 From: Hermes Date: Sat, 23 May 2026 00:31:46 -0400 Subject: [PATCH 06/12] chore: update compose submodule (fix nginx permissions) --- assets/compose | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/compose b/assets/compose index 285351e..c85dbaf 160000 --- a/assets/compose +++ b/assets/compose @@ -1 +1 @@ -Subproject commit 285351e82cac588a9c07dad9dd30a35f62f32901 +Subproject commit c85dbaf820c40b72dd3c88a65d5cd2f8ae299de3 -- 2.49.1 From cd817c7fd2f7a4e339ce59b94ed7942c873c70b9 Mon Sep 17 00:00:00 2001 From: Hermes Date: Sat, 23 May 2026 01:21:31 -0400 Subject: [PATCH 07/12] chore: update compose submodule (fix nginx permissions - run as root) --- assets/compose | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/compose b/assets/compose index c85dbaf..352f9a9 160000 --- a/assets/compose +++ b/assets/compose @@ -1 +1 @@ -Subproject commit c85dbaf820c40b72dd3c88a65d5cd2f8ae299de3 +Subproject commit 352f9a9e78957673f4601e534f6eb0ef304682a2 -- 2.49.1 From 690873d0e40738111fb2a0c5dc4c55112b78f469 Mon Sep 17 00:00:00 2001 From: Hermes Date: Sat, 23 May 2026 08:34:46 -0400 Subject: [PATCH 08/12] fix: update compose submodule for honcho auth + LLM config --- assets/compose | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/compose b/assets/compose index 352f9a9..bb53161 160000 --- a/assets/compose +++ b/assets/compose @@ -1 +1 @@ -Subproject commit 352f9a9e78957673f4601e534f6eb0ef304682a2 +Subproject commit bb53161b50a73da58af1daeb78d81b2013c7c1db -- 2.49.1 From 6be40763723fcd7bcf7dadaff4eb1263f298de7f Mon Sep 17 00:00:00 2001 From: Hermes Date: Sat, 23 May 2026 16:43:22 -0400 Subject: [PATCH 09/12] fix: update compose submodule to fix honcho config corrupted values --- assets/compose | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/compose b/assets/compose index bb53161..8eb0344 160000 --- 
a/assets/compose +++ b/assets/compose @@ -1 +1 @@ -Subproject commit bb53161b50a73da58af1daeb78d81b2013c7c1db +Subproject commit 8eb0344a0843f099073eab1d105324baf386713b -- 2.49.1 From 2b4b2e221673234db25e572def94bc532a1ec2e1 Mon Sep 17 00:00:00 2001 From: Hermes Date: Sat, 23 May 2026 16:51:24 -0400 Subject: [PATCH 10/12] fix: update compose submodule - honcho config fixes --- assets/compose | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/compose b/assets/compose index 8eb0344..08778db 160000 --- a/assets/compose +++ b/assets/compose @@ -1 +1 @@ -Subproject commit 8eb0344a0843f099073eab1d105324baf386713b +Subproject commit 08778db6856312a8083bd63059dcc2e319916b44 -- 2.49.1 From e216c8edac01b41cf5d3246e46b1ee77c0d64fac Mon Sep 17 00:00:00 2001 From: Hermes Date: Sat, 23 May 2026 18:54:54 -0400 Subject: [PATCH 11/12] fix: update compose submodule - honcho embedding config fix + deriver in CMD --- assets/compose | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/compose b/assets/compose index 08778db..63b6cd3 160000 --- a/assets/compose +++ b/assets/compose @@ -1 +1 @@ -Subproject commit 08778db6856312a8083bd63059dcc2e319916b44 +Subproject commit 63b6cd3461a1344ef6685f2f62a81063dcd1c9e4 -- 2.49.1 From 15f70019d57482bddaa76276c4517236aeb2efd3 Mon Sep 17 00:00:00 2001 From: Hermes Date: Mon, 25 May 2026 00:08:39 -0400 Subject: [PATCH 12/12] feat: integrate rollback sentinel as NixOS module MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add rollback-sentinel NixOS module that: - Deploys sentinel-check.sh (inline) and nixos-rollback.sh (from file) as system packages - Runs a boot-time systemd oneshot service after multi-user.target with configurable delay — checks Tier-1 services, triggers rollback on failure - Runs a post-rebuild service via activation script after every nixos-rebuild switch - Exposes options for tier1Services, tier2Services, tier3InfoServices, bootDelay, 
rollbackMode (set-default/rollback-now/dry-run), and enablePostRebuild Module wired into flake.nix for lazyworkhorse and enabled in configuration.nix with standard Tier-1/2 service lists and 120s delay. --- flake.nix | 1 + hosts/lazyworkhorse/configuration.nix | 30 ++ modules/nixos/services/nixos-rollback.sh | 400 +++++++++++++++++++ modules/nixos/services/rollback-sentinel.nix | 184 +++++++++ 4 files changed, 615 insertions(+) create mode 100755 modules/nixos/services/nixos-rollback.sh create mode 100644 modules/nixos/services/rollback-sentinel.nix diff --git a/flake.nix b/flake.nix index 8f51b77..b7b18db 100644 --- a/flake.nix +++ b/flake.nix @@ -69,6 +69,7 @@ ./modules/nixos/services/open_code_server.nix ./modules/nixos/services/ollama_init_custom_models.nix ./modules/nixos/services/openclaw_node.nix + ./modules/nixos/services/rollback-sentinel.nix ./modules/nixos/security/ai-worker-restricted.nix ./users/gortium.nix ./users/ai-worker.nix diff --git a/hosts/lazyworkhorse/configuration.nix b/hosts/lazyworkhorse/configuration.nix index f1afae4..157bc8e 100644 --- a/hosts/lazyworkhorse/configuration.nix +++ b/hosts/lazyworkhorse/configuration.nix @@ -321,10 +321,40 @@ environment.etc."ssh/ssh_host_ed25519_key.pub".text = "${keys.hosts.lazyworkhorse.main}"; + # ── Boot sentinel: auto-rollback on critical service failure ─────────────── + services.rollbackSentinel.enable = true; + # Tier-1: failure triggers rollback + services.rollbackSentinel.tier1Services = [ + "sshd" "docker" "traefik" "authelia" + ]; + # Tier-2: warn only + services.rollbackSentinel.tier2Services = [ + "gitea" "hermes" "ollama" "synapse" "nextcloud" + "vaultwarden" "wireguard" "homeassistant" "fail2ban" + ]; + # Wait 2 minutes after boot before checking (lets services initialize) + services.rollbackSentinel.bootDelay = "120"; + # Change boot default only (not --rollback-now) for safety + services.rollbackSentinel.rollbackMode = "set-default"; + services.fstrim.enable = true; 
services.zfs.autoSnapshot.enable = true; services.zfs.autoScrub.enable = true; + + # Ensure com.sun:auto-snapshot is set on the pool root so auto-snapshots actually run (child datasets inherit it; `zfs set` has no -r flag) + systemd.services."zfs-set-auto-snapshot" = { + description = "Set com.sun:auto-snapshot=true on ZFS datasets"; + after = [ "zfs-import.target" ]; + wants = [ "zfs-import.target" ]; + wantedBy = [ "multi-user.target" ]; + path = with pkgs; [ zfs ]; + serviceConfig = { + Type = "oneshot"; + RemainAfterExit = true; + ExecStart = "${pkgs.zfs}/bin/zfs set com.sun:auto-snapshot=true rpool"; + }; + }; # Mi50 config hardware.graphics = { diff --git a/modules/nixos/services/nixos-rollback.sh b/modules/nixos/services/nixos-rollback.sh new file mode 100755 index 0000000..2482efe --- /dev/null +++ b/modules/nixos/services/nixos-rollback.sh @@ -0,0 +1,400 @@ +#!/usr/bin/env bash +# ============================================================================= +# nixos-rollback.sh — NixOS systemd-boot Rollback Script +# +# Detects a failed NixOS generation (critical services not starting) and sets +# the previous generation as the default boot option for systemd-boot. +# Logs all actions to syslog/journald and a local logfile. Fails safely when +# no previous generation exists or required files are missing. 
+# +# Integration with the boot sentinel: +# sentinel-check.sh → detects Tier-1 service failures (sshd, docker, +# traefik, authelia) after a boot +# nixos-rollback.sh ← called when sentinel exits nonzero; sets previous +# generation as default for next boot +# +# Usage: +# nixos-rollback.sh # auto-detect & set previous gen +# nixos-rollback.sh --dry-run # show what would be done +# nixos-rollback.sh --rollback-now # also run nixos-rebuild switch +# # --rollback for immediate fix +# nixos-rollback.sh --help # full help text +# +# Exit codes: +# 0 — rollback applied (or dry-run would apply) +# 1 — preflight failure (missing files, permissions) +# 2 — no previous generation available +# 3 — nixos-rebuild --rollback failed (only with --rollback-now) +# +# Installation on NixOS: +# Packaged by modules/nixos/services/rollback-sentinel.nix, which embeds this +# file with builtins.readFile and wraps it as the nixos-rollback.sh command. +# Enable services.rollbackSentinel instead of copying it to /usr/local/bin. +# ============================================================================= + +set -euo pipefail + +# ── Configuration ──────────────────────────────────────────────────────────── +# These can be overridden via environment variables for testing. 
+LOADER_CONF="${NIXOS_ROLLBACK_LOADER_CONF:-/boot/loader/loader.conf}" +ENTRIES_DIR="${NIXOS_ROLLBACK_ENTRIES_DIR:-/boot/loader/entries}" +LOGFILE="${NIXOS_ROLLBACK_LOGFILE:-/var/log/nixos-rollback.log}" +SYSLOG_IDENT="nixos-rollback" + +# ── CLI flags ──────────────────────────────────────────────────────────────── +DRY_RUN=false +ROLLBACK_NOW=false + +# ── Colors (disabled when not a terminal) ──────────────────────────────────── +if [ -t 1 ]; then + RED=$'\033[0;31m' # ANSI-C quoting stores a real ESC byte, so plain echo (no -e) renders it + GREEN=$'\033[0;32m' + YELLOW=$'\033[1;33m' + CYAN=$'\033[0;36m' + NC=$'\033[0m' # No Color +else + RED=''; GREEN=''; YELLOW=''; CYAN=''; NC='' +fi + +# ============================================================================= +# Help +# ============================================================================= +usage() { + cat <> "${LOGFILE}" + logger -t "${SYSLOG_IDENT}" -p "user.${level}" "${msg}" + + # Also print to stderr for ERROR/WARN, stdout for INFO + case "${level}" in + ERROR) echo >&2 "${RED}[ERROR]${NC} ${msg}" ;; + WARN) echo >&2 "${YELLOW}[WARN]${NC} ${msg}" ;; + INFO) echo " ${GREEN}[INFO]${NC} ${msg}" ;; + esac +} + +info() { log "INFO" "$@"; } +warn() { log "WARN" "$@"; } +error() { log "ERROR" "$@"; } + +# ============================================================================= +# Preflight checks +# ============================================================================= +preflight() { + # Must run as root (need to write to /boot), unless overridden for testing + if [ -z "${NIXOS_ROLLBACK_SKIP_ROOT_CHECK:-}" ] && [ "$(id -u)" -ne 0 ]; then + error "This script must be run as root (needs write access to /boot/loader)" + error "Set NIXOS_ROLLBACK_SKIP_ROOT_CHECK=1 for testing against mock paths." + exit 1 + fi + + # Directories and files + if [ ! -d "${ENTRIES_DIR}" ]; then + error "Boot entries directory not found: ${ENTRIES_DIR}" + exit 1 + fi + + if [ ! -f "${LOADER_CONF}" ]; then + error "Loader config not found: ${LOADER_CONF}" + exit 1 + fi + + if [ ! 
-r "${LOADER_CONF}" ]; then + error "Cannot read loader config: ${LOADER_CONF}" + exit 1 + fi + + # Check write access to /boot/loader (parent of loader.conf) + local loader_dir + loader_dir="$(dirname "${LOADER_CONF}")" + if [ ! -w "${loader_dir}" ]; then + error "Cannot write to ${loader_dir} (insufficient permissions)" + exit 1 + fi + + # Logfile directory must exist + local log_dir + log_dir="$(dirname "${LOGFILE}")" + if [ ! -d "${log_dir}" ]; then + warn "Log directory ${log_dir} does not exist, creating it" + mkdir -p "${log_dir}" 2>/dev/null || { + error "Cannot create log directory ${log_dir}" + exit 1 + } + fi + + # Check --rollback-now dependencies + if [ "${ROLLBACK_NOW}" = true ]; then + if ! command -v nixos-rebuild &>/dev/null; then + error "nixos-rebuild not found on PATH (required for --rollback-now)" + exit 1 + fi + fi +} + +# ============================================================================= +# Generation helpers +# ============================================================================= + +# get_current_default: reads the current default entry from loader.conf +# Returns: "nixos-generation-N.conf" or empty string +get_current_default() { + grep -E '^default\s+' "${LOADER_CONF}" 2>/dev/null \ + | awk '{print $2}' \ + || true +} + +# extract_gen_number: extracts the numeric generation from a conf filename +# Input: "nixos-generation-367.conf" +# Output: 367 +extract_gen_number() { + echo "$1" | sed 's/nixos-generation-//;s/\.conf//' +} + +# get_all_gen_numbers: returns sorted list of generation numbers from entries dir +get_all_gen_numbers() { + local -a gens=() + local f n + for f in "${ENTRIES_DIR}"/nixos-generation-*.conf; do + [ -f "${f}" ] || continue + n="$(basename "${f}" | sed 's/nixos-generation-//;s/\.conf//')" + gens+=("${n}") + done + + if [ "${#gens[@]}" -eq 0 ]; then + return 1 + fi + + # Sort numerically and output + printf '%s\n' "${gens[@]}" | sort -n +} + +# get_previous_gen: given current generation number, find 
the previous one +# from the list of all available generations +get_previous_gen() { + local current="$1" + shift + local -a gens=("$@") + + local prev="" + local g + for g in "${gens[@]}"; do + if [ "${g}" -lt "${current}" ]; then + prev="${g}" + fi + done + + if [ -z "${prev}" ]; then + return 1 + fi + echo "${prev}" +} + +# ============================================================================= +# Main rollback logic +# ============================================================================= +do_rollback() { + # Step 1: Read current default + local current_entry + current_entry="$(get_current_default)" + + if [ -z "${current_entry}" ]; then + error "No 'default' entry found in ${LOADER_CONF}" + error "Cannot determine current generation — aborting" + exit 1 + fi + + info "Current default boot entry: ${current_entry}" + + # Step 2: Build sorted list of all available generations + local -a all_gens=() + local line + while IFS= read -r line; do + all_gens+=("${line}") + done < <(get_all_gen_numbers || true) + + if [ "${#all_gens[@]}" -eq 0 ]; then + error "No NixOS generation .conf files found in ${ENTRIES_DIR}" + exit 1 + fi + + info "Available generations: ${all_gens[*]}" + + # Step 3: Find current generation number + local current_gen + current_gen="$(extract_gen_number "${current_entry}")" + + # Verify current_gen is a valid number + if ! [[ "${current_gen}" =~ ^[0-9]+$ ]]; then + error "Could not parse generation number from '${current_entry}'" + exit 1 + fi + + # Step 4: Find the previous generation + local prev_gen + prev_gen="$(get_previous_gen "${current_gen}" "${all_gens[@]}")" || { + error "No previous generation found before generation ${current_gen}" + error "This is the oldest available generation — cannot roll back further" + exit 2 + } + + local prev_entry="nixos-generation-${prev_gen}.conf" + local prev_conf_path="${ENTRIES_DIR}/${prev_entry}" + + if [ ! 
-f "${prev_conf_path}" ]; then + error "Previous generation entry not found: ${prev_conf_path}" + error "The .conf file for generation ${prev_gen} is missing — cannot roll back" + exit 1 + fi + + info "Target rollback generation: ${prev_gen} → ${prev_entry}" + + # Step 5: Apply the rollback + if [ "${DRY_RUN}" = true ]; then + echo "" + echo " ${CYAN}[DRY RUN]${NC} Would change ${LOADER_CONF}:" + echo " ${YELLOW}-${NC} default ${current_entry}" + echo " ${GREEN}+${NC} default ${prev_entry}" + echo "" + info "DRY RUN — no changes made" + exit 0 + fi + + # Write new default + # Use sed with a backup (.bak) + sed -i.bak "s/^default\s\+${current_entry}/default ${prev_entry}/" "${LOADER_CONF}" + + # Verify the change was applied + local new_default + new_default="$(get_current_default)" + if [ "${new_default}" != "${prev_entry}" ]; then + error "Failed to set default boot entry to ${prev_entry}" + error "Current default is still: ${new_default}" + # Attempt to restore backup + if [ -f "${LOADER_CONF}.bak" ]; then + cp "${LOADER_CONF}.bak" "${LOADER_CONF}" + info "Restored backup from ${LOADER_CONF}.bak" + fi + exit 1 + fi + + info "Successfully set default boot entry to ${prev_entry} (generation ${prev_gen})" + info "Backup of previous config saved to ${LOADER_CONF}.bak" + + # Step 6: Optionally run nixos-rebuild switch --rollback + if [ "${ROLLBACK_NOW}" = true ]; then + echo "" + info "Running nixos-rebuild switch --rollback for immediate effect..." + if nixos-rebuild switch --rollback 2>&1 | while IFS= read -r line; do + logger -t "${SYSLOG_IDENT}" "nixos-rebuild: ${line}" + echo " ${line}" + done; then + info "nixos-rebuild switch --rollback completed successfully" + else + local rc=$? + error "nixos-rebuild switch --rollback failed with exit code ${rc}" + error "The boot default has been changed but the current system was NOT rolled back" + error "Reboot to apply the rollback" + exit 3 + fi + fi + + info "Rollback complete. 
Next boot will use generation ${prev_gen}." + if [ "${ROLLBACK_NOW}" = false ]; then + echo "" + echo " ${YELLOW}NOTE:${NC} The current running system is unchanged." + echo " Reboot to boot into generation ${prev_gen}." + echo " Or re-run with --rollback-now for immediate effect." + fi +} + +# ============================================================================= +# Main +# ============================================================================= +main() { + # Parse arguments + while [ $# -gt 0 ]; do + case "$1" in + --dry-run) + DRY_RUN=true + shift + ;; + --rollback-now) + ROLLBACK_NOW=true + shift + ;; + -h|--help) + usage + exit 0 + ;; + *) + echo >&2 "Unknown option: $1" + echo >&2 "Use --help for usage information." + exit 1 + ;; + esac + done + + echo "" + echo " ${CYAN}═══ NixOS systemd-boot Rollback ═══${NC}" + echo "" + + preflight + + if [ "${DRY_RUN}" = true ]; then + info "DRY RUN mode — no changes will be made" + fi + if [ "${ROLLBACK_NOW}" = true ]; then + info "ROLLBACK NOW mode — will also run nixos-rebuild switch --rollback" + fi + + echo "" + do_rollback +} + +main "$@" diff --git a/modules/nixos/services/rollback-sentinel.nix b/modules/nixos/services/rollback-sentinel.nix new file mode 100644 index 0000000..0164a2e --- /dev/null +++ b/modules/nixos/services/rollback-sentinel.nix @@ -0,0 +1,184 @@ +{ config, pkgs, lib, ... }: + +with lib; + +let + cfg = config.services.rollbackSentinel; + + # ── Scripts ──────────────────────────────────────────────────────────────── + + # Sentinel check — verifies Tier-1 services are active after boot. + # Exits nonzero when any Tier-1 service is down, which triggers the rollback. 
+  sentinelCheck = pkgs.writeShellScriptBin "sentinel-check.sh" ''
+    # Health probe for the rollback sentinel.
+    #
+    # Exit status: 0 when every Tier-1 service is active, 1 otherwise.
+    # Tier-2 and Tier-3 results are reported but never change the exit status.
+    #
+    # No shebang here: writeShellScriptBin already emits one.
+    set -euo pipefail
+
+    SYSLOG_IDENT="nixos-sentinel"
+    LOGFILE="/var/log/nixos-sentinel.log"
+    # Absolute path: logger lives in util-linux, which the sentinel units do
+    # not list in their PATH.
+    LOGGER="${pkgs.util-linux}/bin/logger"
+
+    # Logging is best-effort. Under "set -e" a failing logger call would abort
+    # the script with a nonzero status, which the caller would read as a
+    # Tier-1 failure.
+    log_syslog() {
+      "$LOGGER" -t "$SYSLOG_IDENT" -p "$1" "$2" || true
+    }
+
+    is_active() {
+      systemctl is-active --quiet "$1" 2>/dev/null
+    }
+
+    # Service names are shell-escaped at evaluation time so an unusual unit
+    # name cannot be re-split or expanded by the shell.
+    echo "=== NixOS Sentinel Check ==="
+    echo "Tier-1 services:" ${lib.escapeShellArgs cfg.tier1Services}
+    echo "Tier-2 services:" ${lib.escapeShellArgs cfg.tier2Services}
+    echo "Tier-3 services:" ${lib.escapeShellArgs cfg.tier3InfoServices}
+
+    FAILED=0
+
+    # Check Tier-1 services — any failure means rollback
+    for svc in ${lib.escapeShellArgs cfg.tier1Services}; do
+      if is_active "$svc"; then
+        echo " [OK] Tier-1: $svc"
+      else
+        echo " [FAIL] Tier-1: $svc is NOT active"
+        log_syslog user.err "Tier-1 FAILURE: $svc is not active"
+        FAILED=1
+      fi
+    done
+
+    # Check Tier-2 services — warn only
+    for svc in ${lib.escapeShellArgs cfg.tier2Services}; do
+      if is_active "$svc"; then
+        echo " [OK] Tier-2: $svc"
+      else
+        echo " [WARN] Tier-2: $svc is NOT active"
+        log_syslog user.warn "Tier-2 WARNING: $svc is not active"
+      fi
+    done
+
+    # Tier-3 services — status is printed for diagnostics only
+    for svc in ${lib.escapeShellArgs cfg.tier3InfoServices}; do
+      if is_active "$svc"; then
+        echo " [INFO] Tier-3: $svc is active"
+      else
+        echo " [INFO] Tier-3: $svc is not active"
+      fi
+    done
+
+    if [ "$FAILED" -eq 0 ]; then
+      RESULT=PASS
+    else
+      RESULT=FAIL
+    fi
+
+    echo "=== Sentinel result: $RESULT ==="
+    # An unwritable log file must not change the verdict either.
+    echo "$(date '+%Y-%m-%d %H:%M:%S') [INFO] sentinel $RESULT" >> "$LOGFILE" || true
+    exit "$FAILED"
+  '';
+
+  # Rollback script — package the companion shell script from this directory.
+  # Uses builtins.readFile to embed the content at evaluation time.
+  rollbackScript = pkgs.writeShellScriptBin "nixos-rollback.sh" (builtins.readFile ./nixos-rollback.sh);
+
+  # Resolve rollback flags from config.
+  # "set-default" maps to no flag at all: nixos-rollback.sh then only rewrites
+  # the boot loader default.
+  rollbackFlags =
+    if cfg.rollbackMode == "dry-run" then "--dry-run"
+    else if cfg.rollbackMode == "rollback-now" then "--rollback-now"
+    else "";
+
+in {
+  options.services.rollbackSentinel = {
+    enable = mkEnableOption "NixOS Rollback Sentinel — auto-rollback on critical service failure";
+
+    tier1Services = mkOption {
+      type = types.listOf types.str;
+      default = [ "sshd" "docker" "traefik" "authelia" ];
+      # The description names the bootDelay option instead of interpolating
+      # cfg.bootDelay: option documentation must not depend on the evaluated
+      # configuration.
+      description = ''
+        Tier-1 services whose failure triggers an automatic systemd-boot rollback.
+        On boot, the sentinel waits `bootDelay` seconds, then checks each
+        service. If ANY service in this list is inactive, it runs the rollback
+        script which sets the previous NixOS generation as the default boot entry.
+      '';
+    };
+
+    tier2Services = mkOption {
+      type = types.listOf types.str;
+      default = [
+        "gitea" "hermes" "ollama" "synapse" "nextcloud"
+        "vaultwarden" "wireguard" "homeassistant" "fail2ban"
+      ];
+      description = ''
+        Tier-2 services whose failure is logged as a warning but does NOT trigger
+        an automatic rollback. Useful for detecting non-critical service issues.
+      '';
+    };
+
+    tier3InfoServices = mkOption {
+      type = types.listOf types.str;
+      default = [
+        "act_runner" "syncthing" "restic" "fava"
+        "homer" "cups" "fstrim"
+      ];
+      description = ''
+        Tier-3 informational checks (log-only, no warning). These are services
+        that the sentinel will note the status of for diagnostics.
+      '';
+    };
+
+    bootDelay = mkOption {
+      # Kept as a string because it is spliced verbatim into a sleep command line.
+      type = types.str;
+      default = "120";
+      description = ''
+        Seconds to wait after multi-user.target before running the boot-time
+        sentinel check. This gives Tier-1 services time to start before
+        the sentinel decides they've failed.
+      '';
+    };
+
+    rollbackMode = mkOption {
+      type = types.enum [ "set-default" "rollback-now" "dry-run" ];
+      default = "set-default";
+      description = ''
+        Rollback strategy when Tier-1 failures are detected:
+        - set-default: Write the previous generation to loader.conf (next reboot).
+        - rollback-now: Also run nixos-rebuild switch --rollback for immediate fix.
+        - dry-run: Log what would happen but take no action (testing).
+      '';
+    };
+
+    enablePostRebuild = mkOption {
+      type = types.bool;
+      default = true;
+      description = ''
+        When enabled, the sentinel check runs after every nixos-rebuild switch
+        activation. If a newly deployed generation has Tier-1 failures, it
+        triggers rollback immediately.
+      '';
+    };
+  };
+
+  config = mkIf cfg.enable {
+    # ── Deploy scripts to PATH ───────────────────────────────────────────────
+    environment.systemPackages = [ sentinelCheck rollbackScript ];
+
+    # Ensure log directory exists
+    # NOTE(review): sentinel-check.sh appends to /var/log/nixos-sentinel.log,
+    # not to a file inside this directory — confirm whether nixos-rollback.sh
+    # uses the directory, otherwise align the two paths.
+    systemd.tmpfiles.rules = [
+      "d /var/log/nixos-sentinel 0755 root root -"
+    ];
+
+    # ── Boot-time sentinel service ───────────────────────────────────────────
+    # Runs after multi-user.target with a configurable delay, checks Tier-1
+    # services, and triggers rollback if any are down.
+    systemd.services.nixos-sentinel = {
+      description = "NixOS Boot Sentinel — check critical services, roll back on failure";
+      after = [ "network.target" "multi-user.target" ];
+      wants = [ "network.target" ];
+      wantedBy = [ "multi-user.target" ];
+
+      # A unit that is both WantedBy= and After= multi-user.target forms an
+      # ordering cycle, because targets implicitly order themselves after the
+      # units they want. Opting out of default dependencies removes that
+      # implicit edge; the shutdown handling it also provided is restored
+      # explicitly below.
+      unitConfig.DefaultDependencies = false;
+      conflicts = [ "shutdown.target" ];
+      before = [ "shutdown.target" ];
+
+      # util-linux supplies logger, which nixos-rollback.sh calls by bare name.
+      # The system profile is appended so that nixos-rebuild (needed for
+      # --rollback-now) resolves — presumably installed there; confirm on
+      # hosts that disable the installer tools.
+      path = with pkgs; [ coreutils gawk gnused systemd util-linux ] ++ [ "/run/current-system/sw" ];
+
+      serviceConfig = {
+        Type = "oneshot";
+        RemainAfterExit = true;
+        ExecStartPre = "${pkgs.coreutils}/bin/sleep ${cfg.bootDelay}";
+        # The rollback must run only when the check FAILS. ExecStartPost= is
+        # executed only after ExecStart= succeeded, so chaining it there would
+        # roll back healthy boots and do nothing on unhealthy ones.
+        ExecStart = pkgs.writeShellScript "nixos-sentinel-boot" ''
+          if ${sentinelCheck}/bin/sentinel-check.sh; then
+            exit 0
+          fi
+          echo "Tier-1 failure detected — invoking rollback" >&2
+          ${rollbackScript}/bin/nixos-rollback.sh ${rollbackFlags} || exit $?
+          # Keep the unit in the failed state so the incident stays visible.
+          exit 1
+        '';
+      };
+    };
+
+    # ── Post-rebuild sentinel service (triggered by activation script) ──────
+    systemd.services.nixos-sentinel-rebuild = mkIf cfg.enablePostRebuild {
+      description = "NixOS Post-Rebuild Sentinel — check services after nixos-rebuild";
+      after = [ "network.target" ];
+
+      # Same PATH additions as the boot-time unit (logger, nixos-rebuild).
+      path = with pkgs; [ coreutils gawk gnused systemd util-linux ] ++ [ "/run/current-system/sw" ];
+
+      serviceConfig = {
+        Type = "oneshot";
+        # The unit is started from the activation script, i.e. while the
+        # switch is still in progress. Wait the same grace period as at boot
+        # so services that are being restarted are not reported as failed.
+        ExecStartPre = "${pkgs.coreutils}/bin/sleep ${cfg.bootDelay}";
+        # Roll back only on a failed check (see the boot-time unit).
+        ExecStart = pkgs.writeShellScript "nixos-sentinel-rebuild" ''
+          if ${sentinelCheck}/bin/sentinel-check.sh; then
+            exit 0
+          fi
+          echo "Tier-1 failure detected — invoking rollback" >&2
+          ${rollbackScript}/bin/nixos-rollback.sh ${rollbackFlags} || exit $?
+          exit 1
+        '';
+      };
+    };
+
+    # Activation script — fires after every nixos-rebuild switch
+    system.activationScripts.rollback-sentinel = mkIf cfg.enablePostRebuild ''
+      # Start the post-rebuild sentinel in the background.
+      # This runs on every activation (boot + nixos-rebuild). On boot the
+      # boot-time service handles it, so this is primarily for nixos-rebuild.
+      # systemctl is called by absolute path instead of relying on the
+      # activation script's PATH; failures (e.g. no service manager running
+      # yet during early boot) are deliberately ignored.
+      ${config.systemd.package}/bin/systemctl start --no-block nixos-sentinel-rebuild.service 2>/dev/null || true
+    '';
+  };
+}
-- 
2.49.1