#!/usr/bin/env bash
# =============================================================================
# nixos-rollback.sh — NixOS systemd-boot Rollback Script
#
# Sets the previous NixOS generation as the default boot option for
# systemd-boot. Intended to be invoked after a failed generation has been
# detected (critical services not starting).
# Logs all actions to syslog/journald and a local logfile. Fails safely when
# no previous generation exists or required files are missing.
#
# Integration with the boot sentinel:
#   sentinel-check.sh  → detects Tier-1 service failures (sshd, docker,
#                        traefik, authelia) after a boot
#   nixos-rollback.sh  ← called when sentinel exits nonzero; sets previous
#                        generation as default for next boot
#
# Usage:
#   nixos-rollback.sh                 # auto-detect & set previous gen
#   nixos-rollback.sh --dry-run       # show what would be done
#   nixos-rollback.sh --rollback-now  # also run nixos-rebuild switch
#                                     # --rollback for immediate fix
#   nixos-rollback.sh --help          # full help text
#
# Exit codes:
#   0 — rollback applied (or dry-run would apply)
#   1 — preflight failure (missing files, permissions)
#   2 — no previous generation available
#   3 — nixos-rebuild --rollback failed (only with --rollback-now)
#
# Installation on NixOS:
#   Place in /usr/local/bin/nixos-rollback.sh and make executable.
#   Add a systemd oneshot service to run it after sentinel-check detects
#   failures, or invoke directly from a sentinel timer.
# =============================================================================

set -euo pipefail

# ── Configuration ────────────────────────────────────────────────────────────
# These can be overridden via environment variables for testing.
LOADER_CONF="${NIXOS_ROLLBACK_LOADER_CONF:-/boot/loader/loader.conf}"
ENTRIES_DIR="${NIXOS_ROLLBACK_ENTRIES_DIR:-/boot/loader/entries}"
LOGFILE="${NIXOS_ROLLBACK_LOGFILE:-/var/log/nixos-rollback.log}"
SYSLOG_IDENT="nixos-rollback"

# ── CLI flags ────────────────────────────────────────────────────────────────
DRY_RUN=false
ROLLBACK_NOW=false

# ── Colors (disabled when not a terminal) ────────────────────────────────────
# ANSI-C quoting ($'…') stores the real ESC byte, so a plain `echo` renders the
# colour instead of printing the text "\033[…" verbatim.
if [[ -t 1 ]]; then
  RED=$'\033[0;31m'
  GREEN=$'\033[0;32m'
  YELLOW=$'\033[1;33m'
  CYAN=$'\033[0;36m'
  NC=$'\033[0m' # No Color
else
  RED=''
  GREEN=''
  YELLOW=''
  CYAN=''
  NC=''
fi

# =============================================================================
# Help
# =============================================================================

# usage: print the help text to stdout.
# NOTE(review): the original help text was lost when this file was mangled;
# this version is rebuilt from the header comment at the top of the script.
usage() {
  cat <<'EOF'
Usage: nixos-rollback.sh [OPTIONS]

Sets the previous NixOS generation as the default systemd-boot entry.

Options:
  --dry-run        Show what would be done without changing anything
  --rollback-now   Also run 'nixos-rebuild switch --rollback' for an
                   immediate fix
  -h, --help       Show this help text

Environment overrides (for testing):
  NIXOS_ROLLBACK_LOADER_CONF       Path to loader.conf
                                   (default: /boot/loader/loader.conf)
  NIXOS_ROLLBACK_ENTRIES_DIR       Path to the boot entries directory
                                   (default: /boot/loader/entries)
  NIXOS_ROLLBACK_LOGFILE           Path to the logfile
                                   (default: /var/log/nixos-rollback.log)
  NIXOS_ROLLBACK_SKIP_ROOT_CHECK   Set to any value to skip the root check

Exit codes:
  0  rollback applied (or dry-run would apply)
  1  preflight failure (missing files, permissions)
  2  no previous generation available
  3  nixos-rebuild --rollback failed (only with --rollback-now)
EOF
}

# =============================================================================
# Logging
# =============================================================================

# log: record one message in the logfile and syslog, and echo it to the console.
# Arguments: $1 - level (INFO, WARN or ERROR); remaining args - message words
# Outputs:   ERROR/WARN go to stderr, INFO goes to stdout
# Both the logfile and syslog writes are best-effort: under `set -e` a missing
# log directory or absent `logger` must not abort the script before the
# message reaches the console.
log() {
  local level="$1"
  shift
  local msg="$*"
  local timestamp priority

  # NOTE(review): the original logfile line format was lost in the mangled
  # source; "[timestamp] [LEVEL] message" is a reconstruction — confirm.
  timestamp="$(date '+%Y-%m-%d %H:%M:%S')"
  {
    printf '[%s] [%s] %s\n' "${timestamp}" "${level}" "${msg}" >>"${LOGFILE}"
  } 2>/dev/null || true

  # Map the script's level names onto canonical syslog priority names.
  case "${level}" in
    ERROR) priority="err" ;;
    WARN) priority="warning" ;;
    *) priority="info" ;;
  esac
  logger -t "${SYSLOG_IDENT}" -p "user.${priority}" -- "${msg}" \
    2>/dev/null || true

  # Also print to stderr for ERROR/WARN, stdout for INFO
  case "${level}" in
    ERROR) echo >&2 "${RED}[ERROR]${NC} ${msg}" ;;
    WARN) echo >&2 "${YELLOW}[WARN]${NC} ${msg}" ;;
    INFO) echo " ${GREEN}[INFO]${NC} ${msg}" ;;
  esac
}

info() { log "INFO" "$@"; }
warn() { log "WARN" "$@"; }
error() { log "ERROR" "$@"; }

# =============================================================================
# Preflight checks
# =============================================================================

# preflight: verify privileges, paths and tools before anything is modified.
# Exits with status 1 on the first failed check.
preflight() {
  # Must run as root (need to write to /boot), unless overridden for testing
  if [[ -z "${NIXOS_ROLLBACK_SKIP_ROOT_CHECK:-}" ]] && [[ "$(id -u)" -ne 0 ]]; then
    error "This script must be run as root (needs write access to /boot/loader)"
    error "Set NIXOS_ROLLBACK_SKIP_ROOT_CHECK=1 for testing against mock paths."
    exit 1
  fi

  # Directories and files
  if [[ ! -d "${ENTRIES_DIR}" ]]; then
    error "Boot entries directory not found: ${ENTRIES_DIR}"
    exit 1
  fi

  if [[ ! -f "${LOADER_CONF}" ]]; then
    error "Loader config not found: ${LOADER_CONF}"
    exit 1
  fi

  if [[ ! -r "${LOADER_CONF}" ]]; then
    error "Cannot read loader config: ${LOADER_CONF}"
    exit 1
  fi

  # Check write access to /boot/loader (parent of loader.conf)
  local loader_dir
  loader_dir="$(dirname "${LOADER_CONF}")"
  if [[ ! -w "${loader_dir}" ]]; then
    error "Cannot write to ${loader_dir} (insufficient permissions)"
    exit 1
  fi

  # Logfile directory must exist
  local log_dir
  log_dir="$(dirname "${LOGFILE}")"
  if [[ ! -d "${log_dir}" ]]; then
    warn "Log directory ${log_dir} does not exist, creating it"
    mkdir -p "${log_dir}" 2>/dev/null || {
      error "Cannot create log directory ${log_dir}"
      exit 1
    }
  fi

  # Check --rollback-now dependencies
  if [[ "${ROLLBACK_NOW}" = true ]]; then
    if ! command -v nixos-rebuild &>/dev/null; then
      error "nixos-rebuild not found on PATH (required for --rollback-now)"
      exit 1
    fi
  fi
}

# =============================================================================
# Generation helpers
# =============================================================================

# get_current_default: reads the current default entry from loader.conf
# Outputs: the value of the first "default" line (e.g.
#          "nixos-generation-N.conf"), or nothing when there is none or the
#          file cannot be read.
get_current_default() {
  awk '$1 == "default" { print $2; exit }' "${LOADER_CONF}" 2>/dev/null || true
}

# extract_gen_number: extracts the generation part from a conf filename
# Input:  "nixos-generation-367.conf"
# Output: 367
# The result is not validated here; callers check that it is numeric.
extract_gen_number() {
  local name="${1##*/}"
  name="${name#nixos-generation-}"
  printf '%s\n' "${name%.conf}"
}

# get_all_gen_numbers: prints the generation numbers found in the entries dir,
# one per line, sorted numerically ascending.
# Returns: 1 when no plain nixos-generation-N.conf entry exists.
get_all_gen_numbers() {
  local -a gens=()
  local f n

  for f in "${ENTRIES_DIR}"/nixos-generation-*.conf; do
    # Also covers the no-match case, where the glob stays literal.
    [[ -f "${f}" ]] || continue
    n="$(extract_gen_number "${f}")"
    # Skip entries whose suffix is not a plain number (for example
    # "nixos-generation-5-specialisation-x.conf"); they would break the
    # numeric comparisons further down.
    [[ "${n}" =~ ^[0-9]+$ ]] || continue
    gens+=("${n}")
  done

  if [[ "${#gens[@]}" -eq 0 ]]; then
    return 1
  fi

  # Sort numerically and output
  printf '%s\n' "${gens[@]}" | sort -n
}

# get_previous_gen: given the current generation number, find the previous one
# Arguments: $1 - current generation number; remaining args - available
#            generation numbers (order does not matter)
# Outputs:   the largest available generation strictly below the current one
# Returns:   1 when there is none, or when $1 is not a number
get_previous_gen() {
  local current="$1"
  shift
  local prev=""
  local g

  [[ "${current}" =~ ^[0-9]+$ ]] || return 1

  for g in "$@"; do
    [[ "${g}" =~ ^[0-9]+$ ]] || continue
    # 10# forces base 10 so a leading zero is never read as octal.
    if ((10#${g} < 10#${current})); then
      if [[ -z "${prev}" ]] || ((10#${g} > 10#${prev})); then
        prev="${g}"
      fi
    fi
  done

  if [[ -z "${prev}" ]]; then
    return 1
  fi

  printf '%s\n' "${prev}"
}

# =============================================================================
# Main rollback logic
# =============================================================================

# do_rollback: point loader.conf's default at the previous generation and,
# with --rollback-now, also run `nixos-rebuild switch --rollback`.
# Exits 0 on dry-run, 1 on errors, 2 when no previous generation exists and
# 3 when nixos-rebuild fails.
do_rollback() {
  # Step 1: Read current default
  local current_entry
  current_entry="$(get_current_default)"

  if [[ -z "${current_entry}" ]]; then
    error "No 'default' entry found in ${LOADER_CONF}"
    error "Cannot determine current generation — aborting"
    exit 1
  fi

  info "Current default boot entry: ${current_entry}"

  # Step 2: Build sorted list of all available generations
  local -a all_gens=()
  local line
  while IFS= read -r line; do
    all_gens+=("${line}")
  done < <(get_all_gen_numbers || true)

  if [[ "${#all_gens[@]}" -eq 0 ]]; then
    error "No NixOS generation .conf files found in ${ENTRIES_DIR}"
    exit 1
  fi

  info "Available generations: ${all_gens[*]}"

  # Step 3: Find current generation number
  local current_gen
  current_gen="$(extract_gen_number "${current_entry}")"

  # Verify current_gen is a valid number
  if ! [[ "${current_gen}" =~ ^[0-9]+$ ]]; then
    error "Could not parse generation number from '${current_entry}'"
    exit 1
  fi

  # Step 4: Find the previous generation
  local prev_gen
  prev_gen="$(get_previous_gen "${current_gen}" "${all_gens[@]}")" || {
    error "No previous generation found before generation ${current_gen}"
    error "This is the oldest available generation — cannot roll back further"
    exit 2
  }

  local prev_entry="nixos-generation-${prev_gen}.conf"
  local prev_conf_path="${ENTRIES_DIR}/${prev_entry}"

  if [[ ! -f "${prev_conf_path}" ]]; then
    error "Previous generation entry not found: ${prev_conf_path}"
    error "The .conf file for generation ${prev_gen} is missing — cannot roll back"
    exit 1
  fi

  info "Target rollback generation: ${prev_gen} → ${prev_entry}"

  # Step 5: Apply the rollback
  if [[ "${DRY_RUN}" = true ]]; then
    echo ""
    echo " ${CYAN}[DRY RUN]${NC} Would change ${LOADER_CONF}:"
    echo " ${YELLOW}-${NC} default ${current_entry}"
    echo " ${GREEN}+${NC} default ${prev_entry}"
    echo ""
    info "DRY RUN — no changes made"
    exit 0
  fi

  # Write new default, keeping a backup (.bak).
  # The pattern matches any "default" line rather than interpolating the old
  # entry name into the regex, so characters such as "." in the old value
  # cannot change what is matched. prev_entry is built from a validated
  # number, so it is safe inside the replacement text.
  if ! sed -i.bak -E \
    "s/^default[[:space:]]+.*\$/default ${prev_entry}/" "${LOADER_CONF}"; then
    error "Failed to set default boot entry to ${prev_entry}"
    exit 1
  fi

  # Verify the change was applied
  local new_default
  new_default="$(get_current_default)"

  if [[ "${new_default}" != "${prev_entry}" ]]; then
    error "Failed to set default boot entry to ${prev_entry}"
    error "Current default is still: ${new_default}"
    # Attempt to restore backup
    if [[ -f "${LOADER_CONF}.bak" ]]; then
      cp -- "${LOADER_CONF}.bak" "${LOADER_CONF}"
      info "Restored backup from ${LOADER_CONF}.bak"
    fi
    exit 1
  fi

  info "Successfully set default boot entry to ${prev_entry} (generation ${prev_gen})"
  info "Backup of previous config saved to ${LOADER_CONF}.bak"

  # Step 6: Optionally run nixos-rebuild switch --rollback
  if [[ "${ROLLBACK_NOW}" = true ]]; then
    echo ""
    info "Running nixos-rebuild switch --rollback for immediate effect..."

    # With pipefail, a nonzero nixos-rebuild status becomes the pipeline's
    # status even though the logging loop itself succeeds.
    local rc=0
    nixos-rebuild switch --rollback 2>&1 \
      | while IFS= read -r line; do
        logger -t "${SYSLOG_IDENT}" -- "nixos-rebuild: ${line}" \
          2>/dev/null || true
        echo " ${line}"
      done || rc=$?

    if [[ "${rc}" -eq 0 ]]; then
      info "nixos-rebuild switch --rollback completed successfully"
    else
      error "nixos-rebuild switch --rollback failed with exit code ${rc}"
      error "The boot default has been changed but the current system was NOT rolled back"
      error "Reboot to apply the rollback"
      exit 3
    fi
  fi

  info "Rollback complete. Next boot will use generation ${prev_gen}."

  if [[ "${ROLLBACK_NOW}" = false ]]; then
    echo ""
    echo " ${YELLOW}NOTE:${NC} The current running system is unchanged."
    echo " Reboot to boot into generation ${prev_gen}."
    echo " Or re-run with --rollback-now for immediate effect."
  fi
}

# =============================================================================
# Main
# =============================================================================

# main: parse CLI flags, run the preflight checks, then perform the rollback.
# Arguments: the script's command-line arguments
main() {
  # Parse arguments
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --dry-run)
        DRY_RUN=true
        shift
        ;;
      --rollback-now)
        ROLLBACK_NOW=true
        shift
        ;;
      -h | --help)
        usage
        exit 0
        ;;
      *)
        echo >&2 "Unknown option: $1"
        echo >&2 "Use --help for usage information."
        exit 1
        ;;
    esac
  done

  echo ""
  echo " ${CYAN}═══ NixOS systemd-boot Rollback ═══${NC}"
  echo ""

  preflight

  if [[ "${DRY_RUN}" = true ]]; then
    info "DRY RUN mode — no changes will be made"
  fi

  if [[ "${ROLLBACK_NOW}" = true ]]; then
    info "ROLLBACK NOW mode — will also run nixos-rebuild switch --rollback"
  fi

  echo ""
  do_rollback
}

main "$@"