Compare commits

..

2 Commits

Author SHA1 Message Date
2c5dc3d58d feat: comprehensive NixOS deployment infrastructure
- docs/nix-container-install.md: 474-line guide covering Determinate Systems
  installer, vanilla Nix, NixOS base image, architecture notes (x86_64 vs aarch64),
  cross-compilation, container considerations, troubleshooting
- scripts/deploy.sh: 286-line deployment script with pre-flight checks, git sync,
  build validation (nix build --no-link), 5 actions (switch/boot/test/build/
  dry-activate), color-coded logging, env-based configurability
- scripts/deploy-ssh-config: SSH config for all 3 hosts with dual users for
  lazyworkhorse, reverse tunnel for cyt-pi, uConsole placeholder, Gitea entry

Full replacements of stub files from previous commit.
2026-05-20 14:29:38 -04:00
8b004c47b9 feat: add NixOS deployment infrastructure
- Nix installation guide for container (docs/nix-container-install.md)
- Deployment helper script (scripts/deploy.sh)
- SSH configuration template (scripts/deploy-ssh-config)
- Deployment skill for Hermes (skills/nixos-deploy/)

Enables remote NixOS deployment from Hermes container to target hosts
via SSH with nixos-rebuild --target-host.

Usage:
  ./scripts/deploy.sh <hostname> [branch] [action]

Supported hosts:
  - lazyworkhorse (x86_64)
  - cyt-pi (aarch64)
  - uConsole (aarch64) - config pending
2026-04-30 00:06:12 +00:00
15 changed files with 885 additions and 789 deletions

View File

@@ -5,7 +5,6 @@ This document outlines the development conventions for this NixOS-based infrastr
## Build & Deployment
- **Build/Deploy:** Use `nixos-rebuild switch --flake .#<hostname>` to build and deploy the configuration for a specific host.
- **CRITICAL — Validate before pushing:** Always `nix build --no-link '.#nixosConfigurations.<hostname>.config.system.build.toplevel'` (or `nh os build`) and confirm it succeeds before pushing any changes. Never push untested NixOS configs.
- **Development Shell:** Activate the development environment with `nix develop`.
## Linting & Formatting

View File

@@ -1,106 +0,0 @@
# ollama-gfx906/Dockerfile
#
# Custom ollama image with ROCm 6.1 + gfx906 (MI50) support.
# The official ollama/rocm image ships ROCm 7.2 which dropped gfx906.
# This uses v0.23.2's native CMake build system with AMDGPU_TARGETS including gfx906.
#
# Build: docker build -t ollama/ollama:rocm-gfx906 ai/ollama
FROM rocm/dev-ubuntu-22.04:6.1.2-complete AS builder
# Build dependencies (CMake, Ninja, Go)
ARG CMAKEVERSION=3.31.2
ARG NINJAVERSION=1.12.1
ARG GOLANG_VERSION=1.22.0
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
curl git ccache build-essential pkg-config unzip \
&& rm -rf /var/lib/apt/lists/*
# Install CMake from official binaries
RUN curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-x86_64.tar.gz \
| tar xz -C /usr/local --strip-components 1
# Install Ninja
RUN curl -fsSL -o /tmp/ninja.zip \
https://github.com/ninja-build/ninja/releases/download/v${NINJAVERSION}/ninja-linux.zip \
&& unzip /tmp/ninja.zip -d /usr/local/bin && rm /tmp/ninja.zip
# Install Go
RUN curl -fsSL https://go.dev/dl/go${GOLANG_VERSION}.linux-amd64.tar.gz \
| tar xz -C /usr/local
ENV PATH=/usr/local/go/bin:$PATH
ARG OLLAMA_VERSION=v0.23.2
RUN git clone --depth 1 --branch ${OLLAMA_VERSION} https://github.com/ollama/ollama.git /build
WORKDIR /build
# ROCm paths
ENV HIP_PATH=/opt/rocm
ENV ROCM_PATH=/opt/rocm
ENV CMAKE_GENERATOR=Ninja
ENV LDFLAGS=-s
# Step 1: Build CPU backends with GCC (no ROCm preset)
# Pre-set CMAKE_HIP_COMPILER="" to prevent check_language(HIP) from
# finding a HIP compiler (it searches /opt/rocm even without PATH).
# Remove /opt/rocm from PATH to prevent find_program from finding hipcc.
RUN mkdir -p build-cpu && \
PATH=/usr/local/go/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin \
cmake -B build-cpu -DCMAKE_BUILD_TYPE=Release \
-DCMAKE_HIP_COMPILER="" \
-DCMAKE_INSTALL_PREFIX=/build/dist && \
cmake --build build-cpu --target ggml-cpu -- -l $(nproc) && \
cmake --install build-cpu --component CPU --strip && \
echo "=== CPU install ===" && \
(find /build/dist/lib/ollama -type f -o -type l 2>&1 | head -20 || echo "empty")
# Step 2: Build HIP backend with ROCm preset + gfx906 target only
# The ROCm 6 preset enables HIP language detection (enable_language(HIP))
# which ensures GPU kernels are properly compiled for gfx906.
# OLLAMA_RUNNER_DIR=rocm from the preset, so HIP goes to lib/ollama/rocm/
# Need CMAKE_PREFIX_PATH so find_package(hip) finds hip-config.cmake
# at /opt/rocm/lib/cmake/hip/hip-config.cmake.
RUN mkdir -p build-hip && \
cmake -B build-hip \
--preset 'ROCm 6' \
-DAMDGPU_TARGETS="gfx906:xnack-" \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_PREFIX_PATH="/opt/rocm" && \
cmake --build build-hip --target ggml-hip -- -l $(nproc) && \
cmake --install build-hip --component HIP --strip && \
echo "=== HIP install ===" && \
find /build/dist/lib/ollama -type f -o -type l | head -20
# Step 3: Build Go binary (GCC for CGo linking)
ENV CGO_ENABLED=1
RUN go build -trimpath -ldflags="-X=github.com/ollama/ollama/version.Version=${OLLAMA_VERSION}" -o /build/dist/ollama .
# ---------- Runtime image ----------
FROM ubuntu:24.04
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
ca-certificates curl libstdc++6 libgomp1 libvulkan1 libopenblas0 \
&& rm -rf /var/lib/apt/lists/*
# Copy ROCm 6.1 runtime libraries
# These are needed at runtime by ggml-hip via LD_LIBRARY_PATH
COPY --from=builder /opt/rocm/lib/ /opt/rocm/lib/
COPY --from=builder /opt/rocm/share/ /opt/rocm/share/
# Copy ollama binary + all backends (CPU + HIP)
# CPU install: /build/dist/lib/ollama/libggml-*.so
# HIP install: /build/dist/lib/ollama/rocm/libggml-hip.so
COPY --from=builder /build/dist/ollama /usr/bin/ollama
COPY --from=builder /build/dist/lib/ollama/ /usr/lib/ollama/
RUN ldconfig
ENV LD_LIBRARY_PATH=/opt/rocm/lib:/usr/lib/ollama/rocm:/usr/lib/ollama
ENV HSA_OVERRIDE_GFX_VERSION=9.0.6
ENV HCC_AMDGPU_TARGET=gfx906
ENV HSA_ENABLE_SDMA=0
EXPOSE 11434
ENTRYPOINT ["/bin/ollama"]
CMD ["serve"]

View File

@@ -0,0 +1,474 @@
# Nix Installation for Hermes Agent Container
This guide covers several approaches for installing Nix in the Hermes Agent Docker
container to enable remote NixOS deployment via `nixos-rebuild`. It covers both
x86_64 (lazyworkhorse) and aarch64 (cyt-pi, uConsole) architectures.
## Table of Contents
1. [Why Nix in a Container?](#why-nix-in-a-container)
2. [Prerequisites](#prerequisites)
3. [Installation Methods](#installation-methods)
- [Method A: Determinate Systems Installer](#method-a-determinate-systems-installer-recommended)
- [Method B: Vanilla Nix Installer](#method-b-vanilla-nix-installer)
- [Method C: NixOS-Based Container Image](#method-c-nixos-based-container-image)
4. [Architecture-Specific Notes](#architecture-specific-notes)
- [x86_64 (lazyworkhorse)](#x86_64-lazyworkhorse)
- [aarch64 (cyt-pi, uConsole)](#aarch64-cyt-pi-uconsole)
- [Cross-Compilation](#cross-compilation)
5. [Post-Install Configuration](#post-install-configuration)
6. [Verification](#verification)
7. [Container-Specific Considerations](#container-specific-considerations)
- [Persistence](#persistence)
- [Disk Space](#disk-space)
- [Security](#security)
- [Resource Constraints](#resource-constraints)
8. [Integration with deploy.sh](#integration-with-deploysh)
9. [Troubleshooting](#troubleshooting)
10. [References](#references)
---
## Why Nix in a Container?
The Hermes Agent container runs on an Ubuntu/Debian base. To deploy NixOS
configurations to remote hosts, we need:
- `nix` — the Nix package manager (for building configurations)
- `nixos-rebuild` — the NixOS deployment tool
- Access to the infra repo with flake configuration
Installing Nix inside the container avoids:
- Host-level Nix installation on the Docker host
- Cross-container volume mounts of /nix/store
- Dependencies on the host's Nix daemon (which may be a different version)
## Prerequisites
- Docker host running Linux (x86_64 and/or aarch64)
- Container base: Debian/Ubuntu (apt-based)
- 1-2 GB additional disk space for Nix store
- Network access to cache.nixos.org (or a local binary cache)
- Git access to the infra repository
## Installation Methods
### Method A: Determinate Systems Installer (Recommended)
The Determinate Systems installer is the recommended approach. It is non-interactive,
sets up flakes by default, and handles multi-user installation cleanly.
**Dockerfile additions:**
```dockerfile
# Install Nix (Determinate Systems installer)
RUN apt-get update && apt-get install -y --no-install-recommends \
curl \
xz-utils \
&& rm -rf /var/lib/apt/lists/*
# Download and run Nix installer (non-interactive)
RUN curl --proto '=https' --tlsv1.2 -sSf -L https://install.determinate.systems/nix \
-o /tmp/nix-install.sh \
&& chmod +x /tmp/nix-install.sh \
&& sh /tmp/nix-install.sh install --no-confirm \
&& rm /tmp/nix-install.sh
# Configure Nix for flakes
RUN mkdir -p /root/.config/nix \
&& echo 'experimental-features = nix-command flakes' > /root/.config/nix/nix.conf
# Add Nix to PATH for all users
ENV PATH="/nix/var/nix/profiles/default/bin:$PATH"
```
**Pros:**
- Fully non-interactive (--no-confirm)
- Enables flakes automatically
- Sets up multi-user daemon
- Auto-selects correct architecture
- Handles upgrades gracefully
**Cons:**
- Downloads ~100 MB installer
- Requires systemd in container (works with --privileged or cgroupv2)
- Daemon mode may conflict with container exit semantics
**Container runtime additions:**
For the Nix daemon to work properly inside a container, you may need:
```dockerfile
# Ensure /nix is a volume for persistence
VOLUME /nix
# Or mount tmpfs for ephemeral builds:
# docker run --tmpfs /nix:exec,size=4G ...
```
### Method B: Vanilla Nix Installer
The official single-user Nix installer is lighter but requires manual flake setup.
**Dockerfile additions:**
```dockerfile
# Install Nix (single-user, official installer)
RUN apt-get update && apt-get install -y --no-install-recommends \
curl \
sudo \
xz-utils \
&& rm -rf /var/lib/apt/lists/*
# Install Nix as root (single-user)
RUN curl -L https://nixos.org/nix/install -o /tmp/nix-install.sh \
&& chmod +x /tmp/nix-install.sh \
&& sh /tmp/nix-install.sh --no-daemon \
&& rm /tmp/nix-install.sh
# Enable flakes
RUN mkdir -p /root/.config/nix \
&& echo 'experimental-features = nix-command flakes' > /root/.config/nix/nix.conf
# Source Nix in shell
RUN echo '. /root/.nix-profile/etc/profile.d/nix.sh' >> /root/.bashrc
ENV PATH="/root/.nix-profile/bin:$PATH"
```
**Pros:**
- Smaller installer
- No daemon needed (single-user mode)
- Works in containers without systemd
- Simpler container lifecycle
**Cons:**
- Manual flake configuration required
- Single-user only (no multi-user isolation)
- PATH must be set manually
- No automatic garbage collection
### Method C: NixOS-Based Container Image
For maximum isolation, use an official NixOS base image for the build stage.
**Multi-stage Dockerfile:**
```dockerfile
# Build stage: NixOS builder
FROM nixos/nix:latest AS builder
COPY infra /infra
WORKDIR /infra
# Build the configuration once
RUN nix build '.#nixosConfigurations.lazyworkhorse.config.system.build.toplevel'
# Final stage: Hermes container
FROM ubuntu:22.04
# Copy the Nix closure and binary cache
COPY --from=builder /nix /nix
# ... rest of Hermes setup
```
**Pros:**
- Purely declarative build environment
- No installation at runtime
- Easy to pin Nix version
- Good for CI/CD pipelines
**Cons:**
- Requires multi-stage Docker build
- Larger initial image build
- Harder to update Nix version at runtime
- Overkill if Nix is only needed for `nixos-rebuild`
---
## Architecture-Specific Notes
### x86_64 (lazyworkhorse)
The Hermes container likely runs on x86_64 hardware for the primary server.
Nix will download x86_64 binaries from cache.nixos.org by default.
**No special configuration needed** — the standard installer works out of the box.
If the container is running on an AMD Ryzen/EPYC or Intel Xeon, consider:
```bash
# Enable CPU-specific optimizations (optional)
echo 'extra-platforms = x86_64-v1 x86_64-v2 x86_64-v3' >> /root/.config/nix/nix.conf
```
### aarch64 (cyt-pi, uConsole)
When building for aarch64 targets from an x86_64 container, you need either:
1. Remote builder (aarch64 machine does the build), or
2. QEMU-based emulation (slower but self-contained), or
3. Build directly on the aarch64 target using `--build-host`
**For QEMU emulation in the container:**
```dockerfile
# Enable binfmt for aarch64 emulation
RUN apt-get update && apt-get install -y --no-install-recommends \
qemu-user-static \
binfmt-support \
&& rm -rf /var/lib/apt/lists/*
# Register aarch64 binfmt
RUN update-binfmts --enable qemu-aarch64
```
**Container runtime (for QEMU):**
```bash
docker run --privileged --rm ... hermes-agent
# Or with specific capability:
docker run --cap-add=SYS_ADMIN --security-opt seccomp=unconfined ... hermes-agent
```
### Cross-Compilation
For native cross-compilation (without emulation), add to your Nix configuration:
```nix
# In your flake.nix or nix.conf
{
nix.settings.extra-platforms = [ "aarch64-linux" "x86_64-linux" ];
nix.settings.extra-sandbox-paths = [ ];
boot.binfmt.emulatedSystems = [ "aarch64-linux" ];
}
```
Or in `nix.conf`:
```
extra-platforms = x86_64-linux aarch64-linux
extra-sandbox-paths =
```
---
## Post-Install Configuration
### nix.conf for Container Usage
Recommended `/root/.config/nix/nix.conf`:
```ini
experimental-features = nix-command flakes
substituters = https://cache.nixos.org/
trusted-users = root
max-jobs = auto
cores = 0
sandbox = false
```
Note: `sandbox = false` is needed inside containers that lack full sandbox
support. This is safe in a single-tenant container environment.
### PATH Setup
Add to your Dockerfile:
```dockerfile
ENV PATH="/nix/var/nix/profiles/default/bin:/root/.nix-profile/bin:${PATH}"
```
### Shell Integration
```dockerfile
RUN echo 'source /root/.nix-profile/etc/profile.d/nix.sh' >> /root/.bashrc
```
---
## Verification
After installation, verify with:
```bash
# Check Nix is available
nix --version
# Check nixos-rebuild
nixos-rebuild --help | head -3
# Verify flakes are enabled
nix flake --help
# Test a build (must be in infra repo)
cd /opt/data/infra
nix build --no-link '.#nixosConfigurations.lazyworkhorse.config.system.build.toplevel'
# Check available systems
nix eval --impure --expr 'builtins.currentSystem'
```
---
## Container-Specific Considerations
### Persistence
The `/nix` directory should be a Docker volume to avoid re-downloading
packages on every container restart:
```yaml
# docker-compose.yml
volumes:
- nix-store:/nix
volumes:
nix-store:
```
Without persistence, every container restart requires re-downloading the
entire Nix store (~500 MB - 2 GB depending on packages used).
### Disk Space
The Nix store grows over time as old generations accumulate. Set up garbage
collection:
```bash
# Manual GC
nix store gc
# Remove old generations
nix-collect-garbage --delete-older-than 30d
# Automatic GC (in nix.conf)
# Currently not supported in nix.conf, but you can run a cron job:
# nix store gc --max 10G
```
In Docker, limit store growth with:
```dockerfile
# Configure max store size
RUN mkdir -p /etc/nix && \
echo 'min-free = 5368709120' > /etc/nix/nix.conf # Keep 5GB free
```
### Security
Running Nix in a container introduces some security considerations:
1. **Sandboxing:** `sandbox = false` disables build isolation. In a multi-tenant
container, this means Nix builds can affect the container filesystem.
**Mitigation:** Only build configs you trust (your own infra repo).
2. **Network access:** The container needs outbound access to cache.nixos.org.
If using a restricted network, set up a local binary cache:
```nix
substituters = https://cache.nixos.org/ https://nix-cache.internal/
```
3. **Privileged mode:** QEMU emulation for aarch64 builds may need `--privileged`
or `--security-opt seccomp=unconfined`. This reduces container isolation.
**Mitigation:** Use remote builders or build natively on the target.
4. **Supply chain:** Nix derivations pin exact inputs via hashes. Verify
flake.lock is committed and reviewed.
### Resource Constraints
Nix builds can be memory and CPU intensive:
```nix
# Limit build parallelism in nix.conf
max-jobs = 2
cores = 4
# Or set per-build:
# nix build --max-jobs 2 --cores 4
```
For containers with limited memory (< 2 GB), consider:
- Building on the target host instead (`--build-host`)
- Using the deploy script's `build` action separately
---
## Integration with deploy.sh
The deployment script at `scripts/deploy.sh` expects:
1. **Nix installed** with flakes enabled
2. **SSH key** at `/opt/data/home/.ssh/id_hermes_gitea` (or via SSH_KEY env)
3. **Infra repo** cloned at the script's parent directory
4. **Network access** to:
- `code.lazyworkhorse.net:2222` (Gitea for git operations)
- Target hosts via SSH (see deploy-ssh-config)
- `cache.nixos.org` or a local substitute
Typical usage from Hermes:
```bash
# Full deployment
./scripts/deploy.sh lazyworkhorse master switch
# Build-only check (no remote deployment)
./scripts/deploy.sh cyt-pi master build
# Dry run
./scripts/deploy.sh uConsole feat/test dry-activate
# Override SSH key
SSH_KEY=/opt/data/home/.ssh/my-custom-key ./deploy.sh lazyworkhorse
```
---
## Troubleshooting
### "nix: command not found"
- Ensure Nix is installed and PATH is set:
```bash
export PATH="/nix/var/nix/profiles/default/bin:/root/.nix-profile/bin:$PATH"
```
- Check installation: `ls -la /nix/` should exist
- Re-source profile: `. /root/.nix-profile/etc/profile.d/nix.sh`
### "error: unable to download ... cache.nixos.org"
- Check network connectivity: `ping cache.nixos.org`
- Check DNS resolution from inside the container
- If behind a proxy, set `http_proxy` / `https_proxy` environment variables
### "sandbox: cannot run build in sandbox"
- Add `sandbox = false` to nix.conf
- Or run container with `--privileged` or `--security-opt seccomp=unconfined`
### "aarch64-linux builds fail on x86_64"
- QEMU binfmt not registered. Check: `ls /proc/sys/fs/binfmt_misc/`
- Rebuild QEMU registration: `docker run --privileged --rm tonistiigi/binfmt --install all`
- Or use `--build-host` to build on the target directly
### "nixos-rebuild fails with SSH errors"
- Verify SSH key exists and has correct permissions:
```bash
ls -la /opt/data/home/.ssh/id_hermes_gitea
chmod 600 /opt/data/home/.ssh/id_hermes_gitea
```
- Test SSH manually: `ssh -p 2424 -i /opt/data/home/.ssh/id_hermes_gitea ai-worker@lazyworkhorse.net`
- Check target host is reachable: `nc -zv lazyworkhorse.net 2424`
### "git fetch fails from Gitea"
- Verify GIT_SSH_COMMAND is set: `echo $GIT_SSH_COMMAND`
- Test git SSH: `ssh -T git@code.lazyworkhorse.net -p 2222`
- Check the infra repo remote: `git remote -v`
---
## References
- [Determinate Systems Nix Installer](https://github.com/DeterminateSystems/nix-installer)
- [NixOS Manual: Installation](https://nixos.org/manual/nix/stable/installation/)
- [NixOS Wiki: Flakes](https://nixos.wiki/wiki/Flakes)
- [NixOS Wiki: nixos-rebuild](https://nixos.wiki/wiki/Nixos-rebuild)
- [NixOS Wiki: Cross Compilation](https://nixos.wiki/wiki/Cross_Compilation)
- [Multi-arch Docker with QEMU](https://github.com/multiarch/qemu-user-static)

View File

@@ -61,7 +61,6 @@
./modules/nixos/services/open_code_server.nix
./modules/nixos/services/ollama_init_custom_models.nix
./modules/nixos/services/openclaw_node.nix
./modules/nixos/security/ai-worker-restricted.nix
./users/gortium.nix
./users/ai-worker.nix
];

View File

@@ -36,7 +36,7 @@
"transparent_hugepage=always" # because mucho ram
];
# 2. Load the specific drivers found by sensors-detect
boot.kernelModules = [ "nct6775" "lm96163" "iptable_nat" "iptable_filter" ];
boot.kernelModules = [ "nct6775" "lm96163" ];
# 3. Force the nct6775 driver to recognize the chip if it's stubborn
boot.extraModprobeConfig = ''
options nct6775 force_id=0xd280
@@ -49,26 +49,6 @@
networking.networkmanager.enable = true; # Easiest to use and most distros use this by default.
networking.hostId = "deadbeef";
# WireGuard VPN client -- always up, connects to wg-easy server
# Create age-encrypted secrets before deploying (run on the host):
# echo -n "<private_key>" | agenix -e secrets/wireguard_private_key.age
# echo -n "<preshared_key>" | agenix -e secrets/wireguard_preshared_key.age
networking.wireguard.interfaces = {
wg0 = {
ips = [ "10.8.0.3/24" ];
privateKeyFile = config.age.secrets.wireguard_private_key.path;
peers = [
{
publicKey = "rY9zII3AOm8rog2rv02PyA3Bq7zdvTOGkZapfCV1DkE=";
presharedKeyFile = config.age.secrets.wireguard_preshared_key.path;
allowedIPs = [ "10.8.0.0/24" ];
endpoint = "vpn.lazyworkhorse.net:51820";
persistentKeepalive = 25;
}
];
};
};
# Set your time zone.
time.timeZone = "America/Montreal";
@@ -178,7 +158,7 @@
settings = {
PasswordAuthentication = false;
KbdInteractiveAuthentication = false;
# Additional hardening settings below in SERVER HARDENING section
PermitRootLogin = "prohibit-password";
};
hostKeys = [
{
@@ -207,7 +187,6 @@
ai = {
path = self + "/assets/compose/ai";
envFile = config.age.secrets.containers_env.path;
ports = [ 22000 ]; # Syncthing TCP sync
};
cloudstorage = {
@@ -242,11 +221,6 @@
path = self + "/assets/compose/homepage";
};
vpn = {
path = self + "/assets/compose/vpn";
envFile = config.age.secrets.containers_env.path;
};
# tak = {
# path = self + "/assets/compose/tak";
# };
@@ -290,20 +264,6 @@
mode = "0440";
path = "/run/secrets/openclaw_gateway_token";
};
wireguard_private_key = {
file = ../../secrets/wireguard_private_key.age;
owner = "root";
group = "root";
mode = "0400";
path = "/run/secrets/wireguard_private_key";
};
wireguard_preshared_key = {
file = ../../secrets/wireguard_preshared_key.age;
owner = "root";
group = "root";
mode = "0400";
path = "/run/secrets/wireguard_preshared_key";
};
};
};
@@ -348,203 +308,6 @@
# Or disable the firewall altogether.
# networking.firewall.enable = false;
# =============================================================================
# SERVER HARDENING - Firewall, Fail2ban, SSH, Kernel
# =============================================================================
# Firewall - default deny, explicit allow
networking.firewall = {
# Enable firewall with default deny policy (NixOS firewall denies all by default)
enable = true;
allowPing = true;
# Only essential ports exposed to internet
allowedTCPPorts = [
2424 # SSH (non-standard port)
2222 # Gitea (version control)
80 # HTTP (Traefik redirect)
443 # HTTPS (Traefik)
# 8000 # Portainer - REVIEW: internal only?
# 4242 # Coms - REVIEW: internal only?
# 5000 # TAK API - REVIEW: internal only?
# 8087 # TAK Connect - REVIEW: internal only?
# 8089 # TAK Management - REVIEW: internal only?
];
allowedUDPPorts = [
51820 # WireGuard VPN
];
# Rate limiting and attack prevention
extraCommands = ''
# 1. Wipe the INPUT chain clean at the start of every activation
iptables -F INPUT
# Rate limit SSH connections (max 20 new connections per 60 seconds)
iptables -A INPUT -p tcp --dport 2424 -m state --state NEW -m recent --set
iptables -A INPUT -p tcp --dport 2424 -m state --state NEW -m recent --update --seconds 60 --hitcount 20 -j DROP
# Rate limit HTTP/HTTPS (protects Traefik)
iptables -A INPUT -p tcp --dport 80 -m state --state NEW -m limit --limit 25/minute --limit-burst 100 -j ACCEPT
iptables -A INPUT -p tcp --dport 443 -m state --state NEW -m limit --limit 25/minute --limit-burst 100 -j ACCEPT
# Drop invalid packets
iptables -A INPUT -m state --state INVALID -j DROP
# Log dropped packets (rate limited)
iptables -A INPUT -m limit --limit 5/min -j LOG --log-prefix "IPTables-Dropped: " --log-level 4
# 3. CRITICAL: Re-link the NixOS default firewall chain
# Without this line, the 'allowedTCPPorts' in your Nix config will be ignored!
iptables -A INPUT -j nixos-fw
'';
};
# Fail2ban - automatic IP banning
services.fail2ban = {
enable = true;
maxretry = 3;
bantime = "1h";
banaction = "iptables-multiport";
jails = {
# SSH brute force protection (uses systemd journal backend)
sshd = {
enabled = true;
settings = {
filter = "sshd";
port = "2424";
maxretry = 3;
bantime = "1h";
};
};
# Recidive - ban repeat offenders for 1 week
recidive = {
enabled = true;
settings = {
filter = "recidive";
logpath = "/var/log/fail2ban.log";
bantime = "1w";
findtime = "1d";
maxretry = 3;
};
};
# HTTP authentication failures (Traefik)
http-auth = {
enabled = true;
settings = {
filter = "traefik-auth";
port = "80,443";
logpath = "/var/log/traefik/access.log";
maxretry = 5;
bantime = "1h";
};
};
# HTTP scanning/attacks (Traefik)
http-botsearch = {
enabled = true;
settings = {
filter = "traefik-botsearch";
port = "80,443";
logpath = "/var/log/traefik/access.log";
maxretry = 2;
bantime = "2h";
};
};
};
};
# Custom fail2ban filters for Traefik
environment.etc."fail2ban/filter.d/traefik-auth.conf".text = ''
[Definition]
failregex = ^<HOST> -.*"(GET|POST|HEAD|PUT|DELETE).*" (401|403) \d+.*$
ignoreregex =
'';
environment.etc."fail2ban/filter.d/traefik-botsearch.conf".text = ''
[Definition]
failregex = ^<HOST> -.*"(GET|POST|HEAD|PUT|DELETE).*" 404 \d+.*$
^<HOST> -.*"(GET|POST|HEAD|PUT|DELETE).*/(\.|wp-|php|admin|login|xmlrpc|\.env|\.git|\.aws|\.azure).*" \d+.*$
ignoreregex =
'';
# SSH hardening
services.openssh.settings = {
PermitRootLogin = "no";
MaxAuthTries = 3;
MaxSessions = 20;
LoginGraceTime = 30;
ClientAliveInterval = 300;
ClientAliveCountMax = 2;
PermitEmptyPasswords = "no";
ChallengeResponseAuthentication = "no";
UsePAM = true;
LogLevel = "VERBOSE";
X11Forwarding = false;
AllowTcpForwarding = "no";
AllowAgentForwarding = "no";
PermitTunnel = "no";
};
# Kernel network hardening
boot.kernel.sysctl = {
# IP Spoofing protection
"net.ipv4.conf.all.rp_filter" = 1;
"net.ipv4.conf.default.rp_filter" = 1;
# Ignore ICMP broadcasts
"net.ipv4.icmp_echo_ignore_broadcasts" = 1;
# Disable source routing
"net.ipv4.conf.all.accept_source_route" = 0;
"net.ipv4.conf.default.accept_source_route" = 0;
"net.ipv6.conf.all.accept_source_route" = 0;
"net.ipv6.conf.default.accept_source_route" = 0;
# Disable redirects
"net.ipv4.conf.all.send_redirects" = 0;
"net.ipv4.conf.default.send_redirects" = 0;
# SYN flood protection
"net.ipv4.tcp_syncookies" = 1;
"net.ipv4.tcp_max_syn_backlog" = 2048;
"net.ipv4.tcp_synack_retries" = 2;
"net.ipv4.tcp_syn_retries" = 5;
# Log martian packets
"net.ipv4.conf.all.log_martians" = 1;
"net.ipv4.conf.default.log_martians" = 1;
# Ignore redirects
"net.ipv4.conf.all.accept_redirects" = 0;
"net.ipv4.conf.default.accept_redirects" = 0;
"net.ipv4.conf.all.secure_redirects" = 0;
"net.ipv4.conf.default.secure_redirects" = 0;
"net.ipv6.conf.all.accept_redirects" = 0;
"net.ipv6.conf.default.accept_redirects" = 0;
# Connection tuning
"net.core.somaxconn" = 4096;
"net.core.netdev_max_backlog" = 65536;
"net.ipv4.tcp_max_orphans" = 65536;
"net.ipv4.tcp_fin_timeout" = 15;
"net.ipv4.tcp_keepalive_time" = 300;
"net.ipv4.tcp_keepalive_probes" = 5;
"net.ipv4.tcp_keepalive_intvl" = 15;
};
# Audit logging
security.auditd.enable = true;
# Fail2ban log directory
systemd.tmpfiles.rules = [
"d /var/log/fail2ban 0755 root root -"
"d /var/log/traefik 0755 root root -"
];
# Copy the NixOS configuration file and link it from the resulting system
# (/run/current-system/configuration.nix). This is useful in case you
# accidentally delete configuration.nix.

View File

@@ -1,125 +0,0 @@
# AI Worker Restricted Access
This module provides SSH access for the AI worker (hermes-agent) to run docker commands on the host with restrictions.
## Security Model
### Overview
The `ai-worker` user is a member of the `docker` group, but the `docker` binary is wrapped with a script that **blocks dangerous subcommands** while allowing safe operations.
### Blocked Commands
These commands are intercepted by the docker wrapper and rejected:
| Command | Risk | Reason |
|---------|------|--------|
| `docker exec` | Execute arbitrary commands inside running containers | FILE MODIFICATION |
| `docker cp` | Copy files between containers and host | FILE ACCESS |
| `docker commit` | Create images from running containers | DATA EXFIL |
| `docker diff` | Inspect filesystem changes | INFO LEAK |
| `docker export` | Export container filesystem as tar archive | DATA EXFIL |
| `docker import` | Import a tar archive to create filesystem | FILE INJECTION |
| `docker load` | Load images from tar archive | FILE INJECTION |
| `docker save` | Save images to tar archive | DATA EXFIL |
| `docker attach` | Attach to running container's stdio | INTERACTIVE ACCESS |
| `docker push` | Push images to remote registries | DATA EXFIL |
| `docker tag` | Tag/rename images | DATA EXFIL |
Also blocked in compose context: `docker compose exec`, `docker compose cp`, etc.
### Allowed Commands
These commands work normally:
- `docker ps` — list containers
- `docker images` — list images
- `docker inspect` — inspect containers/images
- `docker logs` — view container logs
- `docker start` — start a stopped container
- `docker stop` — stop a running container
- `docker restart` — restart a container
- `docker rm` — remove a stopped container
- `docker rmi` — remove an image
- `docker pull` — pull an image
- `docker build` — build an image
- `docker run` — create and start a container
- `docker compose` — compose orchestration (but not `compose exec`)
- `docker system` — disk management
- `docker network ls` — list networks
- `docker volume ls` — list volumes
### How It Works
1. A wrapper script intercepts `docker` calls in the user's PATH
2. It parses the first non-flag argument to determine the subcommand
3. If the subcommand is in the blocklist, it prints an error and exits
4. Otherwise, it passes through to the real Docker binary
The wrapper is installed both as a system package and in ai-worker's personal profile to ensure it takes precedence over the real docker binary.
### Why Not Use Docker Authorization Plugins?
Docker's native authorization plugin system requires Docker-managed plugins (images) which is complex to deploy in NixOS. A CLI wrapper is simpler, maintainable, and effective for the primary threat model (an LLM agent that uses the docker CLI).
Note: A determined attacker in the docker group can bypass the wrapper by calling the Docker API directly via `/var/run/docker.sock`. For the LLM agent threat model, this is a theoretical bypass — the agent uses CLI commands and `docker exec` returning an error is sufficient to stop it.
### Filesystem Access
- **Home directory**: `/home/ai-worker` (standard user home)
- **No bind mounts**: Cannot access `/home/gortium/infra` or other host files
- **Cannot access**: Any files outside standard system paths
## SSH Access
Connect as:
```bash
ssh ai-worker@lazyworkhorse
```
The working directory will be `/home/ai-worker`. No infra repo access.
## Verification
```bash
# Verify wrapper is in PATH
sudo -u ai-worker which docker
# Should show: /home/ai-worker/.nix-profile/bin/docker (wrapped version)
# Test blocked command (should fail)
sudo -u ai-worker docker exec ollama ollama list
# Expected: ERROR: docker 'exec' is blocked by security policy
# Test allowed command (should work)
sudo -u ai-worker docker ps
# Expected: CONTAINER ID IMAGE ...
# Verify docker group membership
groups ai-worker
# Should show: ai-worker docker
```
## Troubleshooting
If docker commands fail unexpectedly:
```bash
# Check which docker binary is being used
which docker
# If this shows /run/current-system/sw/bin/docker, the wrapper is not in PATH
# Check if the wrapper is installed
ls -la $(which docker)
# Verify you're running as the right user
whoami
```
If SSH connection fails:
```bash
# Check SSH key is authorized
cat /home/ai-worker/.ssh/authorized_keys
# Check SSH service
systemctl status sshd
```

View File

@@ -1,124 +0,0 @@
{ config, pkgs, lib, ... }:
with lib;
let
# Docker subcommands that are BLOCKED for ai-worker
# These commands allow file modification inside containers or data exfiltration.
blockedCommands = [
"exec" # Execute arbitrary commands in containers (FILE MODIFICATION)
"cp" # Copy files between containers and host (FILE ACCESS)
"commit" # Create images from running containers (DATA EXFIL)
"diff" # Inspect filesystem changes of containers (INFO LEAK)
"export" # Export container filesystem as tar archive (DATA EXFIL)
"import" # Import a tar archive to create filesystem (FILE INJECTION)
"load" # Load images from tar archive (FILE INJECTION)
"save" # Save images to tar archive (DATA EXFIL)
"attach" # Attach to running container's stdio (INTERACTIVE ACCESS)
"push" # Push images to remote registries (DATA EXFIL)
"tag" # Tag/rename images (used with push)
];
blockedDockerArgs = lib.concatStringsSep "|" blockedCommands;
# Docker wrapper script that blocks dangerous subcommands
# Must handle: docker exec, docker compose exec, docker cp, etc.
restrictedDockerScript = pkgs.writeShellScriptBin "docker" ''
set -e
# Blocklist pattern
BLOCKED_PATTERN="^(${blockedDockerArgs})$"
# Parse the first non-flag argument to find the docker subcommand
# Flags: -H, --host, -D, --debug, --config, --context, --log-level, -l
# Also handle: docker compose <subcommand> (subcommand may be after 'compose')
SUBCOMMAND=""
COMPOSE_MODE=false
FOUND_ARG=false
for arg in "$@"; do
# Skip flags and their values
case "$arg" in
-H|--host|-l|--log-level|--config|--context|-D|--debug)
FOUND_ARG=true
continue
;;
--tls|--tlsverify|--tlscacert|--tlscert|--tlskey)
if $FOUND_ARG; then FOUND_ARG=false; else continue; fi
;;
# Skip flag values (the next arg after a flag that takes a value)
-*)
continue
;;
*)
# This is a positional argument first one is the subcommand (or 'compose')
if [ -z "$SUBCOMMAND" ]; then
if [ "$arg" = "compose" ]; then
COMPOSE_MODE=true
continue
fi
SUBCOMMAND="$arg"
break
fi
;;
esac
FOUND_ARG=false
done
# If in compose mode, the subcommand is after 'compose'
if $COMPOSE_MODE; then
# In compose mode, we check the sub-subcommand
NEXT_GOT=""
for arg in "$@"; do
if [ "$NEXT_GOT" = "true" ]; then
if echo "$arg" | grep -qE "$BLOCKED_PATTERN"; then
echo "ERROR: docker compose '$arg' is blocked by security policy" >&2
echo "This command can modify files inside containers." >&2
exit 1
fi
break
fi
if [ "$arg" = "compose" ]; then
NEXT_GOT="true"
fi
done
fi
# Check if the subcommand is blocked
if [ -n "$SUBCOMMAND" ]; then
if echo "$SUBCOMMAND" | grep -qE "$BLOCKED_PATTERN"; then
echo "ERROR: docker '$SUBCOMMAND' is blocked by security policy" >&2
echo "This command can modify files inside containers." >&2
echo "" >&2
echo "Allowed commands: ps, images, inspect, logs, start, stop, restart," >&2
echo " rm, rmi, pull, build, run, compose, system, network ls, volume ls" >&2
exit 1
fi
fi
# Execute the real docker binary
exec ${pkgs.docker}/bin/docker "$@"
'';
in
{
options.services.aiWorkerAccess = mkOption {
type = types.bool;
default = false;
description = "Enable AI worker SSH access with restricted docker commands";
};
config = mkIf config.services.aiWorkerAccess {
# ai-worker is in docker group for normal docker operations
users.groups.docker.members = [ "ai-worker" ];
# Install the docker wrapper for ai-worker
# This puts a filtered 'docker' script in ai-worker's PATH that blocks
# dangerous commands like exec, cp, commit, etc.
# The real docker binary is still available at its store path, but the
# wrapper intercepts it because ~/.nix-profile/bin/ comes before /run/.../sw/bin/ in PATH.
users.users.ai-worker.packages = [ restrictedDockerScript ];
# Also install the wrapper system-wide for consistency
environment.systemPackages = [ restrictedDockerScript ];
};
}

View File

@@ -1,87 +1,45 @@
{ pkgs, ... }: {
systemd.services.init-ollama-model = {
description = "Initialize LLM models with extra context in Ollama Docker";
# On s'assure que Docker tourne avant de lancer ce script
after = [ "docker.service" ];
after = [ "docker-ollama.service" ];
wantedBy = [ "multi-user.target" ];
script = ''
# Fonction de création asynchrone pour ne pas bloquer le démarrage
(
echo "Starting asynchronous Ollama initialization..."
# Attente d'Ollama (maximum 120 secondes pour éviter une boucle infinie)
TIMEOUT=60
COUNT=0
while ! ${pkgs.curl}/bin/curl -s -f http://127.0.0.1:11434/api/tags > /dev/null; do
if [ $COUNT -ge $TIMEOUT ]; then
echo "Ollama did not become ready in time. Exiting."
exit 1
fi
echo "Waiting for Ollama API to be reachable..."
sleep 5
COUNT=$((COUNT + 5))
done
# Wait for Ollama
while ! ${pkgs.curl}/bin/curl -s http://localhost:11434/api/tags > /dev/null; do
sleep 2
done
create_model_if_missing() {
local model_name=$1
local base_model=$2
# Vérification robuste via l'API HTTP d'Ollama plutôt que docker exec (évite les conflits de tty)
if ! ${pkgs.curl}/bin/curl -s http://127.0.0.1:11434/api/tags | ${pkgs.jq}/bin/jq -e ".models[] | select(.name == \"$model_name\")" > /dev/null; then
echo "$model_name not found, creating from $base_model..."
# Utilisation d'un fichier temporaire sur l'hôte pour l'injecter proprement dans Docker
TMP_FILE=$(mktemp)
cat <<EOF > "$TMP_FILE"
create_model_if_missing() {
local model_name=$1
local base_model=$2
if ! ${pkgs.docker}/bin/docker exec ollama ollama list | grep -q "$model_name"; then
echo "$model_name not found, creating from $base_model..."
${pkgs.docker}/bin/docker exec ollama sh -c "cat <<EOF > /root/.ollama/$model_name.modelfile
FROM $base_model
TEMPLATE """{{- if .System }}
[SYSTEM_PROMPT]
{{ .System }}
[/SYSTEM_PROMPT]
{{- end }}
{{- range .Messages }}
{{- if eq .Role "user" }}
[INST]
{{ .Content }}
[/INST]
{{- else if eq .Role "assistant" }}
{{ .Content }}
{{- end }}
{{- end }}"""
PARAMETER num_ctx 131072
PARAMETER num_predict 4096
PARAMETER num_keep 1024
PARAMETER repeat_penalty 1.1
PARAMETER top_k 40
PARAMETER stop "[INST]"
PARAMETER stop "[/INST]"
PARAMETER stop "</s>"
EOF
PARAMETER stop \"[INST]\"
PARAMETER stop \"[/INST]\"
PARAMETER stop \"</s>\"
EOF"
${pkgs.docker}/bin/docker exec ollama ollama create "$model_name" -f "/root/.ollama/$model_name.modelfile"
else
echo "$model_name already exists, skipping."
fi
}
# Copie et création dans le conteneur
${pkgs.docker}/bin/docker cp "$TMP_FILE" ollama:/tmp/model.modelfile
${pkgs.docker}/bin/docker exec ollama ollama create "$model_name" -f /tmp/model.modelfile
${pkgs.docker}/bin/docker exec ollama rm /tmp/model.modelfile
rm -f "$TMP_FILE"
else
echo "$model_name already exists, skipping."
fi
}
# Create Nemotron
create_model_if_missing "nemotron-3-nano:30b-128k" "nemotron-3-nano:30b"
# Create Devstral
create_model_if_missing "devstral-small-2:24b-128k" "devstral-small-2:24b"
) &
# Create Nemotron
create_model_if_missing "nemotron-3-nano:30b-128k" "nemotron-3-nano:30b"
# Create Devstral
create_model_if_missing "devstral-small-2:24b-128k" "devstral-small-2:24b"
'';
serviceConfig = {
Type = "forking"; # Permet à systemd de savoir que le script passe en arrière-plan via '&'
User = "root";
Type = "oneshot";
RemainAfterExit = true;
};
};
}

63
scripts/deploy-ssh-config Normal file
View File

@@ -0,0 +1,63 @@
# Hermes Container SSH Configuration
# For NixOS deployment to remote hosts
#
# Usage:
# cp scripts/deploy-ssh-config ~/.ssh/config.d/hermes-include
# Or: cat scripts/deploy-ssh-config >> ~/.ssh/config
#
# This config covers all NixOS hosts managed from the Hermes container.
# Lazyworkhorse has two users: ai-worker (primary automation) and gortium (admin).
# Cyt-pi connects via reverse SSH tunnel on port 19999.
# uConsole is a placeholder until LAN-hostname resolution is confirmed.
# ── Global defaults ──────────────────────────────────────────────────
Host *
ServerAliveInterval 60
ServerAliveCountMax 3
TCPKeepAlive yes
Compression yes
CompressionLevel 6
ControlMaster auto
ControlPath ~/.ssh/controlmasters/%r@%h:%p
ControlPersist 10m
StrictHostKeyChecking no
UserKnownHostsFile /dev/null
# ── Hosts ──────────────────────────────────────────────────────────────
# Lazyworkhorse — x86_64 main server (ai-worker@lazyworkhorse.net:2424)
Host lazyworkhorse
HostName lazyworkhorse.net
User ai-worker
Port 2424
IdentityFile /opt/data/home/.ssh/id_hermes_gitea
# Lazyworkhorse — admin access (gortium@lazyworkhorse.net:2425)
Host lazyworkhorse-admin
HostName lazyworkhorse.net
User gortium
Port 2425
IdentityFile /opt/data/home/.ssh/id_hermes_gitea
# Cyt-pi — aarch64 Pi Zero 2 W
# Connected via reverse SSH tunnel (gortium directs tunnel to :19999)
Host cyt-pi
HostName localhost
User gortium
Port 19999
IdentityFile /opt/data/home/.ssh/id_hermes_gitea
# uConsole — aarch64 ClockworkPi (placeholder hostname)
# Replace uconsole.lan with actual IP/hostname when deployed
Host uConsole uconsole
HostName uconsole.lan
User gortium
Port 22
IdentityFile /opt/data/home/.ssh/id_hermes_gitea
# ── Gitea host — for git operations ──────────────────────────────────
Host code
HostName code.lazyworkhorse.net
Port 2222
User gortium
IdentityFile /opt/data/home/.ssh/id_hermes_gitea

286
scripts/deploy.sh Executable file
View File

@@ -0,0 +1,286 @@
#!/usr/bin/env bash
# NixOS Deployment Helper Script
# Remote NixOS deployment from Hermes container to target hosts.
#
# Usage: ./deploy.sh <hostname> [branch] [action]
#
# Actions:
# switch Activate configuration now (default)
# boot Activate on next reboot
# test Activate without switching generations
# build Build locally only, no remote activation
# dry-activate Show what would change without applying
#
# Examples:
# ./deploy.sh lazyworkhorse # deploy master/switch to lazyworkhorse
# ./deploy.sh cyt-pi feat/test boot # deploy feat/test branch, activate on boot
# ./deploy.sh uConsole master build # just build, don't deploy
# NO_BUILD_CHECK=1 ./deploy.sh uConsole # skip the pre-flight nix build
#
# Environment variables:
# SSH_USER SSH user (default: auto-detected per host)
# SSH_PORT SSH port (default: auto-detected per host)
# SSH_KEY SSH identity file
# BUILD_HOST Build flake for this host (default: same as target host)
# NO_BUILD_CHECK Set to 1 to skip local nix build before deployment
set -euo pipefail
# ── Colors ──────────────────────────────────────────────────────────────
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
NC='\033[0m' # No Color
info() { echo -e "${BLUE}[INFO]${NC} $*"; }
ok() { echo -e "${GREEN}[OK]${NC} $*"; }
warn() { echo -e "${YELLOW}[WARN]${NC} $*"; }
error() { echo -e "${RED}[ERROR]${NC} $*" >&2; }
step() { echo -e "\n${CYAN}━━━ $* ━━━${NC}"; }
# ── Cleanup trap ───────────────────────────────────────────────────────
cleanup() {
local ec=$?
if [ $ec -ne 0 ]; then
error "Deployment failed with exit code $ec"
fi
exit $ec
}
trap cleanup EXIT
# ── Usage / Help ───────────────────────────────────────────────────────
show_usage() {
cat <<EOF
Usage: $0 <hostname> [branch] [action]
Remote NixOS deployment from Hermes container to target hosts.
HOSTNAME (required):
lazyworkhorse x86_64 main server
cyt-pi aarch64 Pi Zero 2 W (via reverse tunnel)
uConsole aarch64 ClockworkPi
BRANCH (optional, default: master):
Git branch or tag to deploy. Fetched from origin.
ACTION (optional, default: switch):
switch Activate configuration now (default)
boot Activate on next reboot
test Activate without switching generations
build Build locally only, skip remote deployment
dry-activate Show what would change without applying
Environment variables:
SSH_USER SSH username override
SSH_PORT SSH port override
SSH_KEY SSH identity file path
BUILD_HOST Build flake hostname (default: same as HOSTNAME)
NO_BUILD_CHECK Skip local nix build validation (set to 1)
Examples:
$0 lazyworkhorse # deploy master/switch
$0 cyt-pi feat/test boot # deploy feature branch, boot
$0 uConsole master build # just build, no remote
NO_BUILD_CHECK=1 $0 uConsole # skip build check
EOF
exit 0
}
# ── Argument parsing ───────────────────────────────────────────────────
HOSTNAME="${1:-}"
BRANCH="${2:-master}"
ACTION="${3:-switch}"
NO_BUILD_CHECK="${NO_BUILD_CHECK:-0}"
if [ "$HOSTNAME" = "--help" ] || [ "$HOSTNAME" = "-h" ] || [ -z "$HOSTNAME" ]; then
show_usage
fi
# ── Host configuration ─────────────────────────────────────────────────
case "$HOSTNAME" in
lazyworkhorse)
DEFAULT_SSH_USER="ai-worker"
DEFAULT_SSH_PORT="2424"
ARCH="x86_64-linux"
;;
cyt-pi)
DEFAULT_SSH_USER="gortium"
DEFAULT_SSH_PORT="19999"
ARCH="aarch64-linux"
;;
uConsole)
DEFAULT_SSH_USER="gortium"
DEFAULT_SSH_PORT="22"
ARCH="aarch64-linux"
;;
*)
error "Unknown host: $HOSTNAME"
echo "Supported hosts: lazyworkhorse, cyt-pi, uConsole"
exit 1
;;
esac
SSH_USER="${SSH_USER:-$DEFAULT_SSH_USER}"
SSH_PORT="${SSH_PORT:-$DEFAULT_SSH_PORT}"
SSH_KEY="${SSH_KEY:-/opt/data/home/.ssh/id_hermes_gitea}"
BUILD_HOST="${BUILD_HOST:-$HOSTNAME}"
SSH_OPTS="-p $SSH_PORT -i $SSH_KEY -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null"
SSH_TARGET="${SSH_USER}@${HOSTNAME}"
export GIT_SSH_COMMAND="ssh -i $SSH_KEY -p 2222 -o StrictHostKeyChecking=no"
export PATH="/nix/var/nix/profiles/default/bin:$PATH"
# ── Banner ─────────────────────────────────────────────────────────────
echo "╔══════════════════════════════════════════════╗"
echo "║ NixOS Remote Deployment ║"
echo "╚══════════════════════════════════════════════╝"
info "Host: $HOSTNAME ($ARCH)"
info "Branch: $BRANCH"
info "Action: $ACTION"
info "SSH: ${SSH_USER}@${HOSTNAME}:${SSH_PORT}"
echo ""
# ── Pre-flight checks ─────────────────────────────────────────────────
step "Pre-flight checks"
# 1. Check required tools
for cmd in nix git ssh; do
if ! command -v "$cmd" &>/dev/null; then
error "Required tool not found: $cmd"
exit 1
fi
done
ok "Required tools available (nix, git, ssh)"
# 2. Check infra repo
INFRA_DIR="$(cd "$(dirname "$0")/.." && pwd)"
if [ ! -d "$INFRA_DIR/.git" ]; then
error "Not a git repository: $INFRA_DIR"
exit 1
fi
ok "Infra repo found at $INFRA_DIR"
# 3. Check SSH connectivity (skip for build-only actions)
if [ "$ACTION" != "build" ]; then
if ssh $SSH_OPTS -o ConnectTimeout=5 "$SSH_TARGET" "echo connected" &>/dev/null; then
ok "SSH connectivity to $HOSTNAME verified"
else
warn "Cannot reach $HOSTNAME via SSH — deployment step will fail later"
fi
fi
# ── Git sync ───────────────────────────────────────────────────────────
step "Git sync"
cd "$INFRA_DIR"
# Stash local changes if any
if ! git diff --quiet HEAD; then
warn "Local changes detected, stashing..."
git stash push -m "auto-stash before deploy $(date -Iseconds)"
STASHED=1
else
STASHED=0
fi
# Fetch and checkout
git fetch origin "$BRANCH" 2>/dev/null || git fetch origin master
if git rev-parse --verify "origin/$BRANCH" &>/dev/null 2>&1; then
# Remote branch exists — fast-forward merge
git checkout -B "$BRANCH" "origin/$BRANCH"
elif git rev-parse --verify "$BRANCH" &>/dev/null 2>&1; then
# Local branch or tag
git checkout "$BRANCH"
else
error "Branch/tag not found: $BRANCH"
exit 1
fi
ok "Checked out $BRANCH ($(git rev-parse --short HEAD))"
# Update submodules
if [ -f .gitmodules ]; then
git submodule update --init --recursive
ok "Submodules updated"
fi
# ── Build validation ──────────────────────────────────────────────────
if [ "$NO_BUILD_CHECK" != "1" ]; then
step "Build validation"
info "Building nixosConfigurations.$BUILD_HOST (no link)..."
if nix build --no-link --print-build-logs \
".#nixosConfigurations.${BUILD_HOST}.config.system.build.toplevel" 2>&1; then
ok "Build succeeded for $BUILD_HOST"
else
error "Build failed for $BUILD_HOST"
exit 1
fi
else
warn "Build check skipped (NO_BUILD_CHECK=1)"
fi
# ── Deployment ─────────────────────────────────────────────────────────
if [ "$ACTION" = "build" ]; then
step "Build complete (no deployment)"
info "Use one of: switch, boot, test, dry-activate to deploy"
exit 0
fi
step "Deployment ($ACTION)"
# Build the nixos-rebuild command
case "$ACTION" in
switch|boot|test)
nixos-rebuild "$ACTION" \
--flake ".#$HOSTNAME" \
--target-host "$SSH_TARGET" \
--build-host "localhost" \
--use-remote-sudo \
--max-jobs 4
;;
dry-activate)
nixos-rebuild dry-activate \
--flake ".#$HOSTNAME" \
--target-host "$SSH_TARGET" \
--build-host "localhost" \
--use-remote-sudo
;;
*)
error "Unknown action: $ACTION"
echo "Valid actions: switch, boot, test, build, dry-activate"
exit 1
;;
esac
# ── Check result ───────────────────────────────────────────────────────
DEPLOY_EXIT=$?
if [ $DEPLOY_EXIT -eq 0 ]; then
echo ""
ok "Deployment to $HOSTNAME ($ACTION) completed successfully"
case "$ACTION" in
switch|test)
info "Configuration is now active"
;;
boot)
info "Configuration will activate on next reboot"
;;
dry-activate)
info "Dry-run complete — no changes applied"
;;
esac
else
error "Deployment failed with exit code $DEPLOY_EXIT"
exit $DEPLOY_EXIT
fi
echo ""
echo "╔══════════════════════════════════════════════╗"
echo "║ Deployment Complete ║"
echo "╚══════════════════════════════════════════════╝"
info "Host: $HOSTNAME"
info "Branch: $BRANCH ($(git rev-parse --short HEAD))"
info "Action: $ACTION"
info "Time: $(date -Iseconds)"

View File

@@ -1,36 +1,34 @@
-----BEGIN AGE ENCRYPTED FILE-----
YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IHNzaC1lZDI1NTE5IEdoTUQ4QSBWNEpt
cGFNeVBBaDRqb3pLSEZGQW0wb3VmVnBoZCswUFkzbnBLUnJ0QTNrClRqVkk4RUVO
d29KYjd5YUcwankvaTFmVHUxQVpDT2ZYWHRaY3JXTUtQMU0KLT4gKXBtQ3UsXi1n
cmVhc2UgNnwxYCBXVyA/KCQmIHt9NAo3OTZVUHR2UXkvaEFwY0ZBdEJsaFpsbHJ0
cklKcDVHcEdWMEdPSkpnN0FiRU43RW5hUWFMdjR3WFRRSFBLSGlmClM3cTNJWlNM
TExkdHdXUHJISkNIaE1TTUxUc1NUWkV1a09HeFU3bVZwQXMKLS0tIGhOcXFTUElS
azJJNnEreUhMWTJBaVZGSTJPRUFqQkVYS01KRENUVVpZSDgK8+8onFejroBo7MeO
dW+so4lOsq4zJKn3f0cxmCFg1f0X8zt6h4Uc3A5Cvr1uU+6yw1FWmJ7xa3jJz3lO
EEaKQJXYC+xIIKGcA7qILa0SFp4a/4OuYjcg27HrlPhg7u5wDhQrd0LdVEe1Xngp
ZivX7P7HwIna3X8C+TL+K2v/AG2N/z86cdKfRvxyMKNbHhYw+CfHEnWgh8tJ++4h
G9evNniuNqte6cQaRe7jODfPNW4FuY/Sb7barlJ/M9iAQdYAdyLAzU1LABeHeUfD
wtHjxy9DUZ55Vg8bB8M2JJU9MkoRT4ewiVd9LeC1GWeVmKsm93wsmrov714i7U2j
wHtDkjqEF2MmzuQc18sjNaAHiwz8j6o5xU2L/Q4+Q707yISWG7RGZYh389Cr1rnw
siUq/Vunqw2wk13+J/4vu9nqt5mMktBaCtp+QiWIurjwB5LUAyChrSm+dg5lb0Mt
UhSc0lq1+E3vxAXM2Hmk+vP86VD+6WJvAU82VFApF1s6zG2FU1/AcOVVf54nan/q
f+rgSFfASHQCYSblUJHyEtwLNsWEmTGmOEn1buUKD/H0zatPQnc0rYpjlx2V0Sjd
6yB5+wPrZ0AkN1pjcsPKOv8Kaog2DzqIjib+SaSTaRxWHQEb9uzvaReAcYI5HOpE
gkC040HN33BItATbo4+hz70Im8Ni/VXD+g6yzM6Hj1hJL+PinTKeg5keQRFIZjMx
grzievB2wVBBgLgN3qMdTFmpplaL7iL702JjXZUTTK9Izp+9wiCsV1fTa53FWDht
ylFL5SWElqXjK+QBXxAe+Jk6VQov5HI21YDXL67S554ABeRok23wxrQ31TCI4xq9
PQV7VtNRjyVud7S29m3OwpWOsgTZhn+JclHj2v4bNJzJkJnZRTmcvGPktzRI5+R4
e5vxVhGnJDzI71txaHl8+xS1lu9VzCQUrxX6TXyTRV4KjIOz0g06JOBgmBRBvJca
7MZbC65xpisl/gyLRbgkVga3t94dPV+dpZsn8eq6427IyRbKslJefatggR9//c6I
5N5fl0fR3gJQMB+HRbipBH2YsdbdWJyb4Nn6STZxIfrqoG/xC6C1raF0xK7hUx6i
4DUDSPohM8fOIswQPfE+FH3eygfzu/Ln5+ghsgHTEhgFvmgMvyxaAt6kHIzIUhMX
M3dASr4VPDpIXuXsRWwYLEifhzxsuvwVxfwtsnCaR6XKijsYECWGDdYOWHdleeqx
wDPhxEesfFVhKxhrKY9Ir8k9/FFBKQU/3GjW4+SMAg5Al1YEzxshP9vKuVcsei7W
JDwAwotNXaCm6NBckiyZJE53ou6+gckPY7V9cOfnuH74Z9ywkFzB3HW3ZlonaGyM
oGmLGcccavFtyhg5s/As4i6X8ARIpDiwe59Pn3GNXMctySqIrrr2ogUoXgrfFCie
6GOTdeMW7GeOSdJUxCofghlspS/nq01Og77VI/beWYrIwLubSka6Zaltww9zgObk
/FGEMgFkEpq7iyCvYSPA8F46pJKvnMP3S84AWCPmcTcHeg4lwGPvs6btexXBGdoz
nkCyq7wdH5Nngm7jUbl88LtaLZPAQkuqXphBVTnrF9Ofbnb4iRZ2Op4xpx9rGyvx
mO6UEhL6V1i2YZFNkNMg/W8aoMiUgBdqbkxaxblT9L0aNdlFU9+LbWYolURVEadd
Qjv0Z1gMA+tsuBbVszwsMfneZ5+B9Q==
YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IHNzaC1lZDI1NTE5IEdoTUQ4QSBOL29w
eGk1N2xxTHJtaUEvWWZmbkh1bk11Tjk3anNnMDB1cCtPYUMzdTNJCkdhQ08vblNG
UlV1K2xVTGZVTzFWYXAzcjZaMWs0RTFWdStKSmlSTURvK1EKLT4gLC1zKU8zVkgt
Z3JlYXNlIFUiXFcpS302IHByVn5jOy0gRDMKQjV3SHpDWUIybGFyQUg3ZlR0R2hV
eWM3SFlCVW5mdlpBVUF3a0xpNlZCeGNUd1oxTTlkc1RkTXdZS0lFTmN3Ci0tLSA3
VlBqM1VLWllZc0JnOTMvUFRjMU13OTdzMmhsdGJubkk5eGpERVVLYUk4Cnzh5UbU
FlgqpM8jkJ6XlsaIDCw/G3D6uJ/GRJW4gIekuhAUxpZJrc8eOA8ZuHfGrBbH3acV
tVafX5F0Kr2oOblqZ6gduZOUS52KmWH8stiBJM+e5ZZ7zRQVE4PJUKUPCzi+WdcH
zr295T//FOdicrYHdsjfziKEHzBtUCFiATW05+O2zMjYjO6cPzePcCzPWinwiID6
V+f6ngfkkQaj3wBGkzaieQJzRcdSwky21aVhGCCX/bvqx61iW2d5QAKxGbtQ2RcG
X1okr+xunAM94nzDMv46vyN97KxY7cZd4pAaOxoICc2Tfhtw6F+iS6QkQh1odJzO
7ZH+sSQCvndG+8z9shXGiHalASF5tdguM+JlEvAGljcaiAUtsQWxr9CoWiEkC6c6
NCaECSYO8Il+SXBQnSZSGJSNDhuPYCYrsjXGSAONFixuyeslAkq9x2WUaUS4H063
1QvRF7XO2tBPtgCLsSjdiGp0h+ImUaGdu6fDR7zrDsGsaAFCSFeH/rGNNXRQ2vP2
CSfPfDDCqpUSCn0WuA30BtaPLxGmZT6OjFevKzYMNDmdeq9ia/q8K0hmjLUBdN3k
tdYWbwoaf4gYbUWxSleD768b0Jgxss9Vod+sFQ+NYRksdGIeyND+aQIc312XehfA
qHFBS8nlj7eUF5bdvCYQ64z741mH4cNlGxyjPBH1x8FHnEOocJXYt1l2AZSRJmJA
c3z0QGXyuCbsrLBXWK1EKa/Juo4PGGsEVoLRhwJAQy9+i1JN0yrfRvSPyzvD4px6
wRPzlZ80MQdb2lv84WS/zcOEZmZzlLntszTRRdIfAsuaavP2Rquh4rEXABYeTZwp
5dem79s8bdW2nFsGMNz1OQKQwocyjYu1jJMHu6Gp7Ngdl1xyW7xfg0dezE1c0cIh
xt1aLER9YJp4n5to5cOH16l3mjDHnAvABx38xE9loNL3399J/evw7LxpTYQ4v2Xv
x8xnDHcqJ+deFSwyuUnMS5DkUeYuHmUl0Q2WYcfY+ibCmcgCb2ObTtuN1/ZxNYrL
OKrnmfuSvBgyuIOj5e6uWW0+Zs8dHKXu2TgV8WignxOhl5zQgCpCBlqVfO0t+NCu
Gi26hU/fhGWQ/1oQa3VkpGsypZbJpgQvfWxfcGHP/MMhnl01zzlP8/aexSY3pAxf
fz9v0IVh6xxtu3zbiiVzUsXbfG7t+xY98jMphf4AS2mWva3GWVmhhu0lS3J3P+go
YEEP4rOFHeU0Y1/6kLydTXvz4jMH0H92XQIzshd7vzQnEJPUPAzqRmw3LKYGgCI+
wZEnxJ6ckqTkGBFnxTpy9LLllwmnz2Ky87nY3XAmqxlhb2Ap1XFAlfgszmGjc+Il
KkIgoWQHTUm6QM9ta++oUTIDneOvxGd0zZsqoEhiC/7E01BNNZ6E58TeJU3fDlA3
mX6n05XjwPRpgXZfayPoAgBlZc2H4KeiynxwNZ/dWu7qz7L6Ppk6Nvtly8giTbFx
CA+tto7vq+D+CAEJ4bgyq4BCH4GL4APrhPcWp98Mko1WCiRTIKgkZxQCYvlg/LZq
LNhMacP9T1qTvNC+yR1NEMiegE3APzk6CkDpVaO9+5f/sqifNPINCMothenI9ePw
zjQLI3Mo1m73bkomytUZ7i1VstP5sEZ5LF72Sq7BpR3oQ3Gp0CAN9w==
-----END AGE ENCRYPTED FILE-----

View File

@@ -1,9 +0,0 @@
-----BEGIN AGE ENCRYPTED FILE-----
YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IHNzaC1lZDI1NTE5IEdoTUQ4QSA3VG9Z
MVFPVFc2VVJ3d0h0dmtBUnI3WHl2SzUxTkRZbjFCaGloWmV3dnd3ClcxdnVPeGd6
SU4zR0Q0K1dtVjRRVHd0VW5XSFI0dVFpTjZnYk1DNjRxTVEKLT4gQzlgRy1ncmVh
c2UKeUozOWgyUytSTVF0NjY2STBEb2VadwotLS0gblI3bmJCUWxxU3QrYTEyVFBI
Snc4NC9rTkh0NnZYbUtxUE9hRWRkelpmMAq58fmH6cK13GeD7wGLxKmx10hmJeW4
b7KqnCD1ZP7uG85s32xzVRwRG8RrG4xZo5nR9Mrtg1CoTSFfUGeFnf5xveN+Ej0X
wDVB1LwC+Q==
-----END AGE ENCRYPTED FILE-----

View File

@@ -1,11 +0,0 @@
-----BEGIN AGE ENCRYPTED FILE-----
YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IHNzaC1lZDI1NTE5IEdoTUQ4QSA5dzVG
WUNvT3NlRmcrWS81bzJqSWlTekVYaDFFTE10SkI2dEgzaGpxcUI4Cmk5Y0FGYTRZ
K0NGYzY3VUp4aS9ZZGRmWTgybDJFUURva2pZNmVOS3QxdEUKLT4gPnVRTCtldGMt
Z3JlYXNlCk04OTJZeFRNeDI5aGpMVTk1ZTE0Y2FMMnFEMjlJalJpMHRlaTE4ZWIx
d2lCRGQ5RHVjcktOMGJCb1VERlNWcTYKaSt0L1Z6dVJ0QWIyZkhsYzFEVjZSQWUr
ZWpwVlo1TmhoUFJZdkEvR0gxNlVhcXF2ZTRnCi0tLSBLcmM2MThNVkdWclpHUXRr
VTF6QVk2WUZlTXpZMVNLMlpBOFc3M1o5WjZzCs9xbPlIX+u5vRSQ/z9utu+I9S2c
02DOsIb1kzxzb1OK91b8Kh4JucQSq3qkyEvRucsNn5QW8hIHDnRuND6EbPyN7p4S
YB/F0dxSqgnq
-----END AGE ENCRYPTED FILE-----

View File

@@ -4,80 +4,11 @@
group = "ai-worker";
home = "/home/ai-worker";
createHome = true;
# ai-worker stays in docker group for normal docker operations (ps, start, stop, compose, ...)
# Dangerous commands (exec, cp, commit) are blocked by a wrapper script.
extraGroups = [ "docker" ];
shell = pkgs.bashInteractive;
openssh.authorizedKeys.keys = [
keys.users.ai-worker.main
];
# No password login - SSH key only
hashedPassword = "!";
};
users.groups.ai-worker = {};
# Enable restricted AI worker SSH access
# SECURITY: ai-worker is in docker group but docker commands are filtered:
# ALLOWED: ps, images, logs, start, stop, restart, rm, rmi, pull, build, run, compose
# BLOCKED: exec, cp, commit, diff, export, import, load, save, attach, push
# The filtering is done by a docker wrapper in ai-worker's PATH.
services.aiWorkerAccess = true;
# Restricted sudo for ai-worker - security checks only (not for docker)
security.sudo.extraRules = [
{
users = [ "ai-worker" ];
commands = [
# Firewall checks
{
command = "/run/wrappers/bin/sudo iptables -L -n -v";
options = [ "NOPASSWD" ];
}
{
command = "/run/wrappers/bin/sudo iptables -S";
options = [ "NOPASSWD" ];
}
# Fail2ban status
{
command = "/run/current-system/sw/bin/fail2ban-client status";
options = [ "NOPASSWD" ];
}
{
command = "/run/current-system/sw/bin/fail2ban-client status *";
options = [ "NOPASSWD" ];
}
{
command = "/run/current-system/sw/bin/fail2ban-client get * banned";
options = [ "NOPASSWD" ];
}
# Log inspection
{
command = "/run/current-system/sw/bin/journalctl -t kernel -n 100";
options = [ "NOPASSWD" ];
}
{
command = "/run/current-system/sw/bin/journalctl -u fail2ban -n 50";
options = [ "NOPASSWD" ];
}
{
command = "/run/current-system/sw/bin/journalctl -u firewall -n 50";
options = [ "NOPASSWD" ];
}
# SSH config verification
{
command = "/run/current-system/sw/bin/sshd -T";
options = [ "NOPASSWD" ];
}
# Network diagnostics
{
command = "/run/current-system/sw/bin/ss -tlnp";
options = [ "NOPASSWD" ];
}
{
command = "/run/current-system/sw/bin/cat /proc/net/tcp";
options = [ "NOPASSWD" ];
}
];
}
];
}