diff --git a/AGENTS.md b/AGENTS.md
index 8459aa8..fe0de1d 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -5,6 +5,7 @@ This document outlines the development conventions for this NixOS-based infrastr
 ## Build & Deployment
 
 - **Build/Deploy:** Use `nixos-rebuild switch --flake .#<hostname>` to build and deploy the configuration for a specific host.
+- **CRITICAL — Validate before pushing:** Always `nix build --no-link '.#nixosConfigurations.<hostname>.config.system.build.toplevel'` (or `nh os build`) and confirm it succeeds before pushing any changes. Never push untested NixOS configs.
 - **Development Shell:** Activate the development environment with `nix develop`.
 
 ## Linting & Formatting
diff --git a/assets/compose b/assets/compose
index fb0f2cb..a79fe9d 160000
--- a/assets/compose
+++ b/assets/compose
@@ -1 +1 @@
-Subproject commit fb0f2cbe844b25ca413ded99debc77985bea6281
+Subproject commit a79fe9dffacebae6d4ee17502885e9cdfa852073
diff --git a/docker/hermes/Dockerfile.full b/docker/hermes/Dockerfile.full
new file mode 100644
index 0000000..1edd524
--- /dev/null
+++ b/docker/hermes/Dockerfile.full
@@ -0,0 +1,71 @@
+FROM ghcr.io/astral-sh/uv:0.11.6-python3.13-trixie@sha256:b3c543b6c4f23a5f2df22866bd7857e5d304b67a564f4feab6ac22044dde719b AS uv_source
+FROM tianon/gosu:1.19-trixie@sha256:3b176695959c71e123eb390d427efc665eeb561b1540e82679c15e992006b8b9 AS gosu_source
+FROM debian:13.4
+
+# Disable Python stdout buffering to ensure logs are printed immediately
+ENV PYTHONUNBUFFERED=1
+
+# Store Playwright browsers outside the volume mount so the build-time
+# install survives the /opt/data volume overlay at runtime.
+ENV PLAYWRIGHT_BROWSERS_PATH=/opt/hermes/.playwright
+
+# Install system dependencies in one layer, clear APT cache
+# tini reaps orphaned zombie processes (MCP stdio subprocesses, git, bun, etc.)
+# that would otherwise accumulate when hermes runs as PID 1. See #15012.
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+    build-essential nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git openssh-client docker-cli tini \
+    curl poppler-utils imagemagick \
+    chromium xvfb fonts-noto-color-emoji fonts-unifont fonts-liberation fonts-ipafont-gothic fonts-wqy-zenhei fonts-tlwg-loma-otf fonts-freefont-ttf \
+    libasound2t64 libatk-bridge2.0-0t64 libatk1.0-0t64 libatspi2.0-0t64 libcairo2 libcups2t64 libdbus-1-3 libdrm2 libgbm1 libglib2.0-0t64 libnspr4 libnss3 libpango-1.0-0 libx11-6 libxcb1 libxcomposite1 libxdamage1 libxext6 libxfixes3 libxkbcommon0 libxrandr2 \
+    texlive-latex-base texlive-latex-extra texlive-fonts-recommended texlive-xetex texlive-science \
+    qemu-user-static binfmt-support qemu-user-binfmt \
+    emacs-nox \
+    libportaudio2 && \
+    rm -rf /var/lib/apt/lists/*
+
+# Non-root user for runtime; UID can be overridden via HERMES_UID at runtime
+RUN useradd -u 10000 -m -d /opt/data hermes
+
+COPY --chmod=0755 --from=gosu_source /gosu /usr/local/bin/
+COPY --chmod=0755 --from=uv_source /usr/local/bin/uv /usr/local/bin/uvx /usr/local/bin/
+
+WORKDIR /opt/hermes
+
+# ---------- Layer-cached dependency install ----------
+# Copy only package manifests first so npm ci + Playwright are cached unless the
+# lockfiles change. npm ci installs exactly what they pin; apt lists left by --with-deps are removed.
+COPY package.json package-lock.json ./
+COPY web/package.json web/package-lock.json web/
+
+RUN npm ci --prefer-offline --no-audit && \
+    npx playwright install --with-deps chromium --only-shell && \
+    npm --prefix web ci --prefer-offline --no-audit && \
+    npm cache clean --force && rm -rf /var/lib/apt/lists/*
+
+# ---------- Source code ----------
+# .dockerignore excludes node_modules, so the installs above survive.
+COPY --chown=hermes:hermes . .
+
+# Build web dashboard (Vite outputs to hermes_cli/web_dist/)
+RUN npm --prefix web run build
+
+# ---------- Permissions ----------
+# Make install dir world-readable so any HERMES_UID can read it at runtime.
+# The venv needs to be traversable too, so it is built first and the chmod runs
+# last in the same layer.
+USER root
+RUN uv venv && \
+    uv pip install --no-cache-dir -e ".[all]" && \
+    uv pip install --no-cache-dir sounddevice numpy faster-whisper && \
+    chmod -R a+rX /opt/hermes
+
+# Start as root so the entrypoint can usermod/groupmod + gosu.
+# If HERMES_UID is unset, the entrypoint drops to the default hermes user (10000).
+
+# ---------- Runtime ----------
+ENV HERMES_WEB_DIST=/opt/hermes/hermes_cli/web_dist
+ENV HERMES_HOME=/opt/data
+ENV PATH="/opt/data/.local/bin:${PATH}"
+VOLUME [ "/opt/data" ]
+ENTRYPOINT [ "/usr/bin/tini", "-g", "--", "/opt/hermes/docker/entrypoint.sh" ]
diff --git a/hosts/lazyworkhorse/configuration.nix b/hosts/lazyworkhorse/configuration.nix
index 1593b0f..3963650 100644
--- a/hosts/lazyworkhorse/configuration.nix
+++ b/hosts/lazyworkhorse/configuration.nix
@@ -158,7 +158,7 @@
     settings = {
       PasswordAuthentication = false;
       KbdInteractiveAuthentication = false;
-      PermitRootLogin = "prohibit-password";
+      # Additional hardening settings below in SERVER HARDENING section
     };
     hostKeys = [
       {
@@ -308,6 +308,204 @@
   # Or disable the firewall altogether.
   # networking.firewall.enable = false;
 
+  # =============================================================================
+  # SERVER HARDENING - Firewall, Fail2ban, SSH, Kernel
+  # =============================================================================
+
+  # Firewall - default deny, explicit allow
+  networking.firewall = {
+    # Enable firewall with default deny policy (NixOS firewall denies all by default)
+    enable = true;
+    allowPing = true;
+
+    # Only essential ports exposed to internet
+    allowedTCPPorts = [
+      2424 # SSH (non-standard port)
+      2222 # Gitea (version control)
+      80 # HTTP (Traefik redirect)
+      443 # HTTPS (Traefik)
+      # 8000 # Portainer - REVIEW: internal only?
+      # 4242 # Coms - REVIEW: internal only?
+      # 5000 # TAK API - REVIEW: internal only?
+      # 8087 # TAK Connect - REVIEW: internal only?
+      # 8089 # TAK Management - REVIEW: internal only?
+    ];
+
+    allowedUDPPorts = [
+      # Add UDP ports if required
+    ];
+
+    # Rate limiting and attack prevention
+    extraCommands = ''
+      # All rules go into the nixos-fw chain rather than INPUT: NixOS flushes and
+      # rebuilds nixos-fw on every firewall (re)start, so they do not pile up across
+      # reloads. "-I nixos-fw <n>" places them ahead of the per-port accept rules.
+
+      # Rate limit SSH connections (drop the 4th and later new connection per source within 60 seconds)
+      iptables -I nixos-fw 1 -p tcp --dport 2424 -m state --state NEW -m recent --set
+      iptables -I nixos-fw 2 -p tcp --dport 2424 -m state --state NEW -m recent --update --seconds 60 --hitcount 4 -j DROP
+
+      # Rate limit HTTP/HTTPS per source address (protects Traefik)
+      # NOTE(review): if Traefik publishes 80/443 from a Docker container, that traffic is
+      # forwarded rather than delivered to INPUT and would not traverse this chain - confirm.
+      iptables -I nixos-fw 3 ! -i lo -p tcp --dport 80 -m state --state NEW -m hashlimit --hashlimit-name http --hashlimit-mode srcip --hashlimit-above 25/minute --hashlimit-burst 100 -j DROP
+      iptables -I nixos-fw 4 ! -i lo -p tcp --dport 443 -m state --state NEW -m hashlimit --hashlimit-name https --hashlimit-mode srcip --hashlimit-above 25/minute --hashlimit-burst 100 -j DROP
+
+      # Drop invalid packets
+      iptables -I nixos-fw 5 -m state --state INVALID -j DROP
+
+      # Log dropped packets (rate limited); appended so it only sees packets nothing above accepted
+      iptables -A nixos-fw -m limit --limit 5/min -j LOG --log-prefix "IPTables-Dropped: " --log-level 4
+    '';
+  };
+
+  # Fail2ban - automatic IP banning
+  services.fail2ban = {
+    enable = true;
+    maxretry = 3;
+    bantime = "1h";
+    banaction = "iptables-multiport";
+
+    jails = {
+      # SSH brute force protection (uses systemd journal backend)
+      sshd = {
+        enabled = true;
+        settings = {
+          filter = "sshd";
+          port = "2424";
+          maxretry = 3;
+          bantime = "1h";
+        };
+      };
+
+      # Recidive - ban repeat offenders for 1 week
+      recidive = {
+        enabled = true;
+        settings = {
+          filter = "recidive";
+          logpath = "/var/log/fail2ban.log";
+          bantime = "1w";
+          findtime = "1d";
+          maxretry = 3;
+        };
+      };
+
+      # HTTP authentication failures (Traefik)
+      # NOTE(review): logpath is only read by file-based backends, and such a jail fails to
+      # start if the file is missing - confirm the backend and that Traefik writes here.
+      http-auth = {
+        enabled = true;
+        settings = {
+          filter = "traefik-auth";
+          port = "80,443";
+          logpath = "/var/log/traefik/access.log";
+          maxretry = 5;
+          bantime = "1h";
+        };
+      };
+
+      # HTTP scanning/attacks (Traefik)
+      http-botsearch = {
+        enabled = true;
+        settings = {
+          filter = "traefik-botsearch";
+          port = "80,443";
+          logpath = "/var/log/traefik/access.log";
+          maxretry = 2;
+          bantime = "2h";
+        };
+      };
+    };
+  };
+
+  # Custom fail2ban filters for Traefik (<HOST> marks the client address fail2ban bans)
+  environment.etc."fail2ban/filter.d/traefik-auth.conf".text = ''
+    [Definition]
+    failregex = ^<HOST> -.*"(GET|POST|HEAD|PUT|DELETE).*" (401|403) \d+.*$
+    ignoreregex =
+  '';
+
+  environment.etc."fail2ban/filter.d/traefik-botsearch.conf".text = ''
+    [Definition]
+    failregex = ^<HOST> -.*"(GET|POST|HEAD|PUT|DELETE).*" 404 \d+.*$
+                ^<HOST> -.*"(GET|POST|HEAD|PUT|DELETE).*/(\.|wp-|php|admin|login|xmlrpc|\.env|\.git|\.aws|\.azure).*" \d+.*$
+    ignoreregex =
+  '';
+
+  # SSH hardening
+  services.openssh.settings = {
+    PermitRootLogin = "no";
+    MaxAuthTries = 3;
+    MaxSessions = 5;
+    LoginGraceTime = 30;
+    ClientAliveInterval = 300;
+    ClientAliveCountMax = 2;
+    PermitEmptyPasswords = "no";
+    ChallengeResponseAuthentication = "no";
+    UsePAM = true;
+    LogLevel = "VERBOSE";
+    X11Forwarding = false;
+    AllowTcpForwarding = "no";
+    AllowAgentForwarding = "no";
+    PermitTunnel = "no";
+  };
+
+  # Kernel network hardening
+  boot.kernel.sysctl = {
+    # IP Spoofing protection
+    "net.ipv4.conf.all.rp_filter" = 1;
+    "net.ipv4.conf.default.rp_filter" = 1;
+
+    # Ignore ICMP broadcasts
+    "net.ipv4.icmp_echo_ignore_broadcasts" = 1;
+
+    # Disable source routing
+    "net.ipv4.conf.all.accept_source_route" = 0;
+    "net.ipv4.conf.default.accept_source_route" = 0;
+    "net.ipv6.conf.all.accept_source_route" = 0;
+    "net.ipv6.conf.default.accept_source_route" = 0;
+
+    # Disable redirects
+    "net.ipv4.conf.all.send_redirects" = 0;
+    "net.ipv4.conf.default.send_redirects" = 0;
+
+    # SYN flood protection
+    "net.ipv4.tcp_syncookies" = 1;
+    "net.ipv4.tcp_max_syn_backlog" = 2048;
+    "net.ipv4.tcp_synack_retries" = 2;
+    "net.ipv4.tcp_syn_retries" = 5;
+
+    # Log martian packets
+    "net.ipv4.conf.all.log_martians" = 1;
+    "net.ipv4.conf.default.log_martians" = 1;
+
+    # Ignore redirects
+    "net.ipv4.conf.all.accept_redirects" = 0;
+    "net.ipv4.conf.default.accept_redirects" = 0;
+    "net.ipv4.conf.all.secure_redirects" = 0;
+    "net.ipv4.conf.default.secure_redirects" = 0;
+    "net.ipv6.conf.all.accept_redirects" = 0;
+    "net.ipv6.conf.default.accept_redirects" = 0;
+
+    # Connection tuning
+    "net.core.somaxconn" = 4096;
+    "net.core.netdev_max_backlog" = 65536;
+    "net.ipv4.tcp_max_orphans" = 65536;
+    "net.ipv4.tcp_fin_timeout" = 15;
+    "net.ipv4.tcp_keepalive_time" = 300;
+    "net.ipv4.tcp_keepalive_probes" = 5;
+    "net.ipv4.tcp_keepalive_intvl" = 15;
+  };
+
+  # Audit logging
+  security.auditd.enable = true;
+
+  # Fail2ban log directory
+  systemd.tmpfiles.rules = [
+    "d /var/log/fail2ban 0755 root root -"
+    "d /var/log/traefik 0755 root root -"
+  ];
+
   # Copy the NixOS configuration file and link it from the resulting system
   # (/run/current-system/configuration.nix). This is useful in case you
   # accidentally delete configuration.nix.
diff --git a/modules/nixos/services/ollama_init_custom_models.nix b/modules/nixos/services/ollama_init_custom_models.nix
index 812849e..4dc965d 100644
--- a/modules/nixos/services/ollama_init_custom_models.nix
+++ b/modules/nixos/services/ollama_init_custom_models.nix
@@ -14,8 +14,25 @@
         local base_model=$2
         if ! ${pkgs.docker}/bin/docker exec ollama ollama list | grep -q "$model_name"; then
           echo "$model_name not found, creating from $base_model..."
+
+          # We use a custom TEMPLATE block to strip the 'currentDate' function
+          # which is unsupported in Ollama 0.5.7 but present in Devstral's default manifest.
${pkgs.docker}/bin/docker exec ollama sh -c "cat < /root/.ollama/$model_name.modelfile FROM $base_model +TEMPLATE \"\"\"{{- if .System }} +[SYSTEM_PROMPT] +{{ .System }} +[/SYSTEM_PROMPT] +{{- end }} +{{- range .Messages }} +{{- if eq .Role \"user\" }} +[INST] +{{ .Content }} +[/INST] +{{- else if eq .Role \"assistant\" }} +{{ .Content }} +{{- end }} +{{- end }}\"\"\" PARAMETER num_ctx 131072 PARAMETER num_predict 4096 PARAMETER num_keep 1024 @@ -26,6 +43,7 @@ PARAMETER stop \"[/INST]\" PARAMETER stop \"\" EOF" ${pkgs.docker}/bin/docker exec ollama ollama create "$model_name" -f "/root/.ollama/$model_name.modelfile" + ${pkgs.docker}/bin/docker exec ollama rm "/root/.ollama/$model_name.modelfile" else echo "$model_name already exists, skipping." fi @@ -36,6 +54,10 @@ EOF" # Create Devstral create_model_if_missing "devstral-small-2:24b-128k" "devstral-small-2:24b" + + # create_model_if_missing "qwen2.5-coder:32b-128k" "qwen2.5-coder:32b" + + # create_model_if_missing "mistral-large-planner:123b" "mistral-large:123b-instruct-v2407-q4_K_S" ''; serviceConfig = { Type = "oneshot"; diff --git a/users/ai-worker.nix b/users/ai-worker.nix index 48b51de..6308151 100644 --- a/users/ai-worker.nix +++ b/users/ai-worker.nix @@ -18,8 +18,76 @@ # SECURITY: ai-worker can only: # - SSH into host from Hermes container # - Run docker commands (docker exec ollama ...) 
via docker group + # - Run specific security audit commands # - NO access to infra repo (no bind mount) # - NO sudo access (no nh, nixos-rebuild, nixpkgs-fmt, nix) # WORKFLOW: SSH from Hermes container, run docker benchmarks, return and save results to /opt/data/ai-optimizer/ services.aiWorkerAccess = true; + + # Restricted sudo for ai-worker - security checks only + security.sudo.extraRules = [ + { + users = [ "ai-worker" ]; + commands = [ + # Firewall checks + { + command = "/run/wrappers/bin/sudo iptables -L -n -v"; + options = [ "NOPASSWD" ]; + } + { + command = "/run/wrappers/bin/sudo iptables -S"; + options = [ "NOPASSWD" ]; + } + # Fail2ban status + { + command = "/run/current-system/sw/bin/fail2ban-client status"; + options = [ "NOPASSWD" ]; + } + { + command = "/run/current-system/sw/bin/fail2ban-client status *"; + options = [ "NOPASSWD" ]; + } + { + command = "/run/current-system/sw/bin/fail2ban-client get * banned"; + options = [ "NOPASSWD" ]; + } + # Log inspection + { + command = "/run/current-system/sw/bin/journalctl -t kernel -n 100"; + options = [ "NOPASSWD" ]; + } + { + command = "/run/current-system/sw/bin/journalctl -u fail2ban -n 50"; + options = [ "NOPASSWD" ]; + } + { + command = "/run/current-system/sw/bin/journalctl -u firewall -n 50"; + options = [ "NOPASSWD" ]; + } + # SSH config verification + { + command = "/run/current-system/sw/bin/sshd -T"; + options = [ "NOPASSWD" ]; + } + # Docker service checks + { + command = "/run/current-system/sw/bin/docker ps"; + options = [ "NOPASSWD" ]; + } + { + command = "/run/current-system/sw/bin/docker inspect *"; + options = [ "NOPASSWD" ]; + } + # Network diagnostics + { + command = "/run/current-system/sw/bin/ss -tlnp"; + options = [ "NOPASSWD" ]; + } + { + command = "/run/current-system/sw/bin/cat /proc/net/tcp"; + options = [ "NOPASSWD" ]; + } + ]; + } + ]; }