diff --git a/assets/compose b/assets/compose
index d97f1cb..6b82a26 160000
--- a/assets/compose
+++ b/assets/compose
@@ -1 +1 @@
-Subproject commit d97f1cb1e5c762ce779f76ac26a61b5430f87f32
+Subproject commit 6b82a26c25f1592a2d1c9bea4f941864362fe001
diff --git a/assets/ollama/Dockerfile b/assets/ollama/Dockerfile
new file mode 100644
index 0000000..438e607
--- /dev/null
+++ b/assets/ollama/Dockerfile
@@ -0,0 +1,106 @@
+# ollama-gfx906/Dockerfile
+#
+# Custom ollama image with ROCm 6.1 + gfx906 (MI50) support.
+# The official ollama/rocm image ships ROCm 7.2 which dropped gfx906.
+# This uses v0.23.2's native CMake build system with AMDGPU_TARGETS including gfx906.
+#
+# Build (from the repository root): docker build -t ollama/ollama:rocm-gfx906 assets/ollama
+
+FROM rocm/dev-ubuntu-22.04:6.1.2-complete AS builder
+
+# Versions of the build tools installed below (CMake, Ninja, Go); override with --build-arg
+ARG CMAKEVERSION=3.31.2
+ARG NINJAVERSION=1.12.1
+ARG GOLANG_VERSION=1.22.0
+
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
+    curl git ccache build-essential pkg-config unzip \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install CMake from official binaries (download to a file first: /bin/sh has no pipefail, so `curl | tar` would hide a failed download)
+RUN curl -fsSL -o /tmp/cmake.tar.gz https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-x86_64.tar.gz \
+    && tar xzf /tmp/cmake.tar.gz -C /usr/local --strip-components 1 && rm /tmp/cmake.tar.gz
+
+# Install Ninja
+RUN curl -fsSL -o /tmp/ninja.zip \
+    https://github.com/ninja-build/ninja/releases/download/v${NINJAVERSION}/ninja-linux.zip \
+    && unzip /tmp/ninja.zip -d /usr/local/bin && rm /tmp/ninja.zip
+
+# Install Go (same download-then-extract pattern as CMake, for the same reason)
+RUN curl -fsSL -o /tmp/go.tar.gz https://go.dev/dl/go${GOLANG_VERSION}.linux-amd64.tar.gz \
+    && tar xzf /tmp/go.tar.gz -C /usr/local && rm /tmp/go.tar.gz
+ENV PATH=/usr/local/go/bin:$PATH
+
+ARG OLLAMA_VERSION=v0.23.2
+RUN git clone --depth 1 --branch ${OLLAMA_VERSION} https://github.com/ollama/ollama.git /build
+WORKDIR /build
+
+# ROCm paths
+ENV HIP_PATH=/opt/rocm
+ENV ROCM_PATH=/opt/rocm
+ENV CMAKE_GENERATOR=Ninja
+ENV LDFLAGS=-s
+
+# Step 1: Build CPU backends with GCC (no ROCm preset)
+# Pre-set CMAKE_HIP_COMPILER="" to prevent check_language(HIP) from
+# finding a HIP compiler (it searches /opt/rocm even without PATH).
+# Remove /opt/rocm from PATH to prevent find_program from finding hipcc.
+RUN mkdir -p build-cpu && \
+    PATH=/usr/local/go/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin \
+    cmake -B build-cpu -DCMAKE_BUILD_TYPE=Release \
+        -DCMAKE_HIP_COMPILER="" \
+        -DCMAKE_INSTALL_PREFIX=/build/dist && \
+    cmake --build build-cpu --target ggml-cpu -- -l $(nproc) && \
+    cmake --install build-cpu --component CPU --strip && \
+    echo "=== CPU install ===" && \
+    (find /build/dist/lib/ollama -type f -o -type l 2>&1 | head -20 || echo "empty")
+
+# Step 2: Build HIP backend with ROCm preset + gfx906 target only
+# The ROCm 6 preset enables HIP language detection (enable_language(HIP))
+# which ensures GPU kernels are properly compiled for gfx906.
+# OLLAMA_RUNNER_DIR=rocm from the preset, so HIP goes to lib/ollama/rocm/
+# Need CMAKE_PREFIX_PATH so find_package(hip) finds hip-config.cmake
+# at /opt/rocm/lib/cmake/hip/hip-config.cmake.
+RUN mkdir -p build-hip && \
+    cmake -B build-hip \
+        --preset 'ROCm 6' \
+        -DAMDGPU_TARGETS="gfx906:xnack-" \
+        -DCMAKE_BUILD_TYPE=Release \
+        -DCMAKE_PREFIX_PATH="/opt/rocm" && \
+    cmake --build build-hip --target ggml-hip -- -l $(nproc) && \
+    cmake --install build-hip --component HIP --strip && \
+    echo "=== HIP install ===" && \
+    find /build/dist/lib/ollama -type f -o -type l | head -20
+
+# Step 3: Build Go binary (GCC for CGo linking)
+ENV CGO_ENABLED=1
+RUN go build -trimpath -ldflags="-X=github.com/ollama/ollama/version.Version=${OLLAMA_VERSION}" -o /build/dist/ollama .
+
+# ---------- Runtime image ----------
+FROM ubuntu:24.04
+
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+    ca-certificates curl libstdc++6 libgomp1 libvulkan1 libopenblas0 \
+    && rm -rf /var/lib/apt/lists/*
+
+# Copy ROCm 6.1 runtime libraries
+# These are needed at runtime by ggml-hip via LD_LIBRARY_PATH
+COPY --from=builder /opt/rocm/lib/ /opt/rocm/lib/
+COPY --from=builder /opt/rocm/share/ /opt/rocm/share/
+
+# Copy ollama binary (to /usr/bin/ollama, the path ENTRYPOINT uses) + all backends (CPU + HIP)
+# CPU install: /build/dist/lib/ollama/libggml-*.so
+# HIP install: /build/dist/lib/ollama/rocm/libggml-hip.so
+COPY --from=builder /build/dist/ollama /usr/bin/ollama
+COPY --from=builder /build/dist/lib/ollama/ /usr/lib/ollama/
+
+RUN ldconfig
+
+ENV LD_LIBRARY_PATH=/opt/rocm/lib:/usr/lib/ollama/rocm:/usr/lib/ollama
+ENV HSA_OVERRIDE_GFX_VERSION=9.0.6
+ENV HCC_AMDGPU_TARGET=gfx906
+ENV HSA_ENABLE_SDMA=0
+
+EXPOSE 11434
+ENTRYPOINT ["/usr/bin/ollama"]
+CMD ["serve"]
diff --git a/flake.nix b/flake.nix
index a06b03e..8f8b51a 100644
--- a/flake.nix
+++ b/flake.nix
@@ -61,6 +61,7 @@
           ./modules/nixos/services/open_code_server.nix
           ./modules/nixos/services/ollama_init_custom_models.nix
           ./modules/nixos/services/openclaw_node.nix
+          ./modules/nixos/security/ai-worker-restricted.nix
           ./users/gortium.nix
           ./users/ai-worker.nix
         ];
diff --git a/modules/nixos/security/README-ai-worker.md b/modules/nixos/security/README-ai-worker.md
new file mode 100644
index 0000000..6128573
--- /dev/null
+++ b/modules/nixos/security/README-ai-worker.md
@@ -0,0 +1,105 @@
+# AI Worker Restricted Access
+
+This module provides SSH access for the AI worker (hermes-agent) to run ollama benchmarks on the host.
+
+## Security Model
+
+The `ai-worker` user has:
+
+### Filesystem Access
+- **Home directory**: `/home/ai-worker` (standard user home)
+- **No bind mounts**: Cannot access `/home/gortium/infra` or other host files
+- **Cannot access**: Any files outside standard system paths
+
+### Sudo Access
+- **NONE**: ai-worker has no sudo privileges
+- Cannot run `nh`, `nixos-rebuild`, `nixpkgs-fmt`, or `nix` with elevated permissions
+
+### Docker Access
+- Member of `docker` group - can run `docker` and `docker exec` commands (note: `docker` group membership is effectively root-equivalent on the host, so the filesystem and sudo limits above are not a hard security boundary)
+- Primary use: `docker exec ollama ollama ...` for benchmarking
+- Can run `docker exec --privileged ollama rocm-smi ...` for VRAM monitoring
+
+## Workflow: SSH + Docker Benchmarking
+
+The AI worker connects from the Hermes container to the host via SSH, runs ollama benchmarks, then returns to the container to save the results.
+
+### Example Workflow
+
+```bash
+# From Hermes container, SSH to host
+ssh -i /path/to/ssh/key ai-worker@host.docker.internal
+
+# On host, run ollama benchmarks via docker
+docker exec ollama ollama pull devstral-small-2:24b
+
+# Create test modelfile
+docker exec ollama bash -c 'cat <<EOF > /root/.ollama/test.modelfile
+FROM devstral-small-2:24b
+PARAMETER num_ctx 65536
+PARAMETER num_gpu 99
+PARAMETER flash_attn true
+EOF'
+
+# Create and test model
+docker exec ollama ollama create test-model -f /root/.ollama/test.modelfile
+docker exec ollama ollama run test-model "Write a Python async function"
+
+# Check VRAM usage
+docker exec --privileged ollama rocm-smi --showmeminfo vram
+
+# Cleanup
+docker exec ollama ollama rm test-model
+
+# Exit SSH, return to Hermes container
+exit
+
+# Save results in Hermes container
+# /opt/data/ai-optimizer/state.json
+# /opt/data/ai-optimizer/results.csv
+```
+
+## SSH Access
+
+Connect as:
+```bash
+ssh ai-worker@lazyworkhorse
+```
+
+The working directory will be `/home/ai-worker`. No infra repo access.
+
+## Verification
+
+Check ai-worker permissions:
+```bash
+# On the host, as root or gortium:
+sudo -u ai-worker sudo -l
+# Should show: no sudo access
+
+# Check docker group membership
+groups ai-worker
+# Should show: ai-worker docker
+```
+
+## Troubleshooting
+
+If ai-worker cannot run docker commands:
+```bash
+# Check docker group membership
+groups ai-worker
+
+# Verify ollama container is running
+docker ps | grep ollama
+
+# Test docker access
+sudo -u ai-worker docker exec ollama ollama list
+```
+
+If SSH connection fails:
+```bash
+# Check SSH key is authorized (NixOS installs keys from openssh.authorizedKeys.keys here, not in ~/.ssh)
+cat /etc/ssh/authorized_keys.d/ai-worker
+
+# Check SSH service
+systemctl status sshd
+```
diff --git a/modules/nixos/security/ai-worker-restricted.nix b/modules/nixos/security/ai-worker-restricted.nix
new file mode 100644
index 0000000..0e9d4f6
--- /dev/null
+++ b/modules/nixos/security/ai-worker-restricted.nix
@@ -0,0 +1,17 @@
+{ config, pkgs, lib, ... }:
+# Opt-in module: when services.aiWorkerAccess is true, the ai-worker user is added to the docker group.
+let inherit (lib) mkIf mkOption types; in
+
+{
+  options.services.aiWorkerAccess = mkOption {
+    type = types.bool;
+    default = false;
+    description = "Enable AI worker SSH access with docker group membership for ollama benchmarking";
+  };
+
+  config = mkIf config.services.aiWorkerAccess {
+    # The only setting applied here is docker group membership for ai-worker;
+    # this module defines no bind mounts and no sudo rules.
+    users.groups.docker.members = [ "ai-worker" ];
+  };
+}
diff --git a/modules/nixos/services/ollama_init_custom_models.nix b/modules/nixos/services/ollama_init_custom_models.nix
index 4dc965d..aa060cc 100644
--- a/modules/nixos/services/ollama_init_custom_models.nix
+++ b/modules/nixos/services/ollama_init_custom_models.nix
@@ -1,67 +1,87 @@
 { pkgs, ...
}: {
   systemd.services.init-ollama-model = {
     description = "Initialize LLM models with extra context in Ollama Docker";
-    after = [ "docker-ollama.service" ];
+
+    # Make sure Docker is running before this script is started
+    after = [ "docker.service" ];
     wantedBy = [ "multi-user.target" ];
+
     script = ''
-      # Wait for Ollama
-      while ! ${pkgs.curl}/bin/curl -s http://localhost:11434/api/tags > /dev/null; do
-        sleep 2
-      done
+      # Run the whole initialization in a background subshell so it does not block startup
+      (
+        echo "Starting asynchronous Ollama initialization..."
+
+        # Wait for the Ollama API; give up once COUNT reaches TIMEOUT (60 seconds of sleeping) to avoid an infinite loop
+        TIMEOUT=60
+        COUNT=0
+        while ! ${pkgs.curl}/bin/curl -s -f http://127.0.0.1:11434/api/tags > /dev/null; do
+          if [ $COUNT -ge $TIMEOUT ]; then
+            echo "Ollama did not become ready in time. Exiting."
+            exit 1
+          fi
+          echo "Waiting for Ollama API to be reachable..."
+          sleep 5
+          COUNT=$((COUNT + 5))
+        done
 
-      create_model_if_missing() {
-        local model_name=$1
-        local base_model=$2
-        if ! ${pkgs.docker}/bin/docker exec ollama ollama list | grep -q "$model_name"; then
-          echo "$model_name not found, creating from $base_model..."
+        create_model_if_missing() {
+          local model_name=$1
+          local base_model=$2
 
-          # We use a custom TEMPLATE block to strip the 'currentDate' function
-          # which is unsupported in Ollama 0.5.7 but present in Devstral's default manifest.
-          ${pkgs.docker}/bin/docker exec ollama sh -c "cat < /root/.ollama/$model_name.modelfile
+          # Check through Ollama's HTTP API rather than docker exec (avoids tty conflicts)
+          if ! ${pkgs.curl}/bin/curl -s http://127.0.0.1:11434/api/tags | ${pkgs.jq}/bin/jq -e ".models[] | select(.name == \"$model_name\")" > /dev/null; then
+            echo "$model_name not found, creating from $base_model..."
+
+            # Write the Modelfile to a temp file on the host, then copy it into the container. NOTE(review): the last PARAMETER stop "" below looks like it lost its token - confirm
+            TMP_FILE=$(mktemp)
+            cat <<EOF > "$TMP_FILE"
 FROM $base_model
-TEMPLATE \"\"\"{{- if .System }}
+TEMPLATE """{{- if .System }}
 [SYSTEM_PROMPT]
 {{ .System }}
 [/SYSTEM_PROMPT]
 {{- end }}
 {{- range .Messages }}
-{{- if eq .Role \"user\" }}
+{{- if eq .Role "user" }}
 [INST]
 {{ .Content }}
 [/INST]
-{{- else if eq .Role \"assistant\" }}
+{{- else if eq .Role "assistant" }}
 {{ .Content }}
 {{- end }}
-{{- end }}\"\"\"
+{{- end }}"""
 PARAMETER num_ctx 131072
 PARAMETER num_predict 4096
 PARAMETER num_keep 1024
 PARAMETER repeat_penalty 1.1
 PARAMETER top_k 40
-PARAMETER stop \"[INST]\"
-PARAMETER stop \"[/INST]\"
-PARAMETER stop \"\"
-EOF"
-          ${pkgs.docker}/bin/docker exec ollama ollama create "$model_name" -f "/root/.ollama/$model_name.modelfile"
-          ${pkgs.docker}/bin/docker exec ollama rm "/root/.ollama/$model_name.modelfile"
-        else
-          echo "$model_name already exists, skipping."
-        fi
-      }
+PARAMETER stop "[INST]"
+PARAMETER stop "[/INST]"
+PARAMETER stop ""
+EOF
 
-      # Create Nemotron
-      create_model_if_missing "nemotron-3-nano:30b-128k" "nemotron-3-nano:30b"
-
-      # Create Devstral
-      create_model_if_missing "devstral-small-2:24b-128k" "devstral-small-2:24b"
-
-      # create_model_if_missing "qwen2.5-coder:32b-128k" "qwen2.5-coder:32b"
-
-      # create_model_if_missing "mistral-large-planner:123b" "mistral-large:123b-instruct-v2407-q4_K_S"
+            # Copy the Modelfile into the container, create the model, then clean up both copies
+            ${pkgs.docker}/bin/docker cp "$TMP_FILE" ollama:/tmp/model.modelfile
+            ${pkgs.docker}/bin/docker exec ollama ollama create "$model_name" -f /tmp/model.modelfile
+            ${pkgs.docker}/bin/docker exec ollama rm /tmp/model.modelfile
+            rm -f "$TMP_FILE"
+          else
+            echo "$model_name already exists, skipping."
+          fi
+        }
+
+        # Create Nemotron
+        create_model_if_missing "nemotron-3-nano:30b-128k" "nemotron-3-nano:30b"
+
+        # Create Devstral
+        create_model_if_missing "devstral-small-2:24b-128k" "devstral-small-2:24b"
+
+      ) &
     '';
+
     serviceConfig = {
-      Type = "oneshot";
-      RemainAfterExit = true;
+      Type = "forking"; # Tells systemd that the script moves into the background via '&'
+      User = "root";
     };
   };
 }
diff --git a/secrets/wireguard_preshared_key.age b/secrets/wireguard_preshared_key.age
new file mode 100644
index 0000000..6149647
--- /dev/null
+++ b/secrets/wireguard_preshared_key.age
@@ -0,0 +1,9 @@
+-----BEGIN AGE ENCRYPTED FILE-----
+YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IHNzaC1lZDI1NTE5IEdoTUQ4QSA3VG9Z
+MVFPVFc2VVJ3d0h0dmtBUnI3WHl2SzUxTkRZbjFCaGloWmV3dnd3ClcxdnVPeGd6
+SU4zR0Q0K1dtVjRRVHd0VW5XSFI0dVFpTjZnYk1DNjRxTVEKLT4gQzlgRy1ncmVh
+c2UKeUozOWgyUytSTVF0NjY2STBEb2VadwotLS0gblI3bmJCUWxxU3QrYTEyVFBI
+Snc4NC9rTkh0NnZYbUtxUE9hRWRkelpmMAq58fmH6cK13GeD7wGLxKmx10hmJeW4
+b7KqnCD1ZP7uG85s32xzVRwRG8RrG4xZo5nR9Mrtg1CoTSFfUGeFnf5xveN+Ej0X
+wDVB1LwC+Q==
+-----END AGE ENCRYPTED FILE-----
diff --git a/secrets/wireguard_private_key.age b/secrets/wireguard_private_key.age
new file mode 100644
index 0000000..09d0213
--- /dev/null
+++ b/secrets/wireguard_private_key.age
@@ -0,0 +1,11 @@
+-----BEGIN AGE ENCRYPTED FILE-----
+YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IHNzaC1lZDI1NTE5IEdoTUQ4QSA5dzVG
+WUNvT3NlRmcrWS81bzJqSWlTekVYaDFFTE10SkI2dEgzaGpxcUI4Cmk5Y0FGYTRZ
+K0NGYzY3VUp4aS9ZZGRmWTgybDJFUURva2pZNmVOS3QxdEUKLT4gPnVRTCtldGMt
+Z3JlYXNlCk04OTJZeFRNeDI5aGpMVTk1ZTE0Y2FMMnFEMjlJalJpMHRlaTE4ZWIx
+d2lCRGQ5RHVjcktOMGJCb1VERlNWcTYKaSt0L1Z6dVJ0QWIyZkhsYzFEVjZSQWUr
+ZWpwVlo1TmhoUFJZdkEvR0gxNlVhcXF2ZTRnCi0tLSBLcmM2MThNVkdWclpHUXRr
+VTF6QVk2WUZlTXpZMVNLMlpBOFc3M1o5WjZzCs9xbPlIX+u5vRSQ/z9utu+I9S2c
+02DOsIb1kzxzb1OK91b8Kh4JucQSq3qkyEvRucsNn5QW8hIHDnRuND6EbPyN7p4S
+YB/F0dxSqgnq
+-----END AGE ENCRYPTED FILE-----
diff --git a/users/ai-worker.nix b/users/ai-worker.nix
index b818426..6308151 100644
--- 
a/users/ai-worker.nix
+++ b/users/ai-worker.nix
@@ -9,8 +9,20 @@
     openssh.authorizedKeys.keys = [
       keys.users.ai-worker.main
     ];
+    # No password login - SSH key only
+    hashedPassword = "!";
   };
   users.groups.ai-worker = {};
+
+  # Turn on the services.aiWorkerAccess option (modules/nixos/security/ai-worker-restricted.nix) for ollama benchmarking
+  # SECURITY: ai-worker is intended to only:
+  #  - SSH into host from Hermes container
+  #  - Run docker commands (docker exec ollama ...) via docker group - NOTE(review): docker group is effectively root-equivalent on the host; confirm this is acceptable
+  #  - Run specific security audit commands (presumably via the restricted sudo rules below - confirm)
+  #  - NO access to infra repo (no bind mount)
+  #  - NO other sudo access (no nh, nixos-rebuild, nixpkgs-fmt, nix)
+  # WORKFLOW: SSH from Hermes container, run docker benchmarks, return and save results to /opt/data/ai-optimizer/
+  services.aiWorkerAccess = true;
 
   # Restricted sudo for ai-worker - security checks only
   security.sudo.extraRules = [