From 18df45819d4d857b6536a77e86ce6fcde057f03e Mon Sep 17 00:00:00 2001 From: Hermes Agent Date: Tue, 28 Apr 2026 15:34:38 +0000 Subject: [PATCH 1/7] Add restricted AI worker access with deployment capabilities - New module: modules/nixos/security/ai-worker-restricted.nix - Bind mount for infra repo access (RW) - Whitelisted sudo commands: nh, nixos-rebuild, nixpkgs-fmt, nix - Audit logging for infra changes - Documentation in README-ai-worker.md - Updated users/ai-worker.nix: - Enable services.aiWorkerAccess - Lock password (SSH key only) - Security documentation comments - Updated flake.nix: - Include new security module SECURITY: AI must ask for user confirmation before running nh os switch --- flake.nix | 1 + modules/nixos/security/README-ai-worker.md | 92 +++++++++++++++++++ .../nixos/security/ai-worker-restricted.nix | 57 ++++++++++++ users/ai-worker.nix | 11 +++ 4 files changed, 161 insertions(+) create mode 100644 modules/nixos/security/README-ai-worker.md create mode 100644 modules/nixos/security/ai-worker-restricted.nix diff --git a/flake.nix b/flake.nix index a06b03e..8f8b51a 100644 --- a/flake.nix +++ b/flake.nix @@ -61,6 +61,7 @@ ./modules/nixos/services/open_code_server.nix ./modules/nixos/services/ollama_init_custom_models.nix ./modules/nixos/services/openclaw_node.nix + ./modules/nixos/security/ai-worker-restricted.nix ./users/gortium.nix ./users/ai-worker.nix ]; diff --git a/modules/nixos/security/README-ai-worker.md b/modules/nixos/security/README-ai-worker.md new file mode 100644 index 0000000..8600e08 --- /dev/null +++ b/modules/nixos/security/README-ai-worker.md @@ -0,0 +1,92 @@ +# AI Worker Restricted Access + +This module provides restricted access for the AI worker (hermes-agent) to manage the infra repository. 
+ +## Security Model + +The `ai-worker` user has: + +### Filesystem Access +- **Bind mount**: `/home/ai-worker/infra` → `/home/gortium/infra` (read-write) +- **Cannot access**: Any other files outside the bind mount and standard system paths + +### Sudo Access (Whitelist Only) +The following commands are allowed via sudo without password: +- `/run/current-system/sw/bin/nh` - NixOS home manager +- `/run/current-system/sw/bin/nixos-rebuild` - System rebuild +- `/run/current-system/sw/bin/nixpkgs-fmt` - Nix formatter +- `/run/current-system/sw/bin/nix` - Nix package manager + +### Docker Access +- Member of `docker` group - can manage containers +- Cannot modify host system directly + +### Audit Logging +- All changes to `/home/gortium/infra` are logged via Linux audit subsystem +- Audit rule: `-w /home/gortium/infra -p wa -k infra_changes` + +## Workflow: Ask First, Always + +**CRITICAL**: Before running any deployment command (`nh os switch` or `nixos-rebuild`), the AI MUST: + +1. **Show the planned changes** to the user +2. **Explain the impact** of the changes +3. **Wait for explicit confirmation** before executing + +### Example Workflow + +```bash +# AI prepares changes +cd /home/ai-worker/infra +# ... edits files ... +nixpkgs-fmt . + +# AI shows diff to user +git diff + +# AI asks: "Ready to deploy? This will restart the ai_stack service." +# User responds: "Yes, proceed" + +# Only then does AI run: +sudo nh os switch --flake .#lazyworkhorse +``` + +## SSH Access + +Connect as: +```bash +ssh ai-worker@lazyworkhorse +``` + +The working directory will be `/home/ai-worker`, with infra repo accessible at `/home/ai-worker/infra`. + +## Verification + +Check ai-worker permissions: +```bash +# On the host, as root or gortium: +sudo -u ai-worker sudo -l +``` + +Expected output should show only the whitelisted commands. 
+ +## Troubleshooting + +If ai-worker cannot access infra: +```bash +# Check bind mount +mount | grep ai-worker/infra + +# Check permissions +ls -la /home/gortium/infra +ls -la /home/ai-worker/infra +``` + +If sudo commands fail: +```bash +# Check sudo rules +sudo cat /etc/sudoers.d/* | grep ai-worker + +# Check audit logs +sudo ausearch -k infra_changes +``` diff --git a/modules/nixos/security/ai-worker-restricted.nix b/modules/nixos/security/ai-worker-restricted.nix new file mode 100644 index 0000000..a02ec69 --- /dev/null +++ b/modules/nixos/security/ai-worker-restricted.nix @@ -0,0 +1,57 @@ +{ config, pkgs, lib, ... }: + +with lib; + +{ + options.services.aiWorkerAccess = mkOption { + type = types.bool; + default = false; + description = "Enable restricted AI worker access to infra repo with deployment capabilities"; + }; + + config = mkIf config.services.aiWorkerAccess { + # Bind mount for infra repo access (read-write for editing) + fileSystems."/home/ai-worker/infra" = { + device = "/home/gortium/infra"; + fsType = "none"; + options = [ "bind" ]; + }; + + # Restricted sudo access - only specific commands allowed + security.sudo.extraRules = [ + { + users = [ "ai-worker" ]; + commands = [ + { + command = "/run/current-system/sw/bin/nh"; + options = [ "NOPASSWD" ]; + } + { + command = "/run/current-system/sw/bin/nixos-rebuild"; + options = [ "NOPASSWD" ]; + } + { + command = "/run/current-system/sw/bin/nixpkgs-fmt"; + options = [ "NOPASSWD" ]; + } + { + command = "/run/current-system/sw/bin/nix"; + options = [ "NOPASSWD" ]; + } + ]; + } + ]; + + # Ensure ai-worker has necessary tools available + environment.systemPackages = with pkgs; [ + nh + nixpkgs-fmt + ]; + + # Audit logging for ai-worker actions on infra directory + security.audit.enable = mkDefault true; + security.audit.rules = [ + "-w /home/gortium/infra -p wa -k infra_changes" + ]; + }; +} diff --git a/users/ai-worker.nix b/users/ai-worker.nix index a8f027c..d7df7c0 100644 --- a/users/ai-worker.nix 
+++ b/users/ai-worker.nix @@ -9,6 +9,17 @@ openssh.authorizedKeys.keys = [ keys.users.ai-worker.main ]; + # No password login - SSH key only + hashedPassword = "!"; }; users.groups.ai-worker = {}; + + # Enable restricted AI worker access with deployment capabilities + # SECURITY: ai-worker can only: + # - Access /home/ai-worker/infra (bind-mounted to /home/gortium/infra) + # - Run: nh, nixos-rebuild, nixpkgs-fmt, nix (via sudo, no password) + # - Manage docker containers (via docker group) + # - All changes to infra/ are logged via audit subsystem + # WORKFLOW: AI must ask for user confirmation before running nh os switch + services.aiWorkerAccess = true; } From f0e21d95e4b9734be0101b0dd68f8a0d906f1603 Mon Sep 17 00:00:00 2001 From: Hermes Agent Date: Wed, 29 Apr 2026 19:55:19 +0000 Subject: [PATCH 2/7] fix: ai-worker docker-only access for ollama benchmarking Remove infra repo bind mount and sudo access from ai-worker user. Now ai-worker can only: - SSH into host from Hermes container - Run docker commands via docker group membership - Execute ollama benchmarks via docker exec Results saved to /opt/data/ai-optimizer/ in Hermes container. --- modules/nixos/security/README-ai-worker.md | 103 ++++++++++-------- .../nixos/security/ai-worker-restricted.nix | 48 +------- users/ai-worker.nix | 12 +- 3 files changed, 68 insertions(+), 95 deletions(-) diff --git a/modules/nixos/security/README-ai-worker.md b/modules/nixos/security/README-ai-worker.md index 8600e08..6128573 100644 --- a/modules/nixos/security/README-ai-worker.md +++ b/modules/nixos/security/README-ai-worker.md @@ -1,54 +1,62 @@ # AI Worker Restricted Access -This module provides restricted access for the AI worker (hermes-agent) to manage the infra repository. +This module provides SSH access for the AI worker (hermes-agent) to run ollama benchmarks on the host. 
## Security Model The `ai-worker` user has: ### Filesystem Access -- **Bind mount**: `/home/ai-worker/infra` → `/home/gortium/infra` (read-write) -- **Cannot access**: Any other files outside the bind mount and standard system paths +- **Home directory**: `/home/ai-worker` (standard user home) +- **No bind mounts**: Cannot access `/home/gortium/infra` or other host files +- **Cannot access**: Any files outside standard system paths through normal file permissions (but see the `docker` group caveat under Docker Access) -### Sudo Access (Whitelist Only) -The following commands are allowed via sudo without password: -- `/run/current-system/sw/bin/nh` - NixOS home manager -- `/run/current-system/sw/bin/nixos-rebuild` - System rebuild -- `/run/current-system/sw/bin/nixpkgs-fmt` - Nix formatter -- `/run/current-system/sw/bin/nix` - Nix package manager +### Sudo Access +- **NONE**: ai-worker has no sudo privileges +- Cannot run `nh`, `nixos-rebuild`, `nixpkgs-fmt`, or `nix` with elevated permissions ### Docker Access -- Member of `docker` group - can manage containers -- Cannot modify host system directly +- Member of `docker` group - can run `docker` and `docker exec` commands (caveat: `docker` group membership is effectively root-equivalent on the host, e.g. via bind mounts or `--privileged` containers) +- Primary use: `docker exec ollama ollama ...` for benchmarking +- Can run `docker exec --privileged ollama rocm-smi ...` for VRAM monitoring -### Audit Logging -- All changes to `/home/gortium/infra` are logged via Linux audit subsystem -- Audit rule: `-w /home/gortium/infra -p wa -k infra_changes` +## Workflow: SSH + Docker Benchmarking -## Workflow: Ask First, Always - -**CRITICAL**: Before running any deployment command (`nh os switch` or `nixos-rebuild`), the AI MUST: - -1. **Show the planned changes** to the user -2. **Explain the impact** of the changes -3. **Wait for explicit confirmation** before executing +The AI worker connects from the Hermes container to the host via SSH, runs ollama benchmarks, then returns to save results. ### Example Workflow ```bash -# AI prepares changes -cd /home/ai-worker/infra -# ... edits files ... -nixpkgs-fmt . 
+# From Hermes container, SSH to host +ssh -i /path/to/ssh/key ai-worker@host.docker.internal -# AI shows diff to user -git diff +# On host, run ollama benchmarks via docker +docker exec ollama ollama pull devstral-small-2:24b -# AI asks: "Ready to deploy? This will restart the ai_stack service." -# User responds: "Yes, proceed" +# Create test modelfile +docker exec ollama bash -c 'cat <<EOF > /root/.ollama/test.modelfile +FROM devstral-small-2:24b +PARAMETER num_ctx 65536 +PARAMETER num_gpu 99 +PARAMETER flash_attn true +EOF' -# Only then does AI run: -sudo nh os switch --flake .#lazyworkhorse +# Create and test model +docker exec ollama ollama create test-model -f /root/.ollama/test.modelfile +docker exec ollama ollama run test-model "Write a Python async function" + +# Check VRAM usage +docker exec --privileged ollama rocm-smi --showmeminfo vram + +# Cleanup +docker exec ollama ollama rm test-model + +# Exit SSH, return to Hermes container +exit + +# Save results in Hermes container +# /opt/data/ai-optimizer/state.json +# /opt/data/ai-optimizer/results.csv ``` ## SSH Access @@ -58,7 +66,7 @@ Connect as: ssh ai-worker@lazyworkhorse ``` -The working directory will be `/home/ai-worker`, with infra repo accessible at `/home/ai-worker/infra`. +The working directory will be `/home/ai-worker`. No infra repo access. ## Verification @@ -66,27 +74,32 @@ Check ai-worker permissions: ```bash # On the host, as root or gortium: sudo -u ai-worker sudo -l -``` +# Should show: no sudo access -Expected output should show only the whitelisted commands. 
+# Check docker group membership +groups ai-worker +# Should show: ai-worker docker +``` ## Troubleshooting -If ai-worker cannot access infra: +If ai-worker cannot run docker commands: ```bash -# Check bind mount -mount | grep ai-worker/infra +# Check docker group membership +groups ai-worker -# Check permissions -ls -la /home/gortium/infra -ls -la /home/ai-worker/infra +# Verify ollama container is running +docker ps | grep ollama + +# Test docker access +sudo -u ai-worker docker exec ollama ollama list ``` -If sudo commands fail: +If SSH connection fails: ```bash -# Check sudo rules -sudo cat /etc/sudoers.d/* | grep ai-worker +# Check SSH key is authorized (NixOS installs declarative keys under /etc/ssh/authorized_keys.d/) +cat /etc/ssh/authorized_keys.d/ai-worker -# Check audit logs -sudo ausearch -k infra_changes +# Check SSH service +systemctl status sshd ``` diff --git a/modules/nixos/security/ai-worker-restricted.nix b/modules/nixos/security/ai-worker-restricted.nix index a02ec69..0e9d4f6 100644 --- a/modules/nixos/security/ai-worker-restricted.nix +++ b/modules/nixos/security/ai-worker-restricted.nix @@ -6,52 +6,12 @@ with lib; options.services.aiWorkerAccess = mkOption { type = types.bool; default = false; - description = "Enable restricted AI worker access to infra repo with deployment capabilities"; + description = "Enable AI worker SSH access with docker group membership for ollama benchmarking"; }; config = mkIf config.services.aiWorkerAccess { - # Bind mount for infra repo access (read-write for editing) - fileSystems."/home/ai-worker/infra" = { - device = "/home/gortium/infra"; - fsType = "none"; - options = [ "bind" ]; - }; - - # Restricted sudo access - only specific commands allowed - security.sudo.extraRules = [ - { - users = [ "ai-worker" ]; - commands = [ - { - command = "/run/current-system/sw/bin/nh"; - options = [ "NOPASSWD" ]; - } - { - command = "/run/current-system/sw/bin/nixos-rebuild"; - options = [ "NOPASSWD" ]; - } - { - command = "/run/current-system/sw/bin/nixpkgs-fmt"; - options = [ "NOPASSWD" ]; - 
} - { - command = "/run/current-system/sw/bin/nix"; - options = [ "NOPASSWD" ]; - } - ]; - } - ]; - - # Ensure ai-worker has necessary tools available - environment.systemPackages = with pkgs; [ - nh - nixpkgs-fmt - ]; - - # Audit logging for ai-worker actions on infra directory - security.audit.enable = mkDefault true; - security.audit.rules = [ - "-w /home/gortium/infra -p wa -k infra_changes" - ]; + # ai-worker is member of docker group - can run docker commands via SSH + # No bind mounts, no sudo access - docker-only for ollama benchmarking + users.groups.docker.members = [ "ai-worker" ]; }; } diff --git a/users/ai-worker.nix b/users/ai-worker.nix index d7df7c0..48b51de 100644 --- a/users/ai-worker.nix +++ b/users/ai-worker.nix @@ -14,12 +14,12 @@ }; users.groups.ai-worker = {}; - # Enable restricted AI worker access with deployment capabilities + # Enable restricted AI worker SSH access for ollama benchmarking # SECURITY: ai-worker can only: - # - Access /home/ai-worker/infra (bind-mounted to /home/gortium/infra) - # - Run: nh, nixos-rebuild, nixpkgs-fmt, nix (via sudo, no password) - # - Manage docker containers (via docker group) - # - All changes to infra/ are logged via audit subsystem - # WORKFLOW: AI must ask for user confirmation before running nh os switch + # - SSH into host from Hermes container + # - Run docker commands (docker exec ollama ...) 
via docker group + # - NO access to infra repo (no bind mount) + # - NO sudo access (no nh, nixos-rebuild, nixpkgs-fmt, nix) + # WORKFLOW: SSH from Hermes container, run docker benchmarks, return and save results to /opt/data/ai-optimizer/ services.aiWorkerAccess = true; } From ff7303cf6ae8ccba95635342254fd3982fe8f03d Mon Sep 17 00:00:00 2001 From: Hermes Date: Sat, 9 May 2026 20:13:08 +0000 Subject: [PATCH 3/7] feat: add ai-optimizer benchmark plan and state tracking for ollama GPU benchmarking --- assets/ai-optimizer/README.md | 194 ++++++++++++++++++++++++++++++++ assets/ai-optimizer/results.csv | 1 + assets/ai-optimizer/state.json | 21 ++++ 3 files changed, 216 insertions(+) create mode 100644 assets/ai-optimizer/README.md create mode 100644 assets/ai-optimizer/results.csv create mode 100644 assets/ai-optimizer/state.json diff --git a/assets/ai-optimizer/README.md b/assets/ai-optimizer/README.md new file mode 100644 index 0000000..cde9392 --- /dev/null +++ b/assets/ai-optimizer/README.md @@ -0,0 +1,194 @@ +# AI Model Optimizer - Ollama GPU Benchmark Plan + +**Purpose:** Find optimal ollama configurations for maximum context size and GPU utilization on AMD MI50 GPUs. + +**Hardware:** +- 2x AMD MI50 GPUs (32GB VRAM each, 64GB total) +- 128GB system RAM +- ROCm: `HSA_OVERRIDE_GFX_VERSION=9.0.6`, `HIP_VISIBLE_DEVICES=0,1` + +--- + +## File Locations + +``` +STATE: /opt/data/infra/assets/ai-optimizer/state.json +RESULTS: /opt/data/infra/assets/ai-optimizer/results.csv +REPO: /opt/data/infra (persistent clone) +``` + +--- + +## Model Queues + +### GPU Track (Coding - prioritize speed + context on GPU) +1. `deepseek-coder-v2:16b` - Best coding model, fits on GPU +2. `qwen2.5-coder:32b` - Alternative coding model +3. `codellama:34b-instruct` - Legacy option + +### RAM Track (Knowledge - prioritize max context) +1. `qwen2.5:72b` - Large knowledge model +2. `nemotron-3-nano:30b` - Efficient large model +3. 
`mixtral:8x7b-instruct` - MoE architecture + +--- + +## Context Steps (in order) + +``` +[32768, 65536, 98304, 131072, 163840, 200704, 262144, 327680] +``` + +--- + +## Optimization Strategy + +### GPU Track (Coding) +- Start: `num_ctx=32768`, `num_gpu=99`, `flash_attn=true` +- Increase context until OOM or tokens/sec < 5 +- Record best config before hitting wall +- Target: >10 tokens/sec with max context + +### RAM Track (Knowledge) +- Start: `num_ctx=65536`, `num_gpu=50`, `flash_attn=true` +- Allow heavy RAM offload (up to 100GB system RAM) +- Increase context until OOM +- Speed secondary to context size + +--- + +## Prerequisites + +This PR adds the `ai-worker` user with docker group access. After merge: + +```bash +# SSH from Hermes container to run benchmarks on the host +ssh -i /path/to/key ai-worker@host docker exec ollama ollama list + +# Or if running directly on host +docker exec ollama ollama list +``` + +--- + +## Manual Testing Workflow + +### 1. Quick Model Test + +```bash +docker exec ollama ollama run : "Your prompt here" +``` + +### 2. Check Current State + +```bash +cd /opt/data/infra +cat assets/ai-optimizer/state.json +``` + +### 3. Pull Model (if needed) + +```bash +docker exec ollama ollama pull : +``` + +### 4. Create Test Modelfile + +```bash +docker exec ollama bash -c "cat < /root/.ollama/test_${model}.modelfile +FROM ${model} +PARAMETER num_ctx ${num_ctx} +PARAMETER num_gpu ${num_gpu} +PARAMETER flash_attn true +PARAMETER num_predict 4096 +PARAMETER num_keep 1024 +PARAMETER repeat_penalty 1.1 +EOF" + +docker exec ollama ollama create test-model -f /root/.ollama/test_${model}.modelfile +``` + +### 5. Run Benchmark + +```bash +# Warm up +docker exec ollama ollama run test-model "Hello" > /dev/null + +# Coding prompt +docker exec ollama ollama run test-model "Write a Python async context manager that retries a function with exponential backoff, max 5 retries, and logs each attempt using structlog. Include type hints." 
+ +# Knowledge prompt +docker exec ollama ollama run test-model "Explain the complete memory hierarchy in modern GPUs, from registers through L1/L2 caches to VRAM, and how data moves between them during matrix multiplication." +``` + +### 6. Measure VRAM + +```bash +# Try host first +rocm-smi --showmeminfo vram 2>/dev/null || \ +# Try via docker +docker exec --privileged ollama rocm-smi --showmeminfo vram 2>/dev/null || \ +echo "VRAM unavailable" +``` + +### 7. Record Results + +Update `state.json` and append to `results.csv`: +- tokens/sec from ollama output +- VRAM/RAM usage +- Whether this config is the new best + +### 8. Commit Changes + +```bash +cd /opt/data/infra +git add assets/ai-optimizer/ +git commit -m "ai-optimizer: tested ${model} at ${num_ctx} ctx - ${status}" +git push +``` + +--- + +## State File Structure + +```json +{ + "track": "gpu", + "current_model": "deepseek-coder-v2:16b", + "model_index": 0, + "phase": "context_scaling", + "backend": "ollama", + "current_config": { + "num_ctx": 32768, + "num_gpu": 99, + "flash_attn": true + }, + "best_configs": { + "gpu": {}, + "ram": {} + }, + "completed_models": [], + "gpu_queue": ["deepseek-coder-v2:16b", "qwen2.5-coder:32b", "codellama:34b-instruct"], + "ram_queue": ["qwen2.5:72b", "nemotron-3-nano:30b", "mixtral:8x7b-instruct"], + "context_steps": [32768, 65536, 98304, 131072, 163840, 200704, 262144, 327680], + "last_updated": "2026-04-30T00:00:00Z" +} +``` + +--- + +## Results CSV Format + +```csv +timestamp,track,model,backend,phase,num_ctx,num_gpu,flash_attn,tokens_per_sec,vram_gb,ram_gb,status,is_best +``` + +--- + +## Notes + +- **Manual execution** - Run benchmarks when needed, no automated cron job +- **Two tracks**: Complete GPU track first (coding models), then RAM track +- **Backend**: ollama (llama.cpp optional for advanced users) +- **Host access**: Use docker exec (or SSH via ai-worker) for rocm-smi +- **Commit results**: Push best configs to repo for reference diff --git 
a/assets/ai-optimizer/results.csv b/assets/ai-optimizer/results.csv new file mode 100644 index 0000000..7e25194 --- /dev/null +++ b/assets/ai-optimizer/results.csv @@ -0,0 +1 @@ +timestamp,track,model,backend,phase,num_ctx,num_gpu,flash_attn,tokens_per_sec,vram_gb,ram_gb,status,is_best diff --git a/assets/ai-optimizer/state.json b/assets/ai-optimizer/state.json new file mode 100644 index 0000000..08dac90 --- /dev/null +++ b/assets/ai-optimizer/state.json @@ -0,0 +1,21 @@ +{ + "track": "gpu", + "current_model": "deepseek-coder-v2:16b", + "model_index": 0, + "phase": "context_scaling", + "backend": "ollama", + "current_config": { + "num_ctx": 32768, + "num_gpu": 99, + "flash_attn": true + }, + "best_configs": { + "gpu": {}, + "ram": {} + }, + "completed_models": [], + "gpu_queue": ["deepseek-coder-v2:16b", "qwen2.5-coder:32b", "codellama:34b-instruct"], + "ram_queue": ["qwen2.5:72b", "nemotron-3-nano:30b", "mixtral:8x7b-instruct"], + "context_steps": [32768, 65536, 98304, 131072, 163840, 200704, 262144, 327680], + "last_updated": "2026-05-09T00:00:00Z" +} From 96e77c5ef2def7eac6525a92ff97a2262f2828d1 Mon Sep 17 00:00:00 2001 From: Hermes Date: Sat, 9 May 2026 20:19:26 +0000 Subject: [PATCH 4/7] Revert "feat: add ai-optimizer benchmark plan and state tracking for ollama GPU benchmarking" This reverts commit ff7303cf6ae8ccba95635342254fd3982fe8f03d. 
--- assets/ai-optimizer/README.md | 194 -------------------------------- assets/ai-optimizer/results.csv | 1 - assets/ai-optimizer/state.json | 21 ---- 3 files changed, 216 deletions(-) delete mode 100644 assets/ai-optimizer/README.md delete mode 100644 assets/ai-optimizer/results.csv delete mode 100644 assets/ai-optimizer/state.json diff --git a/assets/ai-optimizer/README.md b/assets/ai-optimizer/README.md deleted file mode 100644 index cde9392..0000000 --- a/assets/ai-optimizer/README.md +++ /dev/null @@ -1,194 +0,0 @@ -# AI Model Optimizer - Ollama GPU Benchmark Plan - -**Purpose:** Find optimal ollama configurations for maximum context size and GPU utilization on AMD MI50 GPUs. - -**Hardware:** -- 2x AMD MI50 GPUs (32GB VRAM each, 64GB total) -- 128GB system RAM -- ROCm: `HSA_OVERRIDE_GFX_VERSION=9.0.6`, `HIP_VISIBLE_DEVICES=0,1` - ---- - -## File Locations - -``` -STATE: /opt/data/infra/assets/ai-optimizer/state.json -RESULTS: /opt/data/infra/assets/ai-optimizer/results.csv -REPO: /opt/data/infra (persistent clone) -``` - ---- - -## Model Queues - -### GPU Track (Coding - prioritize speed + context on GPU) -1. `deepseek-coder-v2:16b` - Best coding model, fits on GPU -2. `qwen2.5-coder:32b` - Alternative coding model -3. `codellama:34b-instruct` - Legacy option - -### RAM Track (Knowledge - prioritize max context) -1. `qwen2.5:72b` - Large knowledge model -2. `nemotron-3-nano:30b` - Efficient large model -3. 
`mixtral:8x7b-instruct` - MoE architecture - ---- - -## Context Steps (in order) - -``` -[32768, 65536, 98304, 131072, 163840, 200704, 262144, 327680] -``` - ---- - -## Optimization Strategy - -### GPU Track (Coding) -- Start: `num_ctx=32768`, `num_gpu=99`, `flash_attn=true` -- Increase context until OOM or tokens/sec < 5 -- Record best config before hitting wall -- Target: >10 tokens/sec with max context - -### RAM Track (Knowledge) -- Start: `num_ctx=65536`, `num_gpu=50`, `flash_attn=true` -- Allow heavy RAM offload (up to 100GB system RAM) -- Increase context until OOM -- Speed secondary to context size - ---- - -## Prerequisites - -This PR adds the `ai-worker` user with docker group access. After merge: - -```bash -# SSH from Hermes container to run benchmarks on the host -ssh -i /path/to/key ai-worker@host docker exec ollama ollama list - -# Or if running directly on host -docker exec ollama ollama list -``` - ---- - -## Manual Testing Workflow - -### 1. Quick Model Test - -```bash -docker exec ollama ollama run : "Your prompt here" -``` - -### 2. Check Current State - -```bash -cd /opt/data/infra -cat assets/ai-optimizer/state.json -``` - -### 3. Pull Model (if needed) - -```bash -docker exec ollama ollama pull : -``` - -### 4. Create Test Modelfile - -```bash -docker exec ollama bash -c "cat < /root/.ollama/test_${model}.modelfile -FROM ${model} -PARAMETER num_ctx ${num_ctx} -PARAMETER num_gpu ${num_gpu} -PARAMETER flash_attn true -PARAMETER num_predict 4096 -PARAMETER num_keep 1024 -PARAMETER repeat_penalty 1.1 -EOF" - -docker exec ollama ollama create test-model -f /root/.ollama/test_${model}.modelfile -``` - -### 5. Run Benchmark - -```bash -# Warm up -docker exec ollama ollama run test-model "Hello" > /dev/null - -# Coding prompt -docker exec ollama ollama run test-model "Write a Python async context manager that retries a function with exponential backoff, max 5 retries, and logs each attempt using structlog. Include type hints." 
- -# Knowledge prompt -docker exec ollama ollama run test-model "Explain the complete memory hierarchy in modern GPUs, from registers through L1/L2 caches to VRAM, and how data moves between them during matrix multiplication." -``` - -### 6. Measure VRAM - -```bash -# Try host first -rocm-smi --showmeminfo vram 2>/dev/null || \ -# Try via docker -docker exec --privileged ollama rocm-smi --showmeminfo vram 2>/dev/null || \ -echo "VRAM unavailable" -``` - -### 7. Record Results - -Update `state.json` and append to `results.csv`: -- tokens/sec from ollama output -- VRAM/RAM usage -- Whether this config is the new best - -### 8. Commit Changes - -```bash -cd /opt/data/infra -git add assets/ai-optimizer/ -git commit -m "ai-optimizer: tested ${model} at ${num_ctx} ctx - ${status}" -git push -``` - ---- - -## State File Structure - -```json -{ - "track": "gpu", - "current_model": "deepseek-coder-v2:16b", - "model_index": 0, - "phase": "context_scaling", - "backend": "ollama", - "current_config": { - "num_ctx": 32768, - "num_gpu": 99, - "flash_attn": true - }, - "best_configs": { - "gpu": {}, - "ram": {} - }, - "completed_models": [], - "gpu_queue": ["deepseek-coder-v2:16b", "qwen2.5-coder:32b", "codellama:34b-instruct"], - "ram_queue": ["qwen2.5:72b", "nemotron-3-nano:30b", "mixtral:8x7b-instruct"], - "context_steps": [32768, 65536, 98304, 131072, 163840, 200704, 262144, 327680], - "last_updated": "2026-04-30T00:00:00Z" -} -``` - ---- - -## Results CSV Format - -```csv -timestamp,track,model,backend,phase,num_ctx,num_gpu,flash_attn,tokens_per_sec,vram_gb,ram_gb,status,is_best -``` - ---- - -## Notes - -- **Manual execution** - Run benchmarks when needed, no automated cron job -- **Two tracks**: Complete GPU track first (coding models), then RAM track -- **Backend**: ollama (llama.cpp optional for advanced users) -- **Host access**: Use docker exec (or SSH via ai-worker) for rocm-smi -- **Commit results**: Push best configs to repo for reference diff --git 
a/assets/ai-optimizer/results.csv b/assets/ai-optimizer/results.csv deleted file mode 100644 index 7e25194..0000000 --- a/assets/ai-optimizer/results.csv +++ /dev/null @@ -1 +0,0 @@ -timestamp,track,model,backend,phase,num_ctx,num_gpu,flash_attn,tokens_per_sec,vram_gb,ram_gb,status,is_best diff --git a/assets/ai-optimizer/state.json b/assets/ai-optimizer/state.json deleted file mode 100644 index 08dac90..0000000 --- a/assets/ai-optimizer/state.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "track": "gpu", - "current_model": "deepseek-coder-v2:16b", - "model_index": 0, - "phase": "context_scaling", - "backend": "ollama", - "current_config": { - "num_ctx": 32768, - "num_gpu": 99, - "flash_attn": true - }, - "best_configs": { - "gpu": {}, - "ram": {} - }, - "completed_models": [], - "gpu_queue": ["deepseek-coder-v2:16b", "qwen2.5-coder:32b", "codellama:34b-instruct"], - "ram_queue": ["qwen2.5:72b", "nemotron-3-nano:30b", "mixtral:8x7b-instruct"], - "context_steps": [32768, 65536, 98304, 131072, 163840, 200704, 262144, 327680], - "last_updated": "2026-05-09T00:00:00Z" -} From 6806898f04ccfbff8749a48b75bfea08053a1d8c Mon Sep 17 00:00:00 2001 From: Hermes Date: Sun, 10 May 2026 10:12:34 -0400 Subject: [PATCH 5/7] feat: update compose submodule for ollama-gfx906 (v0.23.2) + add ollama Dockerfile --- assets/compose | 2 +- assets/ollama/Dockerfile | 106 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 107 insertions(+), 1 deletion(-) create mode 100644 assets/ollama/Dockerfile diff --git a/assets/compose b/assets/compose index a79fe9d..6b82a26 160000 --- a/assets/compose +++ b/assets/compose @@ -1 +1 @@ -Subproject commit a79fe9dffacebae6d4ee17502885e9cdfa852073 +Subproject commit 6b82a26c25f1592a2d1c9bea4f941864362fe001 diff --git a/assets/ollama/Dockerfile b/assets/ollama/Dockerfile new file mode 100644 index 0000000..438e607 --- /dev/null +++ b/assets/ollama/Dockerfile @@ -0,0 +1,106 @@ +# ollama-gfx906/Dockerfile +# +# Custom ollama image with ROCm 6.1 + gfx906 (MI50) 
support.
+# The official ollama/rocm image ships ROCm 7.2 which dropped gfx906.
+# This uses v0.23.2's native CMake build system with AMDGPU_TARGETS including gfx906.
+#
+# Build: docker build -t ollama/ollama:rocm-gfx906 ai/ollama
+
+FROM rocm/dev-ubuntu-22.04:6.1.2-complete AS builder
+
+# Build dependencies (CMake, Ninja, Go)
+ARG CMAKEVERSION=3.31.2
+ARG NINJAVERSION=1.12.1
+ARG GOLANG_VERSION=1.22.0
+
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
+    curl git ccache build-essential pkg-config unzip \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install CMake from official binaries
+RUN curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-x86_64.tar.gz \
+    | tar xz -C /usr/local --strip-components 1
+
+# Install Ninja
+RUN curl -fsSL -o /tmp/ninja.zip \
+    https://github.com/ninja-build/ninja/releases/download/v${NINJAVERSION}/ninja-linux.zip \
+    && unzip /tmp/ninja.zip -d /usr/local/bin && rm /tmp/ninja.zip
+
+# Install Go
+RUN curl -fsSL https://go.dev/dl/go${GOLANG_VERSION}.linux-amd64.tar.gz \
+    | tar xz -C /usr/local
+ENV PATH=/usr/local/go/bin:$PATH
+
+ARG OLLAMA_VERSION=v0.23.2
+RUN git clone --depth 1 --branch ${OLLAMA_VERSION} https://github.com/ollama/ollama.git /build
+WORKDIR /build
+
+# ROCm paths
+ENV HIP_PATH=/opt/rocm
+ENV ROCM_PATH=/opt/rocm
+ENV CMAKE_GENERATOR=Ninja
+ENV LDFLAGS=-s
+
+# Step 1: Build CPU backends with GCC (no ROCm preset)
+# Pre-set CMAKE_HIP_COMPILER="" to prevent check_language(HIP) from
+# finding a HIP compiler (it searches /opt/rocm even without PATH).
+# Remove /opt/rocm from PATH to prevent find_program from finding hipcc.
+RUN mkdir -p build-cpu && \
+    PATH=/usr/local/go/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin \
+    cmake -B build-cpu -DCMAKE_BUILD_TYPE=Release \
+        -DCMAKE_HIP_COMPILER="" \
+        -DCMAKE_INSTALL_PREFIX=/build/dist && \
+    cmake --build build-cpu --target ggml-cpu -- -l $(nproc) && \
+    cmake --install build-cpu --component CPU --strip && \
+    echo "=== CPU install ===" && \
+    (find /build/dist/lib/ollama -type f -o -type l 2>&1 | head -20 || echo "empty")
+
+# Step 2: Build HIP backend with ROCm preset + gfx906 target only
+# The ROCm 6 preset enables HIP language detection (enable_language(HIP))
+# which ensures GPU kernels are properly compiled for gfx906.
+# OLLAMA_RUNNER_DIR=rocm from the preset, so HIP goes to lib/ollama/rocm/
+# Need CMAKE_PREFIX_PATH so find_package(hip) finds hip-config.cmake
+# at /opt/rocm/lib/cmake/hip/hip-config.cmake.
+RUN mkdir -p build-hip && \
+    cmake -B build-hip \
+        --preset 'ROCm 6' \
+        -DAMDGPU_TARGETS="gfx906:xnack-" \
+        -DCMAKE_BUILD_TYPE=Release \
+        -DCMAKE_PREFIX_PATH="/opt/rocm" && \
+    cmake --build build-hip --target ggml-hip -- -l $(nproc) && \
+    cmake --install build-hip --component HIP --strip && \
+    echo "=== HIP install ===" && \
+    find /build/dist/lib/ollama -type f -o -type l | head -20
+
+# Step 3: Build Go binary (GCC for CGo linking)
+ENV CGO_ENABLED=1
+RUN go build -trimpath -ldflags="-X=github.com/ollama/ollama/version.Version=${OLLAMA_VERSION}" -o /build/dist/ollama .
+
+# ---------- Runtime image ----------
+FROM ubuntu:24.04
+
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
+    ca-certificates curl libstdc++6 libgomp1 libvulkan1 libopenblas0 \
+    && rm -rf /var/lib/apt/lists/*
+
+# Copy ROCm 6.1 runtime libraries
+# These are needed at runtime by ggml-hip via LD_LIBRARY_PATH
+COPY --from=builder /opt/rocm/lib/ /opt/rocm/lib/
+COPY --from=builder /opt/rocm/share/ /opt/rocm/share/
+
+# Copy ollama binary + all backends (CPU + HIP)
+# CPU install: /build/dist/lib/ollama/libggml-*.so
+# HIP install: /build/dist/lib/ollama/rocm/libggml-hip.so
+COPY --from=builder /build/dist/ollama /usr/bin/ollama
+COPY --from=builder /build/dist/lib/ollama/ /usr/lib/ollama/
+
+RUN ldconfig
+
+ENV LD_LIBRARY_PATH=/opt/rocm/lib:/usr/lib/ollama/rocm:/usr/lib/ollama
+ENV HSA_OVERRIDE_GFX_VERSION=9.0.6 HCC_AMDGPU_TARGET=gfx906 HSA_ENABLE_SDMA=0
+# Listen on all interfaces like the official image; Ollama defaults to 127.0.0.1, which a published port cannot reach.
+ENV OLLAMA_HOST=0.0.0.0:11434
+
+EXPOSE 11434
+ENTRYPOINT ["/usr/bin/ollama"]
+CMD ["serve"]
From c07debf088d030453ccbc70c51ee6d3ac9fcec7a Mon Sep 17 00:00:00 2001
From: Robert
Date: Sun, 10 May 2026 16:51:32 -0400
Subject: [PATCH 6/7] Added wireguard keys

---
 secrets/wireguard_preshared_key.age | 9 +++++++++
 secrets/wireguard_private_key.age | 11 +++++++++++
 2 files changed, 20 insertions(+)
 create mode 100644 secrets/wireguard_preshared_key.age
 create mode 100644 secrets/wireguard_private_key.age

diff --git a/secrets/wireguard_preshared_key.age b/secrets/wireguard_preshared_key.age
new file mode 100644
index 0000000..6149647
--- /dev/null
+++ b/secrets/wireguard_preshared_key.age
@@ -0,0 +1,9 @@
+-----BEGIN AGE ENCRYPTED FILE-----
+YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IHNzaC1lZDI1NTE5IEdoTUQ4QSA3VG9Z
+MVFPVFc2VVJ3d0h0dmtBUnI3WHl2SzUxTkRZbjFCaGloWmV3dnd3ClcxdnVPeGd6
+SU4zR0Q0K1dtVjRRVHd0VW5XSFI0dVFpTjZnYk1DNjRxTVEKLT4gQzlgRy1ncmVh
+c2UKeUozOWgyUytSTVF0NjY2STBEb2VadwotLS0gblI3bmJCUWxxU3QrYTEyVFBI
+Snc4NC9rTkh0NnZYbUtxUE9hRWRkelpmMAq58fmH6cK13GeD7wGLxKmx10hmJeW4
+b7KqnCD1ZP7uG85s32xzVRwRG8RrG4xZo5nR9Mrtg1CoTSFfUGeFnf5xveN+Ej0X
+wDVB1LwC+Q==
+-----END AGE ENCRYPTED FILE-----
diff --git a/secrets/wireguard_private_key.age b/secrets/wireguard_private_key.age
new file mode 100644
index 0000000..09d0213
--- /dev/null
+++ b/secrets/wireguard_private_key.age
@@ -0,0 +1,11 @@
+-----BEGIN AGE ENCRYPTED FILE-----
+YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IHNzaC1lZDI1NTE5IEdoTUQ4QSA5dzVG
+WUNvT3NlRmcrWS81bzJqSWlTekVYaDFFTE10SkI2dEgzaGpxcUI4Cmk5Y0FGYTRZ
+K0NGYzY3VUp4aS9ZZGRmWTgybDJFUURva2pZNmVOS3QxdEUKLT4gPnVRTCtldGMt
+Z3JlYXNlCk04OTJZeFRNeDI5aGpMVTk1ZTE0Y2FMMnFEMjlJalJpMHRlaTE4ZWIx
+d2lCRGQ5RHVjcktOMGJCb1VERlNWcTYKaSt0L1Z6dVJ0QWIyZkhsYzFEVjZSQWUr
+ZWpwVlo1TmhoUFJZdkEvR0gxNlVhcXF2ZTRnCi0tLSBLcmM2MThNVkdWclpHUXRr
+VTF6QVk2WUZlTXpZMVNLMlpBOFc3M1o5WjZzCs9xbPlIX+u5vRSQ/z9utu+I9S2c
+02DOsIb1kzxzb1OK91b8Kh4JucQSq3qkyEvRucsNn5QW8hIHDnRuND6EbPyN7p4S
+YB/F0dxSqgnq
+-----END AGE ENCRYPTED FILE-----
From f722af7803c96eb655c9d4999fcb672243e54333 Mon Sep 17 00:00:00 2001
From: Robert
Date: Sun, 10 May 2026 16:56:09 -0400
Subject: [PATCH 7/7] New ollama model creator module version

---
 .../services/ollama_init_custom_models.nix | 96 +++++++++++--------
 1 file changed, 58 insertions(+), 38 deletions(-)

diff --git a/modules/nixos/services/ollama_init_custom_models.nix b/modules/nixos/services/ollama_init_custom_models.nix
index 4dc965d..aa060cc 100644
--- a/modules/nixos/services/ollama_init_custom_models.nix
+++ b/modules/nixos/services/ollama_init_custom_models.nix
@@ -1,67 +1,87 @@
 { pkgs, ... }: {
   systemd.services.init-ollama-model = {
     description = "Initialize LLM models with extra context in Ollama Docker";
-    after = [ "docker-ollama.service" ];
+
+    # Make sure the Docker daemon is running before this script starts
+    after = [ "docker.service" ];
     wantedBy = [ "multi-user.target" ];
+
     script = ''
-      # Wait for Ollama
-      while ! ${pkgs.curl}/bin/curl -s http://localhost:11434/api/tags > /dev/null; do
-        sleep 2
-      done
+      # Run the whole initialization in a background subshell so boot is not blocked
+      (
+        echo "Starting asynchronous Ollama initialization..."
+
+        # Wait for Ollama (at most 120 seconds, so the loop cannot spin forever)
+        TIMEOUT=120
+        COUNT=0
+        while ! ${pkgs.curl}/bin/curl -s -f http://127.0.0.1:11434/api/tags > /dev/null; do
+          if [ $COUNT -ge $TIMEOUT ]; then
+            echo "Ollama did not become ready in time. Exiting."
+            exit 1
+          fi
+          echo "Waiting for Ollama API to be reachable..."
+          sleep 5
+          COUNT=$((COUNT + 5))
+        done

-      create_model_if_missing() {
-        local model_name=$1
-        local base_model=$2
-        if ! ${pkgs.docker}/bin/docker exec ollama ollama list | grep -q "$model_name"; then
-          echo "$model_name not found, creating from $base_model..."
+        create_model_if_missing() {
+          local model_name=$1
+          local base_model=$2

-          # We use a custom TEMPLATE block to strip the 'currentDate' function
-          # which is unsupported in Ollama 0.5.7 but present in Devstral's default manifest.
-          ${pkgs.docker}/bin/docker exec ollama sh -c "cat <<EOF > /root/.ollama/$model_name.modelfile
+          # Exact-name lookup through Ollama's HTTP API instead of `docker exec ... | grep` (no tty involved)
+          if ! ${pkgs.curl}/bin/curl -s http://127.0.0.1:11434/api/tags | ${pkgs.jq}/bin/jq -e ".models[] | select(.name == \"$model_name\")" > /dev/null; then
+            echo "$model_name not found, creating from $base_model..."
+
+            # Write the Modelfile to a temp file on the host (unquoted heredoc: $base_model is expanded), then copy it into the container
+            TMP_FILE=$(mktemp)
+            cat <<EOF > "$TMP_FILE"
 FROM $base_model
-TEMPLATE \"\"\"{{- if .System }}
+TEMPLATE """{{- if .System }}
 [SYSTEM_PROMPT]
 {{ .System }}
 [/SYSTEM_PROMPT]
 {{- end }}
 {{- range .Messages }}
-{{- if eq .Role \"user\" }}
+{{- if eq .Role "user" }}
 [INST]
 {{ .Content }}
 [/INST]
-{{- else if eq .Role \"assistant\" }}
+{{- else if eq .Role "assistant" }}
 {{ .Content }}
 {{- end }}
-{{- end }}\"\"\"
+{{- end }}"""
 PARAMETER num_ctx 131072
 PARAMETER num_predict 4096
 PARAMETER num_keep 1024
 PARAMETER repeat_penalty 1.1
 PARAMETER top_k 40
-PARAMETER stop \"[INST]\"
-PARAMETER stop \"[/INST]\"
-PARAMETER stop \"</s>\"
-EOF"
-          ${pkgs.docker}/bin/docker exec ollama ollama create "$model_name" -f "/root/.ollama/$model_name.modelfile"
-          ${pkgs.docker}/bin/docker exec ollama rm "/root/.ollama/$model_name.modelfile"
-        else
-          echo "$model_name already exists, skipping."
-        fi
-      }
+PARAMETER stop "[INST]"
+PARAMETER stop "[/INST]"
+PARAMETER stop "</s>"
+EOF

-      # Create Nemotron
-      create_model_if_missing "nemotron-3-nano:30b-128k" "nemotron-3-nano:30b"
-
-      # Create Devstral
-      create_model_if_missing "devstral-small-2:24b-128k" "devstral-small-2:24b"
-
-      # create_model_if_missing "qwen2.5-coder:32b-128k" "qwen2.5-coder:32b"
-
-      # create_model_if_missing "mistral-large-planner:123b" "mistral-large:123b-instruct-v2407-q4_K_S"
+            # Copy the Modelfile into the container, create the model, then clean up both copies
+            ${pkgs.docker}/bin/docker cp "$TMP_FILE" ollama:/tmp/model.modelfile
+            ${pkgs.docker}/bin/docker exec ollama ollama create "$model_name" -f /tmp/model.modelfile
+            ${pkgs.docker}/bin/docker exec ollama rm /tmp/model.modelfile
+            rm -f "$TMP_FILE"
+          else
+            echo "$model_name already exists, skipping."
+          fi
+        }
+
+        # Create Nemotron
+        create_model_if_missing "nemotron-3-nano:30b-128k" "nemotron-3-nano:30b"
+
+        # Create Devstral
+        create_model_if_missing "devstral-small-2:24b-128k" "devstral-small-2:24b"
+
+      ) &
     '';
+
     serviceConfig = {
-      Type = "oneshot";
-      RemainAfterExit = true;
+      Type = "forking"; # The script backgrounds its work with '&'; tells systemd the start-up process exits while the job continues
+      User = "root";
     };
   };
 }