From 18df45819d4d857b6536a77e86ce6fcde057f03e Mon Sep 17 00:00:00 2001
From: Hermes Agent <hermes@lazyworkhorse.net>
Date: Tue, 28 Apr 2026 15:34:38 +0000
Subject: [PATCH 1/7] Add restricted AI worker access with deployment
 capabilities

- New module: modules/nixos/security/ai-worker-restricted.nix
  - Bind mount for infra repo access (RW)
  - Whitelisted sudo commands: nh, nixos-rebuild, nixpkgs-fmt, nix
  - Audit logging for infra changes
  - Documentation in README-ai-worker.md

- Updated users/ai-worker.nix:
  - Enable services.aiWorkerAccess
  - Lock password (SSH key only)
  - Security documentation comments

- Updated flake.nix:
  - Include new security module

SECURITY: AI must ask for user confirmation before running nh os switch
---
 flake.nix                                     |  1 +
 modules/nixos/security/README-ai-worker.md    | 92 +++++++++++++++++++
 .../nixos/security/ai-worker-restricted.nix   | 57 ++++++++++++
 users/ai-worker.nix                           | 11 +++
 4 files changed, 161 insertions(+)
 create mode 100644 modules/nixos/security/README-ai-worker.md
 create mode 100644 modules/nixos/security/ai-worker-restricted.nix

diff --git a/flake.nix b/flake.nix
index a06b03e..8f8b51a 100644
--- a/flake.nix
+++ b/flake.nix
@@ -61,6 +61,7 @@
               ./modules/nixos/services/open_code_server.nix
               ./modules/nixos/services/ollama_init_custom_models.nix
               ./modules/nixos/services/openclaw_node.nix
+              ./modules/nixos/security/ai-worker-restricted.nix
               ./users/gortium.nix
               ./users/ai-worker.nix
             ];
diff --git a/modules/nixos/security/README-ai-worker.md b/modules/nixos/security/README-ai-worker.md
new file mode 100644
index 0000000..8600e08
--- /dev/null
+++ b/modules/nixos/security/README-ai-worker.md
@@ -0,0 +1,92 @@
+# AI Worker Restricted Access
+
+This module provides restricted access for the AI worker (hermes-agent) to manage the infra repository.
+
+## Security Model
+
+The `ai-worker` user has:
+
+### Filesystem Access
+- **Bind mount**: `/home/ai-worker/infra` → `/home/gortium/infra` (read-write)
+- **Cannot access**: Any other files outside the bind mount and standard system paths
+
+### Sudo Access (Whitelist Only)
+The following commands are allowed via sudo without password:
+- `/run/current-system/sw/bin/nh` - NixOS home manager
+- `/run/current-system/sw/bin/nixos-rebuild` - System rebuild
+- `/run/current-system/sw/bin/nixpkgs-fmt` - Nix formatter
+- `/run/current-system/sw/bin/nix` - Nix package manager
+
+### Docker Access
+- Member of `docker` group - can manage containers
+- Cannot modify host system directly
+
+### Audit Logging
+- All changes to `/home/gortium/infra` are logged via Linux audit subsystem
+- Audit rule: `-w /home/gortium/infra -p wa -k infra_changes`
+
+## Workflow: Ask First, Always
+
+**CRITICAL**: Before running any deployment command (`nh os switch` or `nixos-rebuild`), the AI MUST:
+
+1. **Show the planned changes** to the user
+2. **Explain the impact** of the changes
+3. **Wait for explicit confirmation** before executing
+
+### Example Workflow
+
+```bash
+# AI prepares changes
+cd /home/ai-worker/infra
+# ... edits files ...
+nixpkgs-fmt .
+
+# AI shows diff to user
+git diff
+
+# AI asks: "Ready to deploy? This will restart the ai_stack service."
+# User responds: "Yes, proceed"
+
+# Only then does AI run:
+sudo nh os switch --flake .#lazyworkhorse
+```
+
+## SSH Access
+
+Connect as:
+```bash
+ssh ai-worker@lazyworkhorse
+```
+
+The working directory will be `/home/ai-worker`, with infra repo accessible at `/home/ai-worker/infra`.
+
+## Verification
+
+Check ai-worker permissions:
+```bash
+# On the host, as root or gortium:
+sudo -u ai-worker sudo -l
+```
+
+Expected output should show only the whitelisted commands.
+
+## Troubleshooting
+
+If ai-worker cannot access infra:
+```bash
+# Check bind mount
+mount | grep ai-worker/infra
+
+# Check permissions
+ls -la /home/gortium/infra
+ls -la /home/ai-worker/infra
+```
+
+If sudo commands fail:
+```bash
+# Check sudo rules
+sudo cat /etc/sudoers.d/* | grep ai-worker
+
+# Check audit logs
+sudo ausearch -k infra_changes
+```
diff --git a/modules/nixos/security/ai-worker-restricted.nix b/modules/nixos/security/ai-worker-restricted.nix
new file mode 100644
index 0000000..a02ec69
--- /dev/null
+++ b/modules/nixos/security/ai-worker-restricted.nix
@@ -0,0 +1,57 @@
+{ config, pkgs, lib, ... }:
+
+with lib;
+
+{
+  options.services.aiWorkerAccess = mkOption {
+    type = types.bool;
+    default = false;
+    description = "Enable restricted AI worker access to infra repo with deployment capabilities";
+  };
+
+  config = mkIf config.services.aiWorkerAccess {
+    # Bind mount for infra repo access (read-write for editing)
+    fileSystems."/home/ai-worker/infra" = {
+      device = "/home/gortium/infra";
+      fsType = "none";
+      options = [ "bind" ];
+    };
+
+    # Restricted sudo access - only specific commands allowed
+    security.sudo.extraRules = [
+      {
+        users = [ "ai-worker" ];
+        commands = [
+          {
+            command = "/run/current-system/sw/bin/nh";
+            options = [ "NOPASSWD" ];
+          }
+          {
+            command = "/run/current-system/sw/bin/nixos-rebuild";
+            options = [ "NOPASSWD" ];
+          }
+          {
+            command = "/run/current-system/sw/bin/nixpkgs-fmt";
+            options = [ "NOPASSWD" ];
+          }
+          {
+            command = "/run/current-system/sw/bin/nix";
+            options = [ "NOPASSWD" ];
+          }
+        ];
+      }
+    ];
+
+    # Ensure ai-worker has necessary tools available
+    environment.systemPackages = with pkgs; [
+      nh
+      nixpkgs-fmt
+    ];
+
+    # Audit logging for ai-worker actions on infra directory
+    security.audit.enable = mkDefault true;
+    security.audit.rules = [
+      "-w /home/gortium/infra -p wa -k infra_changes"
+    ];
+  };
+}
diff --git a/users/ai-worker.nix b/users/ai-worker.nix
index a8f027c..d7df7c0 100644
--- a/users/ai-worker.nix
+++ b/users/ai-worker.nix
@@ -9,6 +9,17 @@
     openssh.authorizedKeys.keys = [
       keys.users.ai-worker.main
     ];
+    # No password login - SSH key only
+    hashedPassword = "!";
   };
   users.groups.ai-worker = {};
+
+  # Enable restricted AI worker access with deployment capabilities
+  # SECURITY: ai-worker can only:
+  #   - Access /home/ai-worker/infra (bind-mounted to /home/gortium/infra)
+  #   - Run: nh, nixos-rebuild, nixpkgs-fmt, nix (via sudo, no password)
+  #   - Manage docker containers (via docker group)
+  #   - All changes to infra/ are logged via audit subsystem
+  # WORKFLOW: AI must ask for user confirmation before running nh os switch
+  services.aiWorkerAccess = true;
 }

From f0e21d95e4b9734be0101b0dd68f8a0d906f1603 Mon Sep 17 00:00:00 2001
From: Hermes Agent <hermes@lazyworkhorse.net>
Date: Wed, 29 Apr 2026 19:55:19 +0000
Subject: [PATCH 2/7] fix: ai-worker docker-only access for ollama benchmarking

Remove infra repo bind mount and sudo access from ai-worker user.
Now ai-worker can only:
- SSH into host from Hermes container
- Run docker commands via docker group membership
- Execute ollama benchmarks via docker exec

Results saved to /opt/data/ai-optimizer/ in Hermes container.
---
 modules/nixos/security/README-ai-worker.md    | 103 ++++++++++--------
 .../nixos/security/ai-worker-restricted.nix   |  48 +-------
 users/ai-worker.nix                           |  12 +-
 3 files changed, 68 insertions(+), 95 deletions(-)

diff --git a/modules/nixos/security/README-ai-worker.md b/modules/nixos/security/README-ai-worker.md
index 8600e08..6128573 100644
--- a/modules/nixos/security/README-ai-worker.md
+++ b/modules/nixos/security/README-ai-worker.md
@@ -1,54 +1,62 @@
 # AI Worker Restricted Access
 
-This module provides restricted access for the AI worker (hermes-agent) to manage the infra repository.
+This module provides SSH access for the AI worker (hermes-agent) to run ollama benchmarks on the host.
 
 ## Security Model
 
 The `ai-worker` user has:
 
 ### Filesystem Access
-- **Bind mount**: `/home/ai-worker/infra` → `/home/gortium/infra` (read-write)
-- **Cannot access**: Any other files outside the bind mount and standard system paths
+- **Home directory**: `/home/ai-worker` (standard user home)
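+- **No bind mounts**: `/home/gortium/infra` and other host files are not exposed to the `ai-worker` account directly
+- **Cannot access** (via normal file permissions): Any files outside standard system paths. Caveat: `docker` group membership is root-equivalent on the host (e.g. `docker run -v /:/host ...`), so this is not a hard security boundary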
 
-### Sudo Access (Whitelist Only)
-The following commands are allowed via sudo without password:
-- `/run/current-system/sw/bin/nh` - NixOS home manager
-- `/run/current-system/sw/bin/nixos-rebuild` - System rebuild
-- `/run/current-system/sw/bin/nixpkgs-fmt` - Nix formatter
-- `/run/current-system/sw/bin/nix` - Nix package manager
+### Sudo Access
+- **NONE**: ai-worker has no sudo privileges
+- Cannot run `nh`, `nixos-rebuild`, `nixpkgs-fmt`, or `nix` with elevated permissions
 
 ### Docker Access
-- Member of `docker` group - can manage containers
-- Cannot modify host system directly
+- Member of `docker` group - can run `docker` and `docker exec` commands
+- Primary use: `docker exec ollama ollama ...` for benchmarking
+- Can run `docker exec --privileged ollama rocm-smi ...` for VRAM monitoring
 
-### Audit Logging
-- All changes to `/home/gortium/infra` are logged via Linux audit subsystem
-- Audit rule: `-w /home/gortium/infra -p wa -k infra_changes`
+## Workflow: SSH + Docker Benchmarking
 
-## Workflow: Ask First, Always
-
-**CRITICAL**: Before running any deployment command (`nh os switch` or `nixos-rebuild`), the AI MUST:
-
-1. **Show the planned changes** to the user
-2. **Explain the impact** of the changes
-3. **Wait for explicit confirmation** before executing
+The AI worker connects from the Hermes container to the host via SSH, runs ollama benchmarks, then returns to save results.
 
 ### Example Workflow
 
 ```bash
-# AI prepares changes
-cd /home/ai-worker/infra
-# ... edits files ...
-nixpkgs-fmt .
+# From Hermes container, SSH to host
+ssh -i /path/to/ssh/key ai-worker@host.docker.internal
 
-# AI shows diff to user
-git diff
+# On host, run ollama benchmarks via docker
+docker exec ollama ollama pull devstral-small-2:24b
 
-# AI asks: "Ready to deploy? This will restart the ai_stack service."
-# User responds: "Yes, proceed"
+# Create test modelfile
+docker exec ollama bash -c 'cat <<EOF > /root/.ollama/test.modelfile
+FROM devstral-small-2:24b
+PARAMETER num_ctx 65536
+PARAMETER num_gpu 99
+# flash attention is enabled with OLLAMA_FLASH_ATTENTION=1 on the ollama server (it is not a Modelfile PARAMETER)
+EOF'
 
-# Only then does AI run:
-sudo nh os switch --flake .#lazyworkhorse
+# Create and test model
+docker exec ollama ollama create test-model -f /root/.ollama/test.modelfile
+docker exec ollama ollama run test-model "Write a Python async function"
+
+# Check VRAM usage
+docker exec --privileged ollama rocm-smi --showmeminfo vram
+
+# Cleanup
+docker exec ollama ollama rm test-model
+
+# Exit SSH, return to Hermes container
+exit
+
+# Save results in Hermes container
+# /opt/data/ai-optimizer/state.json
+# /opt/data/ai-optimizer/results.csv
 ```
 
 ## SSH Access
@@ -58,7 +66,7 @@ Connect as:
 ssh ai-worker@lazyworkhorse
 ```
 
-The working directory will be `/home/ai-worker`, with infra repo accessible at `/home/ai-worker/infra`.
+The working directory will be `/home/ai-worker`. No infra repo access.
 
 ## Verification
 
@@ -66,27 +74,32 @@ Check ai-worker permissions:
 ```bash
 # On the host, as root or gortium:
 sudo -u ai-worker sudo -l
-```
+# Should show: no sudo access
 
-Expected output should show only the whitelisted commands.
+# Check docker group membership
+groups ai-worker
+# Should show: ai-worker docker
+```
 
 ## Troubleshooting
 
-If ai-worker cannot access infra:
+If ai-worker cannot run docker commands:
 ```bash
-# Check bind mount
-mount | grep ai-worker/infra
+# Check docker group membership
+groups ai-worker
 
-# Check permissions
-ls -la /home/gortium/infra
-ls -la /home/ai-worker/infra
+# Verify ollama container is running
+docker ps | grep ollama
+
+# Test docker access
+sudo -u ai-worker docker exec ollama ollama list
 ```
 
-If sudo commands fail:
+If SSH connection fails:
 ```bash
-# Check sudo rules
-sudo cat /etc/sudoers.d/* | grep ai-worker
+# Check SSH key is authorized
+cat /etc/ssh/authorized_keys.d/ai-worker
 
-# Check audit logs
-sudo ausearch -k infra_changes
+# Check SSH service
+systemctl status sshd
 ```
diff --git a/modules/nixos/security/ai-worker-restricted.nix b/modules/nixos/security/ai-worker-restricted.nix
index a02ec69..0e9d4f6 100644
--- a/modules/nixos/security/ai-worker-restricted.nix
+++ b/modules/nixos/security/ai-worker-restricted.nix
@@ -6,52 +6,12 @@ with lib;
   options.services.aiWorkerAccess = mkOption {
     type = types.bool;
     default = false;
-    description = "Enable restricted AI worker access to infra repo with deployment capabilities";
+    description = "Enable AI worker SSH access with docker group membership for ollama benchmarking";
   };
 
   config = mkIf config.services.aiWorkerAccess {
-    # Bind mount for infra repo access (read-write for editing)
-    fileSystems."/home/ai-worker/infra" = {
-      device = "/home/gortium/infra";
-      fsType = "none";
-      options = [ "bind" ];
-    };
-
-    # Restricted sudo access - only specific commands allowed
-    security.sudo.extraRules = [
-      {
-        users = [ "ai-worker" ];
-        commands = [
-          {
-            command = "/run/current-system/sw/bin/nh";
-            options = [ "NOPASSWD" ];
-          }
-          {
-            command = "/run/current-system/sw/bin/nixos-rebuild";
-            options = [ "NOPASSWD" ];
-          }
-          {
-            command = "/run/current-system/sw/bin/nixpkgs-fmt";
-            options = [ "NOPASSWD" ];
-          }
-          {
-            command = "/run/current-system/sw/bin/nix";
-            options = [ "NOPASSWD" ];
-          }
-        ];
-      }
-    ];
-
-    # Ensure ai-worker has necessary tools available
-    environment.systemPackages = with pkgs; [
-      nh
-      nixpkgs-fmt
-    ];
-
-    # Audit logging for ai-worker actions on infra directory
-    security.audit.enable = mkDefault true;
-    security.audit.rules = [
-      "-w /home/gortium/infra -p wa -k infra_changes"
-    ];
+    # ai-worker is member of docker group - can run docker commands via SSH
+    # No bind mounts, no sudo access. NOTE: docker group membership is root-equivalent on the host.
+    users.groups.docker.members = [ "ai-worker" ];
   };
 }
diff --git a/users/ai-worker.nix b/users/ai-worker.nix
index d7df7c0..48b51de 100644
--- a/users/ai-worker.nix
+++ b/users/ai-worker.nix
@@ -14,12 +14,12 @@
   };
   users.groups.ai-worker = {};
 
-  # Enable restricted AI worker access with deployment capabilities
+  # Enable restricted AI worker SSH access for ollama benchmarking
   # SECURITY: ai-worker can only:
-  #   - Access /home/ai-worker/infra (bind-mounted to /home/gortium/infra)
-  #   - Run: nh, nixos-rebuild, nixpkgs-fmt, nix (via sudo, no password)
-  #   - Manage docker containers (via docker group)
-  #   - All changes to infra/ are logged via audit subsystem
-  # WORKFLOW: AI must ask for user confirmation before running nh os switch
+  #   - SSH into host from Hermes container
+  #   - Run docker commands (docker exec ollama ...) via docker group
+  #   - NO access to infra repo (no bind mount)
+  #   - NO sudo access (no nh, nixos-rebuild, nixpkgs-fmt, nix)
+  # WORKFLOW: SSH from Hermes container, run docker benchmarks, return and save results to /opt/data/ai-optimizer/
   services.aiWorkerAccess = true;
 }

From ff7303cf6ae8ccba95635342254fd3982fe8f03d Mon Sep 17 00:00:00 2001
From: Hermes <hermes@lazyworkhorse.net>
Date: Sat, 9 May 2026 20:13:08 +0000
Subject: [PATCH 3/7] feat: add ai-optimizer benchmark plan and state tracking
 for ollama GPU benchmarking

---
 assets/ai-optimizer/README.md   | 194 ++++++++++++++++++++++++++++++++
 assets/ai-optimizer/results.csv |   1 +
 assets/ai-optimizer/state.json  |  21 ++++
 3 files changed, 216 insertions(+)
 create mode 100644 assets/ai-optimizer/README.md
 create mode 100644 assets/ai-optimizer/results.csv
 create mode 100644 assets/ai-optimizer/state.json

diff --git a/assets/ai-optimizer/README.md b/assets/ai-optimizer/README.md
new file mode 100644
index 0000000..cde9392
--- /dev/null
+++ b/assets/ai-optimizer/README.md
@@ -0,0 +1,194 @@
+# AI Model Optimizer - Ollama GPU Benchmark Plan
+
+**Purpose:** Find optimal ollama configurations for maximum context size and GPU utilization on AMD MI50 GPUs.
+
+**Hardware:**
+- 2x AMD MI50 GPUs (32GB VRAM each, 64GB total)
+- 128GB system RAM
+- ROCm: `HSA_OVERRIDE_GFX_VERSION=9.0.6`, `HIP_VISIBLE_DEVICES=0,1`
+
+---
+
+## File Locations
+
+```
+STATE:   /opt/data/infra/assets/ai-optimizer/state.json
+RESULTS: /opt/data/infra/assets/ai-optimizer/results.csv
+REPO:    /opt/data/infra (persistent clone)
+```
+
+---
+
+## Model Queues
+
+### GPU Track (Coding - prioritize speed + context on GPU)
+1. `deepseek-coder-v2:16b` - Best coding model, fits on GPU
+2. `qwen2.5-coder:32b` - Alternative coding model
+3. `codellama:34b-instruct` - Legacy option
+
+### RAM Track (Knowledge - prioritize max context)
+1. `qwen2.5:72b` - Large knowledge model
+2. `nemotron-3-nano:30b` - Efficient large model
+3. `mixtral:8x7b-instruct` - MoE architecture
+
+---
+
+## Context Steps (in order)
+
+```
+[32768, 65536, 98304, 131072, 163840, 200704, 262144, 327680]
+```
+
+---
+
+## Optimization Strategy
+
+### GPU Track (Coding)
+- Start: `num_ctx=32768`, `num_gpu=99`, `flash_attn=true`
+- Increase context until OOM or tokens/sec < 5
+- Record best config before hitting wall
+- Target: >10 tokens/sec with max context
+
+### RAM Track (Knowledge)
+- Start: `num_ctx=65536`, `num_gpu=50`, `flash_attn=true`
+- Allow heavy RAM offload (up to 100GB system RAM)
+- Increase context until OOM
+- Speed secondary to context size
+
+---
+
+## Prerequisites
+
+This PR adds the `ai-worker` user with docker group access. After merge:
+
+```bash
+# SSH from Hermes container to run benchmarks on the host
+ssh -i /path/to/key ai-worker@host docker exec ollama ollama list
+
+# Or if running directly on host
+docker exec ollama ollama list
+```
+
+---
+
+## Manual Testing Workflow
+
+### 1. Quick Model Test
+
+```bash
+docker exec ollama ollama run <model>:<tag> "Your prompt here"
+```
+
+### 2. Check Current State
+
+```bash
+cd /opt/data/infra
+cat assets/ai-optimizer/state.json
+```
+
+### 3. Pull Model (if needed)
+
+```bash
+docker exec ollama ollama pull <model>:<tag>
+```
+
+### 4. Create Test Modelfile
+
+```bash
+docker exec ollama bash -c "cat <<EOF > /root/.ollama/test_${model}.modelfile
+FROM ${model}
+PARAMETER num_ctx ${num_ctx}
+PARAMETER num_gpu ${num_gpu}
+PARAMETER flash_attn true
+PARAMETER num_predict 4096
+PARAMETER num_keep 1024
+PARAMETER repeat_penalty 1.1
+EOF"
+
+docker exec ollama ollama create test-model -f /root/.ollama/test_${model}.modelfile
+```
+
+### 5. Run Benchmark
+
+```bash
+# Warm up
+docker exec ollama ollama run test-model "Hello" > /dev/null
+
+# Coding prompt
+docker exec ollama ollama run test-model "Write a Python async context manager that retries a function with exponential backoff, max 5 retries, and logs each attempt using structlog. Include type hints."
+
+# Knowledge prompt
+docker exec ollama ollama run test-model "Explain the complete memory hierarchy in modern GPUs, from registers through L1/L2 caches to VRAM, and how data moves between them during matrix multiplication."
+```
+
+### 6. Measure VRAM
+
+```bash
+# Try host first
+rocm-smi --showmeminfo vram 2>/dev/null || \
+# Try via docker
+docker exec --privileged ollama rocm-smi --showmeminfo vram 2>/dev/null || \
+echo "VRAM unavailable"
+```
+
+### 7. Record Results
+
+Update `state.json` and append to `results.csv`:
+- tokens/sec from ollama output
+- VRAM/RAM usage
+- Whether this config is the new best
+
+### 8. Commit Changes
+
+```bash
+cd /opt/data/infra
+git add assets/ai-optimizer/
+git commit -m "ai-optimizer: tested ${model} at ${num_ctx} ctx - ${status}"
+git push
+```
+
+---
+
+## State File Structure
+
+```json
+{
+  "track": "gpu",
+  "current_model": "deepseek-coder-v2:16b",
+  "model_index": 0,
+  "phase": "context_scaling",
+  "backend": "ollama",
+  "current_config": {
+    "num_ctx": 32768,
+    "num_gpu": 99,
+    "flash_attn": true
+  },
+  "best_configs": {
+    "gpu": {},
+    "ram": {}
+  },
+  "completed_models": [],
+  "gpu_queue": ["deepseek-coder-v2:16b", "qwen2.5-coder:32b", "codellama:34b-instruct"],
+  "ram_queue": ["qwen2.5:72b", "nemotron-3-nano:30b", "mixtral:8x7b-instruct"],
+  "context_steps": [32768, 65536, 98304, 131072, 163840, 200704, 262144, 327680],
+  "last_updated": "2026-04-30T00:00:00Z"
+}
+```
+
+---
+
+## Results CSV Format
+
+```csv
+timestamp,track,model,backend,phase,num_ctx,num_gpu,flash_attn,tokens_per_sec,vram_gb,ram_gb,status,is_best
+```
+
+---
+
+## Notes
+
+- **Manual execution** - Run benchmarks when needed, no automated cron job
+- **Two tracks**: Complete GPU track first (coding models), then RAM track
+- **Backend**: ollama (llama.cpp optional for advanced users)
+- **Host access**: Use docker exec (or SSH via ai-worker) for rocm-smi
+- **Commit results**: Push best configs to repo for reference
diff --git a/assets/ai-optimizer/results.csv b/assets/ai-optimizer/results.csv
new file mode 100644
index 0000000..7e25194
--- /dev/null
+++ b/assets/ai-optimizer/results.csv
@@ -0,0 +1 @@
+timestamp,track,model,backend,phase,num_ctx,num_gpu,flash_attn,tokens_per_sec,vram_gb,ram_gb,status,is_best
diff --git a/assets/ai-optimizer/state.json b/assets/ai-optimizer/state.json
new file mode 100644
index 0000000..08dac90
--- /dev/null
+++ b/assets/ai-optimizer/state.json
@@ -0,0 +1,21 @@
+{
+  "track": "gpu",
+  "current_model": "deepseek-coder-v2:16b",
+  "model_index": 0,
+  "phase": "context_scaling",
+  "backend": "ollama",
+  "current_config": {
+    "num_ctx": 32768,
+    "num_gpu": 99,
+    "flash_attn": true
+  },
+  "best_configs": {
+    "gpu": {},
+    "ram": {}
+  },
+  "completed_models": [],
+  "gpu_queue": ["deepseek-coder-v2:16b", "qwen2.5-coder:32b", "codellama:34b-instruct"],
+  "ram_queue": ["qwen2.5:72b", "nemotron-3-nano:30b", "mixtral:8x7b-instruct"],
+  "context_steps": [32768, 65536, 98304, 131072, 163840, 200704, 262144, 327680],
+  "last_updated": "2026-05-09T00:00:00Z"
+}

From 96e77c5ef2def7eac6525a92ff97a2262f2828d1 Mon Sep 17 00:00:00 2001
From: Hermes <hermes@lazyworkhorse.net>
Date: Sat, 9 May 2026 20:19:26 +0000
Subject: [PATCH 4/7] Revert "feat: add ai-optimizer benchmark plan and state
 tracking for ollama GPU benchmarking"

This reverts commit ff7303cf6ae8ccba95635342254fd3982fe8f03d.
---
 assets/ai-optimizer/README.md   | 194 --------------------------------
 assets/ai-optimizer/results.csv |   1 -
 assets/ai-optimizer/state.json  |  21 ----
 3 files changed, 216 deletions(-)
 delete mode 100644 assets/ai-optimizer/README.md
 delete mode 100644 assets/ai-optimizer/results.csv
 delete mode 100644 assets/ai-optimizer/state.json

diff --git a/assets/ai-optimizer/README.md b/assets/ai-optimizer/README.md
deleted file mode 100644
index cde9392..0000000
--- a/assets/ai-optimizer/README.md
+++ /dev/null
@@ -1,194 +0,0 @@
-# AI Model Optimizer - Ollama GPU Benchmark Plan
-
-**Purpose:** Find optimal ollama configurations for maximum context size and GPU utilization on AMD MI50 GPUs.
-
-**Hardware:**
-- 2x AMD MI50 GPUs (32GB VRAM each, 64GB total)
-- 128GB system RAM
-- ROCm: `HSA_OVERRIDE_GFX_VERSION=9.0.6`, `HIP_VISIBLE_DEVICES=0,1`
-
----
-
-## File Locations
-
-```
-STATE:   /opt/data/infra/assets/ai-optimizer/state.json
-RESULTS: /opt/data/infra/assets/ai-optimizer/results.csv
-REPO:    /opt/data/infra (persistent clone)
-```
-
----
-
-## Model Queues
-
-### GPU Track (Coding - prioritize speed + context on GPU)
-1. `deepseek-coder-v2:16b` - Best coding model, fits on GPU
-2. `qwen2.5-coder:32b` - Alternative coding model
-3. `codellama:34b-instruct` - Legacy option
-
-### RAM Track (Knowledge - prioritize max context)
-1. `qwen2.5:72b` - Large knowledge model
-2. `nemotron-3-nano:30b` - Efficient large model
-3. `mixtral:8x7b-instruct` - MoE architecture
-
----
-
-## Context Steps (in order)
-
-```
-[32768, 65536, 98304, 131072, 163840, 200704, 262144, 327680]
-```
-
----
-
-## Optimization Strategy
-
-### GPU Track (Coding)
-- Start: `num_ctx=32768`, `num_gpu=99`, `flash_attn=true`
-- Increase context until OOM or tokens/sec < 5
-- Record best config before hitting wall
-- Target: >10 tokens/sec with max context
-
-### RAM Track (Knowledge)
-- Start: `num_ctx=65536`, `num_gpu=50`, `flash_attn=true`
-- Allow heavy RAM offload (up to 100GB system RAM)
-- Increase context until OOM
-- Speed secondary to context size
-
----
-
-## Prerequisites
-
-This PR adds the `ai-worker` user with docker group access. After merge:
-
-```bash
-# SSH from Hermes container to run benchmarks on the host
-ssh -i /path/to/key ai-worker@host docker exec ollama ollama list
-
-# Or if running directly on host
-docker exec ollama ollama list
-```
-
----
-
-## Manual Testing Workflow
-
-### 1. Quick Model Test
-
-```bash
-docker exec ollama ollama run <model>:<tag> "Your prompt here"
-```
-
-### 2. Check Current State
-
-```bash
-cd /opt/data/infra
-cat assets/ai-optimizer/state.json
-```
-
-### 3. Pull Model (if needed)
-
-```bash
-docker exec ollama ollama pull <model>:<tag>
-```
-
-### 4. Create Test Modelfile
-
-```bash
-docker exec ollama bash -c "cat <<EOF > /root/.ollama/test_${model}.modelfile
-FROM ${model}
-PARAMETER num_ctx ${num_ctx}
-PARAMETER num_gpu ${num_gpu}
-PARAMETER flash_attn true
-PARAMETER num_predict 4096
-PARAMETER num_keep 1024
-PARAMETER repeat_penalty 1.1
-EOF"
-
-docker exec ollama ollama create test-model -f /root/.ollama/test_${model}.modelfile
-```
-
-### 5. Run Benchmark
-
-```bash
-# Warm up
-docker exec ollama ollama run test-model "Hello" > /dev/null
-
-# Coding prompt
-docker exec ollama ollama run test-model "Write a Python async context manager that retries a function with exponential backoff, max 5 retries, and logs each attempt using structlog. Include type hints."
-
-# Knowledge prompt
-docker exec ollama ollama run test-model "Explain the complete memory hierarchy in modern GPUs, from registers through L1/L2 caches to VRAM, and how data moves between them during matrix multiplication."
-```
-
-### 6. Measure VRAM
-
-```bash
-# Try host first
-rocm-smi --showmeminfo vram 2>/dev/null || \
-# Try via docker
-docker exec --privileged ollama rocm-smi --showmeminfo vram 2>/dev/null || \
-echo "VRAM unavailable"
-```
-
-### 7. Record Results
-
-Update `state.json` and append to `results.csv`:
-- tokens/sec from ollama output
-- VRAM/RAM usage
-- Whether this config is the new best
-
-### 8. Commit Changes
-
-```bash
-cd /opt/data/infra
-git add assets/ai-optimizer/
-git commit -m "ai-optimizer: tested ${model} at ${num_ctx} ctx - ${status}"
-git push
-```
-
----
-
-## State File Structure
-
-```json
-{
-  "track": "gpu",
-  "current_model": "deepseek-coder-v2:16b",
-  "model_index": 0,
-  "phase": "context_scaling",
-  "backend": "ollama",
-  "current_config": {
-    "num_ctx": 32768,
-    "num_gpu": 99,
-    "flash_attn": true
-  },
-  "best_configs": {
-    "gpu": {},
-    "ram": {}
-  },
-  "completed_models": [],
-  "gpu_queue": ["deepseek-coder-v2:16b", "qwen2.5-coder:32b", "codellama:34b-instruct"],
-  "ram_queue": ["qwen2.5:72b", "nemotron-3-nano:30b", "mixtral:8x7b-instruct"],
-  "context_steps": [32768, 65536, 98304, 131072, 163840, 200704, 262144, 327680],
-  "last_updated": "2026-04-30T00:00:00Z"
-}
-```
-
----
-
-## Results CSV Format
-
-```csv
-timestamp,track,model,backend,phase,num_ctx,num_gpu,flash_attn,tokens_per_sec,vram_gb,ram_gb,status,is_best
-```
-
----
-
-## Notes
-
-- **Manual execution** - Run benchmarks when needed, no automated cron job
-- **Two tracks**: Complete GPU track first (coding models), then RAM track
-- **Backend**: ollama (llama.cpp optional for advanced users)
-- **Host access**: Use docker exec (or SSH via ai-worker) for rocm-smi
-- **Commit results**: Push best configs to repo for reference
diff --git a/assets/ai-optimizer/results.csv b/assets/ai-optimizer/results.csv
deleted file mode 100644
index 7e25194..0000000
--- a/assets/ai-optimizer/results.csv
+++ /dev/null
@@ -1 +0,0 @@
-timestamp,track,model,backend,phase,num_ctx,num_gpu,flash_attn,tokens_per_sec,vram_gb,ram_gb,status,is_best
diff --git a/assets/ai-optimizer/state.json b/assets/ai-optimizer/state.json
deleted file mode 100644
index 08dac90..0000000
--- a/assets/ai-optimizer/state.json
+++ /dev/null
@@ -1,21 +0,0 @@
-{
-  "track": "gpu",
-  "current_model": "deepseek-coder-v2:16b",
-  "model_index": 0,
-  "phase": "context_scaling",
-  "backend": "ollama",
-  "current_config": {
-    "num_ctx": 32768,
-    "num_gpu": 99,
-    "flash_attn": true
-  },
-  "best_configs": {
-    "gpu": {},
-    "ram": {}
-  },
-  "completed_models": [],
-  "gpu_queue": ["deepseek-coder-v2:16b", "qwen2.5-coder:32b", "codellama:34b-instruct"],
-  "ram_queue": ["qwen2.5:72b", "nemotron-3-nano:30b", "mixtral:8x7b-instruct"],
-  "context_steps": [32768, 65536, 98304, 131072, 163840, 200704, 262144, 327680],
-  "last_updated": "2026-05-09T00:00:00Z"
-}

From 6806898f04ccfbff8749a48b75bfea08053a1d8c Mon Sep 17 00:00:00 2001
From: Hermes <hermes@lazyworkhorse.net>
Date: Sun, 10 May 2026 10:12:34 -0400
Subject: [PATCH 5/7] feat: update compose submodule for ollama-gfx906
 (v0.23.2) + add ollama Dockerfile

---
 assets/compose           |   2 +-
 assets/ollama/Dockerfile | 106 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 107 insertions(+), 1 deletion(-)
 create mode 100644 assets/ollama/Dockerfile

diff --git a/assets/compose b/assets/compose
index a79fe9d..6b82a26 160000
--- a/assets/compose
+++ b/assets/compose
@@ -1 +1 @@
-Subproject commit a79fe9dffacebae6d4ee17502885e9cdfa852073
+Subproject commit 6b82a26c25f1592a2d1c9bea4f941864362fe001
diff --git a/assets/ollama/Dockerfile b/assets/ollama/Dockerfile
new file mode 100644
index 0000000..438e607
--- /dev/null
+++ b/assets/ollama/Dockerfile
@@ -0,0 +1,106 @@
+# ollama-gfx906/Dockerfile
+#
+# Custom ollama image with ROCm 6.1 + gfx906 (MI50) support.
+# The official ollama/rocm image ships ROCm 7.2 which dropped gfx906.
+# This uses v0.23.2's native CMake build system with AMDGPU_TARGETS including gfx906.
+#
+# Build: docker build -t ollama/ollama:rocm-gfx906 ai/ollama
+
+FROM rocm/dev-ubuntu-22.04:6.1.2-complete AS builder
+
+# Build dependencies (CMake, Ninja, Go)
+ARG CMAKEVERSION=3.31.2
+ARG NINJAVERSION=1.12.1
+ARG GOLANG_VERSION=1.22.0
+
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
+    curl git ccache build-essential pkg-config unzip \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install CMake from official binaries
+RUN curl -fsSL https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-x86_64.tar.gz \
+    | tar xz -C /usr/local --strip-components 1
+
+# Install Ninja
+RUN curl -fsSL -o /tmp/ninja.zip \
+    https://github.com/ninja-build/ninja/releases/download/v${NINJAVERSION}/ninja-linux.zip \
+    && unzip /tmp/ninja.zip -d /usr/local/bin && rm /tmp/ninja.zip
+
+# Install Go
+RUN curl -fsSL https://go.dev/dl/go${GOLANG_VERSION}.linux-amd64.tar.gz \
+    | tar xz -C /usr/local
+ENV PATH=/usr/local/go/bin:$PATH
+
+ARG OLLAMA_VERSION=v0.23.2
+RUN git clone --depth 1 --branch ${OLLAMA_VERSION} https://github.com/ollama/ollama.git /build
+WORKDIR /build
+
+# ROCm paths
+ENV HIP_PATH=/opt/rocm
+ENV ROCM_PATH=/opt/rocm
+ENV CMAKE_GENERATOR=Ninja
+ENV LDFLAGS=-s
+
+# Step 1: Build CPU backends with GCC (no ROCm preset)
+# Pre-set CMAKE_HIP_COMPILER="" to prevent check_language(HIP) from
+# finding a HIP compiler (it searches /opt/rocm even without PATH).
+# Remove /opt/rocm from PATH to prevent find_program from finding hipcc.
+RUN mkdir -p build-cpu && \
+    PATH=/usr/local/go/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin \
+    cmake -B build-cpu -DCMAKE_BUILD_TYPE=Release \
+      -DCMAKE_HIP_COMPILER="" \
+      -DCMAKE_INSTALL_PREFIX=/build/dist && \
+    cmake --build build-cpu --target ggml-cpu -- -l $(nproc) && \
+    cmake --install build-cpu --component CPU --strip && \
+    echo "=== CPU install ===" && \
+    (find /build/dist/lib/ollama -type f -o -type l 2>&1 | head -20 || echo "empty")
+
+# Step 2: Build HIP backend with ROCm preset + gfx906 target only
+# The ROCm 6 preset enables HIP language detection (enable_language(HIP))
+# which ensures GPU kernels are properly compiled for gfx906.
+# OLLAMA_RUNNER_DIR=rocm from the preset, so HIP goes to lib/ollama/rocm/
+# Need CMAKE_PREFIX_PATH so find_package(hip) finds hip-config.cmake
+# at /opt/rocm/lib/cmake/hip/hip-config.cmake.
+RUN mkdir -p build-hip && \
+    cmake -B build-hip \
+      --preset 'ROCm 6' \
+      -DAMDGPU_TARGETS="gfx906:xnack-" \
+      -DCMAKE_BUILD_TYPE=Release \
+      -DCMAKE_PREFIX_PATH="/opt/rocm" && \
+    cmake --build build-hip --target ggml-hip -- -l $(nproc) && \
+    cmake --install build-hip --component HIP --strip && \
+    echo "=== HIP install ===" && \
+    find /build/dist/lib/ollama -type f -o -type l | head -20
+
+# Step 3: Build Go binary (GCC for CGo linking); drop the tag's leading "v" so the embedded version is plain semver
+ENV CGO_ENABLED=1
+RUN go build -trimpath -ldflags="-X=github.com/ollama/ollama/version.Version=${OLLAMA_VERSION#v}" -o /build/dist/ollama .
+
+# ---------- Runtime image ----------
+FROM ubuntu:24.04
+
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
+    ca-certificates curl libstdc++6 libgomp1 libvulkan1 libopenblas0 \
+    && rm -rf /var/lib/apt/lists/*
+
+# Copy ROCm 6.1 runtime libraries
+# These are needed at runtime by ggml-hip via LD_LIBRARY_PATH
+COPY --from=builder /opt/rocm/lib/ /opt/rocm/lib/
+COPY --from=builder /opt/rocm/share/ /opt/rocm/share/
+
+# Copy ollama binary + all backends (CPU + HIP)
+# CPU install:  /build/dist/lib/ollama/libggml-*.so
+# HIP install:  /build/dist/lib/ollama/rocm/libggml-hip.so
+COPY --from=builder /build/dist/ollama /usr/bin/ollama
+COPY --from=builder /build/dist/lib/ollama/ /usr/lib/ollama/
+
+RUN ldconfig
+
+ENV LD_LIBRARY_PATH=/opt/rocm/lib:/usr/lib/ollama/rocm:/usr/lib/ollama
+ENV HSA_OVERRIDE_GFX_VERSION=9.0.6
+ENV HCC_AMDGPU_TARGET=gfx906
+ENV HSA_ENABLE_SDMA=0
+
+EXPOSE 11434
+ENTRYPOINT ["/bin/ollama"]
+CMD ["serve"]

From c07debf088d030453ccbc70c51ee6d3ac9fcec7a Mon Sep 17 00:00:00 2001
From: Robert <robert@lazyworkhorse.net>
Date: Sun, 10 May 2026 16:51:32 -0400
Subject: [PATCH 6/7] Added wireguard keys

---
 secrets/wireguard_preshared_key.age |  9 +++++++++
 secrets/wireguard_private_key.age   | 11 +++++++++++
 2 files changed, 20 insertions(+)
 create mode 100644 secrets/wireguard_preshared_key.age
 create mode 100644 secrets/wireguard_private_key.age

diff --git a/secrets/wireguard_preshared_key.age b/secrets/wireguard_preshared_key.age
new file mode 100644
index 0000000..6149647
--- /dev/null
+++ b/secrets/wireguard_preshared_key.age
@@ -0,0 +1,9 @@
+-----BEGIN AGE ENCRYPTED FILE-----
+YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IHNzaC1lZDI1NTE5IEdoTUQ4QSA3VG9Z
+MVFPVFc2VVJ3d0h0dmtBUnI3WHl2SzUxTkRZbjFCaGloWmV3dnd3ClcxdnVPeGd6
+SU4zR0Q0K1dtVjRRVHd0VW5XSFI0dVFpTjZnYk1DNjRxTVEKLT4gQzlgRy1ncmVh
+c2UKeUozOWgyUytSTVF0NjY2STBEb2VadwotLS0gblI3bmJCUWxxU3QrYTEyVFBI
+Snc4NC9rTkh0NnZYbUtxUE9hRWRkelpmMAq58fmH6cK13GeD7wGLxKmx10hmJeW4
+b7KqnCD1ZP7uG85s32xzVRwRG8RrG4xZo5nR9Mrtg1CoTSFfUGeFnf5xveN+Ej0X
+wDVB1LwC+Q==
+-----END AGE ENCRYPTED FILE-----
diff --git a/secrets/wireguard_private_key.age b/secrets/wireguard_private_key.age
new file mode 100644
index 0000000..09d0213
--- /dev/null
+++ b/secrets/wireguard_private_key.age
@@ -0,0 +1,11 @@
+-----BEGIN AGE ENCRYPTED FILE-----
+YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IHNzaC1lZDI1NTE5IEdoTUQ4QSA5dzVG
+WUNvT3NlRmcrWS81bzJqSWlTekVYaDFFTE10SkI2dEgzaGpxcUI4Cmk5Y0FGYTRZ
+K0NGYzY3VUp4aS9ZZGRmWTgybDJFUURva2pZNmVOS3QxdEUKLT4gPnVRTCtldGMt
+Z3JlYXNlCk04OTJZeFRNeDI5aGpMVTk1ZTE0Y2FMMnFEMjlJalJpMHRlaTE4ZWIx
+d2lCRGQ5RHVjcktOMGJCb1VERlNWcTYKaSt0L1Z6dVJ0QWIyZkhsYzFEVjZSQWUr
+ZWpwVlo1TmhoUFJZdkEvR0gxNlVhcXF2ZTRnCi0tLSBLcmM2MThNVkdWclpHUXRr
+VTF6QVk2WUZlTXpZMVNLMlpBOFc3M1o5WjZzCs9xbPlIX+u5vRSQ/z9utu+I9S2c
+02DOsIb1kzxzb1OK91b8Kh4JucQSq3qkyEvRucsNn5QW8hIHDnRuND6EbPyN7p4S
+YB/F0dxSqgnq
+-----END AGE ENCRYPTED FILE-----

From f722af7803c96eb655c9d4999fcb672243e54333 Mon Sep 17 00:00:00 2001
From: Robert <robert@lazyworkhorse.net>
Date: Sun, 10 May 2026 16:56:09 -0400
Subject: [PATCH 7/7] New ollama model creator module version

---
 .../services/ollama_init_custom_models.nix    | 96 +++++++++++--------
 1 file changed, 58 insertions(+), 38 deletions(-)

diff --git a/modules/nixos/services/ollama_init_custom_models.nix b/modules/nixos/services/ollama_init_custom_models.nix
index 4dc965d..aa060cc 100644
--- a/modules/nixos/services/ollama_init_custom_models.nix
+++ b/modules/nixos/services/ollama_init_custom_models.nix
@@ -1,67 +1,87 @@
 { pkgs, ... }: {
   systemd.services.init-ollama-model = {
     description = "Initialize LLM models with extra context in Ollama Docker";
-    after = [ "docker-ollama.service" ];
+    
+    # Order this unit after docker.service (ordering only: After= does not itself start or require Docker)
+    after = [ "docker.service" ];
     wantedBy = [ "multi-user.target" ];
+    
     script = ''
-      # Wait for Ollama
-      while ! ${pkgs.curl}/bin/curl -s http://localhost:11434/api/tags > /dev/null; do
-        sleep 2
-      done
+      # Run the whole initialization in a backgrounded subshell so it does not block startup
+      (
+        echo "Starting asynchronous Ollama initialization..."
+        
+        # Wait for Ollama (give up once the 5-second sleeps add up to TIMEOUT=60 seconds, to avoid an infinite loop)
+        TIMEOUT=60
+        COUNT=0
+        while ! ${pkgs.curl}/bin/curl -s -f http://127.0.0.1:11434/api/tags > /dev/null; do
+          if [ $COUNT -ge $TIMEOUT ]; then
+            echo "Ollama did not become ready in time. Exiting."
+            exit 1
+          fi
+          echo "Waiting for Ollama API to be reachable..."
+          sleep 5
+          COUNT=$((COUNT + 5))
+        done
 
-      create_model_if_missing() {
-        local model_name=$1
-        local base_model=$2
-        if ! ${pkgs.docker}/bin/docker exec ollama ollama list | grep -q "$model_name"; then
-          echo "$model_name not found, creating from $base_model..."
+        create_model_if_missing() {
+          local model_name=$1
+          local base_model=$2
           
-          # We use a custom TEMPLATE block to strip the 'currentDate' function 
-          # which is unsupported in Ollama 0.5.7 but present in Devstral's default manifest.
-          ${pkgs.docker}/bin/docker exec ollama sh -c "cat <<EOF > /root/.ollama/$model_name.modelfile
+          # Vérification robuste via l'API HTTP d'Ollama plutôt que docker exec (évite les conflits de tty)
+          if ! ${pkgs.curl}/bin/curl -s http://127.0.0.1:11434/api/tags | ${pkgs.jq}/bin/jq -e ".models[] | select(.name == \"$model_name\")" > /dev/null; then
+            echo "$model_name not found, creating from $base_model..."
+            
+            # Utilisation d'un fichier temporaire sur l'hôte pour l'injecter proprement dans Docker
+            TMP_FILE=$(mktemp)
+            cat <<EOF > "$TMP_FILE"
 FROM $base_model
-TEMPLATE \"\"\"{{- if .System }}
+TEMPLATE """{{- if .System }}
 [SYSTEM_PROMPT]
 {{ .System }}
 [/SYSTEM_PROMPT]
 {{- end }}
 {{- range .Messages }}
-{{- if eq .Role \"user\" }}
+{{- if eq .Role "user" }}
 [INST]
 {{ .Content }}
 [/INST]
-{{- else if eq .Role \"assistant\" }}
+{{- else if eq .Role "assistant" }}
 {{ .Content }}
 {{- end }}
-{{- end }}\"\"\"
+{{- end }}"""
 PARAMETER num_ctx 131072
 PARAMETER num_predict 4096
 PARAMETER num_keep 1024
 PARAMETER repeat_penalty 1.1
 PARAMETER top_k 40
-PARAMETER stop \"[INST]\"
-PARAMETER stop \"[/INST]\"
-PARAMETER stop \"</s>\"
-EOF"
-          ${pkgs.docker}/bin/docker exec ollama ollama create "$model_name" -f "/root/.ollama/$model_name.modelfile"
-          ${pkgs.docker}/bin/docker exec ollama rm "/root/.ollama/$model_name.modelfile"
-        else
-          echo "$model_name already exists, skipping."
-        fi
-      }
+PARAMETER stop "[INST]"
+PARAMETER stop "[/INST]"
+PARAMETER stop "</s>"
+EOF
 
-      # Create Nemotron
-      create_model_if_missing "nemotron-3-nano:30b-128k" "nemotron-3-nano:30b"
-      
-      # Create Devstral
-      create_model_if_missing "devstral-small-2:24b-128k" "devstral-small-2:24b" 
-      
-      # create_model_if_missing "qwen2.5-coder:32b-128k" "qwen2.5-coder:32b"
-      
-      # create_model_if_missing "mistral-large-planner:123b" "mistral-large:123b-instruct-v2407-q4_K_S"
+            # Copie et création dans le conteneur
+            ${pkgs.docker}/bin/docker cp "$TMP_FILE" ollama:/tmp/model.modelfile
+            ${pkgs.docker}/bin/docker exec ollama ollama create "$model_name" -f /tmp/model.modelfile
+            ${pkgs.docker}/bin/docker exec ollama rm /tmp/model.modelfile
+            rm -f "$TMP_FILE"
+          else
+            echo "$model_name already exists, skipping."
+          fi
+        }
+
+        # Create Nemotron
+        create_model_if_missing "nemotron-3-nano:30b-128k" "nemotron-3-nano:30b"
+        
+        # Create Devstral
+        create_model_if_missing "devstral-small-2:24b-128k" "devstral-small-2:24b" 
+        
+      ) &
     '';
+
     serviceConfig = {
-      Type = "oneshot";
-      RemainAfterExit = true;
+      Type = "forking"; # Permet à systemd de savoir que le script passe en arrière-plan via '&'
+      User = "root";
     };
   };
 }