Compare commits
7 Commits
feat/herme
...
feat/herme
| Author | SHA1 | Date | |
|---|---|---|---|
| 3c92d93366 | |||
| d3f2e3b7b9 | |||
| 6a44120b1a | |||
| 38a1451689 | |||
| f9fb28d560 | |||
| bcc4b6d157 | |||
| 8d1ae7e632 |
31
AGENTS.md
31
AGENTS.md
@@ -1,31 +0,0 @@
|
||||
# AGENTS.md
|
||||
|
||||
Development conventions for the compose repository (Docker Compose stacks).
|
||||
|
||||
## Build & Deploy
|
||||
|
||||
- The ai stack is managed via `systemctl restart ai_stack.service` on the NixOS host
|
||||
- Compose files are built from the git repo; apply via `nh os switch` or `systemctl restart ai_stack.service`
|
||||
- Never `docker compose up -d` directly — it bypasses the systemd env file and breaks secrets
|
||||
|
||||
## Hermes Workers
|
||||
|
||||
- Paperclip Hermes workers are added via `ai/scripts/provision-hermes-worker.sh`
|
||||
- The script appends only — never deletes or modifies existing content
|
||||
- Workers are CPU-only containers on the `ai_backend` network with no GPU passthrough
|
||||
|
||||
## Workflow
|
||||
|
||||
- New feature → clean branch from `origin/master` → push → PR on Gitea
|
||||
- Branch naming: `feat/description` (features), `fix/description` (bugs/docs)
|
||||
- Always branch from `origin/master`, never from another feature branch
|
||||
- Submodule changes (when this repo is consumed as a submodule): commit the submodule update in the parent repo
|
||||
- PR title should describe the change; body should explain motivation + summary
|
||||
- After PR merge, delete the feature branch
|
||||
|
||||
## YAML Conventions
|
||||
|
||||
- Use `x-*` extension fields for reusable anchors
|
||||
- Comment out inactive services rather than deleting them
|
||||
- Environment variables in dictionary format (`KEY: value`) for anchor compatibility
|
||||
- List format (`- KEY=value`) works for standalone services, but cannot be combined with anchors: the YAML merge key (`<<`) only merges mappings, not sequences
|
||||
172
ai/compose.yml
172
ai/compose.yml
@@ -1,35 +1,4 @@
|
||||
version: "3.8"
|
||||
|
||||
# ── Hermes Worker Template ──────────────────────────────────
# Shared defaults for the paperclip-worker-* Hermes containers.
# Each worker = one isolated Hermes agent for a Paperclip employee.
# Workers have NO GPU — they use OpenCode Go or remote providers.
#
# IMPORTANT: the YAML merge key (`<<`) is shallow. A service that merges
# *hermes-worker and then declares its own `environment:` REPLACES the whole
# environment mapping below. To keep these defaults, merge the nested anchor
# too:
#
#   hermes-worker-1:
#     <<: *hermes-worker
#     container_name: hermes-worker-1
#     environment:
#       <<: *hermes-worker-env
#       API_SERVER_PORT: "8651"
#       API_SERVER_KEY: "..."
#       DISCORD_BOT_TOKEN: ${WORKER_1_DISCORD_BOT_TOKEN}
#     volumes:
#       - /mnt/HoardingCow_docker_data/Hermes/worker-1:/opt/data
x-hermes-worker: &hermes-worker
  build:
    context: ./hermes
    ssh:
      - default
  # Run the tool installer from the data volume, then hand over to the image
  # entrypoint under tini. `$$` is Compose's escape for a literal `$`, so bash
  # receives "$@" (the service `command`) instead of Compose trying to
  # interpolate it. "hermes-entrypoint" becomes $0 of the bash -c script.
  entrypoint:
    - /bin/bash
    - "-c"
    - >-
      bash /opt/data/hermes-tools/install.sh
      && exec /usr/bin/tini -g -- /opt/hermes/docker/entrypoint.sh "$$@"
    - hermes-entrypoint
  command: gateway run
  restart: always
  environment: &hermes-worker-env
    API_SERVER_ENABLED: "true"
    API_SERVER_HOST: "0.0.0.0"
    OLLAMA_HOST: "http://ollama:11434"
    OPENROUTER_API_KEY: "${OPENROUTER_API_KEY}"
    # Each worker needs its own OpenCode Go API key in .env
    # NOTE(review): as written every worker reads the same
    # OPENCODE_GO_API_KEY variable — override per worker if keys must differ.
    OPENCODE_GO_API_KEY: "${OPENCODE_GO_API_KEY}"
    # NOTE(review): presumably disables the gateway's user allow-list —
    # confirm this is intended for every worker.
    GATEWAY_ALLOW_ALL_USERS: "true"
    TZ: "America/Montreal"
  networks:
    ai_backend: {}
  # NO devices — workers are CPU-only, no GPU passthrough
|
||||
# ─────────────────────────────────────────────────────────────
|
||||
|
||||
services:
|
||||
|
||||
# webui:
|
||||
@@ -85,6 +54,10 @@ services:
|
||||
- TZ=America/Montreal
|
||||
volumes:
|
||||
- /mnt/HoardingCow_docker_data/Hermes/data:/opt/data
|
||||
# Syncthing-shared org files — read-only view of user's agenda
|
||||
- /mnt/HoardingCow_docker_data/Syncthing/telos-ro:/opt/data/telos-ro:ro
|
||||
# Syncthing-shared inbox — write tasks here, they sync to user's laptop
|
||||
- /mnt/HoardingCow_docker_data/Syncthing/telos-rw:/opt/data/telos-rw:rw
|
||||
devices:
|
||||
- /dev/kfd:/dev/kfd
|
||||
- /dev/dri:/dev/dri
|
||||
@@ -94,6 +67,35 @@ services:
|
||||
networks:
|
||||
- ai_backend
|
||||
|
||||
# Syncthing instance that owns the telos-ro / telos-rw folders. The same host
# directories are bind-mounted into the Hermes container under /opt/data.
# NOTE(review): the container joins two networks; if Traefik is attached to
# only one of them, a `traefik.docker.network` label may be needed — confirm.
syncthing:
  image: syncthing/syncthing:latest
  container_name: syncthing
  hostname: syncthing
  restart: always
  ports:
    # 8384 is also the port Traefik forwards to (see labels below).
    - "8384:8384"
    - "22000:22000"
    - "21027:21027/udp"
  environment:
    TZ: "America/Montreal"
  volumes:
    - /mnt/HoardingCow_docker_data/Syncthing/config:/var/syncthing/config
    - /mnt/HoardingCow_docker_data/Syncthing/telos-ro:/telos-ro
    - /mnt/HoardingCow_docker_data/Syncthing/telos-rw:/telos-rw
  networks:
    - ai_backend
    - ai_net
  labels:
    traefik.enable: "true"
    # Plain-HTTP router: only redirects to HTTPS.
    traefik.http.routers.syncthing-http.rule: "Host(`syncthing.lazyworkhorse.net`)"
    traefik.http.routers.syncthing-http.entrypoints: "web"
    traefik.http.routers.syncthing-http.middlewares: "redirect-to-https"
    # HTTPS router with a certificate from the `njalla` resolver.
    traefik.http.routers.syncthing-https.rule: "Host(`syncthing.lazyworkhorse.net`)"
    traefik.http.routers.syncthing-https.entrypoints: "websecure"
    traefik.http.routers.syncthing-https.tls: "true"
    traefik.http.routers.syncthing-https.tls.certresolver: "njalla"
    traefik.http.services.syncthing.loadbalancer.server.port: "8384"
|
||||
|
||||
ollama:
|
||||
build:
|
||||
context: ./ollama
|
||||
@@ -110,40 +112,7 @@ services:
|
||||
- /mnt/HoardingCow_docker_data/Ollama/ollama:/root/.ollama
|
||||
environment:
|
||||
- OLLAMA_VULKAN=0
|
||||
- HSA_OVERRIDE_GFX_VERSION=9.0.6
|
||||
- HCC_AMDGPU_TARGET=gfx906
|
||||
- HIP_VISIBLE_DEVICES=0,1
|
||||
- ROCR_VISIBLE_DEVICES=0,1
|
||||
- HSA_ENABLE_SDMA=0
|
||||
- OLLAMA_HOST=0.0.0.0
|
||||
- OLLAMA_DEBUG=1
|
||||
- OLLAMA_FLASH_ATTENTION=1
|
||||
- OLLAMA_NUM_PARALLEL=2
|
||||
devices:
|
||||
# Map the render nodes and KFD for ROCm to work inside the container
|
||||
- /dev/kfd:/dev/kfd
|
||||
- /dev/dri:/dev/dri
|
||||
group_add:
|
||||
- "303"
|
||||
- "26"
|
||||
|
||||
# ── Paperclip Worker Hermes Agents ──────────────────────────
|
||||
# Each worker is an isolated Hermes agent for a Paperclip employee.
|
||||
# Add new workers with: ./scripts/provision-hermes-worker.sh <name> <discord-token>
|
||||
# The API server key and port are generated automatically.
|
||||
# Workers are CPU-only — they use OpenCode Go or remote providers.
|
||||
|
||||
# ── Worker Template (commented — uncomment + configure to activate) ──
|
||||
# hermes-worker-1:
|
||||
# <<: *hermes-worker
|
||||
# container_name: hermes-worker-1
|
||||
# environment:
|
||||
# API_SERVER_PORT: "8651"
|
||||
# API_SERVER_KEY: "generated-by-provision-script"
|
||||
# DISCORD_BOT_TOKEN: ${WORKER_1_DISCORD_BOT_TOKEN}
|
||||
# volumes:
|
||||
# - /mnt/HoardingCow_docker_data/Hermes/worker-1:/opt/data
|
||||
# ─────────────────────────────────────────────────────────────
|
||||
|
||||
networks:
|
||||
ai_net:
|
||||
@@ -153,47 +122,40 @@ networks:
|
||||
driver: bridge
|
||||
name: ai_backend
|
||||
|
||||
# llama_cpp_devstral:
|
||||
# image: ghcr.io/ggml-org/llama.cpp:server-rocm
|
||||
# container_name: llama_cpp_devstral
|
||||
# restart: unless-stopped
|
||||
# networks:
|
||||
# - ai_backend
|
||||
# ports:
|
||||
# - "8300:8080"
|
||||
# ipc: host
|
||||
# devices:
|
||||
# - "/dev/kfd:/dev/kfd"
|
||||
# - "/dev/dri:/dev/dri"
|
||||
# group_add:
|
||||
# - "303" # video
|
||||
# - "26" # render
|
||||
# environment:
|
||||
# HSA_OVERRIDE_GFX_VERSION: 9.0.6
|
||||
# HIP_VISIBLE_DEVICES: 0,1
|
||||
# LLAMA_CACHE: /models
|
||||
# volumes:
|
||||
# - /mnt/HoardingCow_docker_data/Llama_cpp/models:/models
|
||||
# - /mnt/HoardingCow_docker_data/Llama_cpp/devstral-agent.jinja:/template.jinja
|
||||
# command: >
|
||||
# -hf unsloth/Devstral-Small-2-24B-Instruct-2512-GGUF:Devstral-Small-2-24B-Instruct-2512-Q8_0.gguf
|
||||
# -a devstral-2-small-llama_cpp
|
||||
# --chat-template-file /template.jinja
|
||||
# --host 0.0.0.0
|
||||
# --port 8080
|
||||
# --n-gpu-layers 99
|
||||
# --ctx-size 163840
|
||||
# --batch-size 4096
|
||||
# --ubatch-size 4096
|
||||
# --cache-type-k f16
|
||||
# --cache-type-v f16
|
||||
# --cache-reuse 256
|
||||
# --flash-attn on
|
||||
# --context-shift
|
||||
# --split-mode layer
|
||||
# --no-mmap
|
||||
# --n-predict -1
|
||||
# --parallel 2
|
||||
# llama.cpp server (ROCm, gfx906) serving a single GGUF model on the
# ai_backend network; published on the host loopback only (127.0.0.1:8300).
# NOTE(review): in the compare view this block follows the top-level
# `networks:` section — confirm it is nested under `services:` in the file.
llama-cpp-hermes:
  # `llama-cpp:rocm-gfx906` is a local tag, not a registry image. Declaring
  # the build context lets Compose build it when it is missing instead of
  # failing to pull it.
  build:
    context: ./llama-cpp
  image: llama-cpp:rocm-gfx906
  container_name: llama-cpp-hermes
  restart: unless-stopped
  networks:
    - ai_backend
  ports:
    - "127.0.0.1:8300:8080"
  ipc: host
  devices:
    # KFD and DRI render nodes, required for ROCm inside the container
    - /dev/kfd:/dev/kfd
    - /dev/dri:/dev/dri
  group_add:
    - "303"  # video
    - "26"  # render
  environment:
    - HSA_OVERRIDE_GFX_VERSION=9.0.6
    - HSA_ENABLE_SDMA=0
    - HIP_VISIBLE_DEVICES=0,1
    - LLAMA_CACHE=/models
  volumes:
    - /mnt/HoardingCow_docker_data/Llama_cpp/models:/models
    # The model is an Ollama blob mounted read-only by content hash.
    # NOTE(review): this path breaks if Ollama prunes or re-pulls the blob.
    - /mnt/HoardingCow_docker_data/Ollama/ollama/models/blobs/sha256-17823599694fa3503ef54bf748d5078c6ce881f4d01616cafa255dc05d215a08:/model.gguf:ro
  # Folded scalar: the lines are joined with spaces into one argument string
  # appended to the image ENTRYPOINT; `>-` drops the trailing newline.
  command: >-
    -m /model.gguf
    --host 0.0.0.0
    --port 8080
    --gpu-layers 99
    --ctx-size 163840
    -ctk f16 -ctv f16
    --flash-attn on
    --split-mode layer
    --no-mmap
    --n-predict -1
|
||||
|
||||
# vllm:
|
||||
# image: nalanzeyu/vllm-gfx906:v0.9.0-rocm6.3
|
||||
|
||||
30
ai/llama-cpp/Dockerfile
Normal file
30
ai/llama-cpp/Dockerfile
Normal file
@@ -0,0 +1,30 @@
|
||||
# ai/llama-cpp/Dockerfile
# Custom llama.cpp server with ROCm 6.1 + gfx906 (MI50) support.
# Build: docker build -t llama-cpp:rocm-gfx906 .

# ── Stage 1: compile llama-server against the ROCm toolchain ──
FROM rocm/dev-ubuntu-22.04:6.1.2-complete AS builder

# --no-install-recommends keeps the layer small; ca-certificates is listed
# explicitly because the HTTPS git clone below needs it.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        cmake \
        curl \
        git \
        make \
        pkg-config \
    && rm -rf /var/lib/apt/lists/*

# llama.cpp release tag to build; override with --build-arg LLAMACPP_VERSION=...
ARG LLAMACPP_VERSION=b9596
RUN git clone --depth 1 --branch "${LLAMACPP_VERSION}" \
        https://github.com/ggml-org/llama.cpp.git /build
WORKDIR /build

ENV HIP_PATH=/opt/rocm \
    ROCM_PATH=/opt/rocm \
    PATH=/opt/rocm/bin:/opt/rocm/llvm/bin:${PATH} \
    CMAKE_PREFIX_PATH=/opt/rocm

# HIP backend only, restricted to gfx906. ggml/llama are linked statically
# (BUILD_SHARED_LIBS=OFF). Output: /build/build/bin/llama-server
RUN cmake -S . -B build \
        -DGGML_HIP=ON -DCMAKE_BUILD_TYPE=Release \
        -DAMDGPU_TARGETS="gfx906:xnack-" \
        -DCMAKE_POSITION_INDEPENDENT_CODE=ON \
        -DGGML_CUDA=OFF -DGGML_VULKAN=OFF -DGGML_METAL=OFF \
        -DBUILD_SHARED_LIBS=OFF \
    && cmake --build build --target llama-server -- -j "$(nproc)"
|
||||
|
||||
# ── Stage 2: runtime image with only llama-server and the ROCm libraries ──
# NOTE(review): the builder stage is Ubuntu 22.04 while this stage is 24.04;
# confirm the copied ROCm libraries resolve all their dependencies here.
FROM ubuntu:24.04

RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        libdrm-amdgpu1 \
        libdrm2 \
        libelf1 \
        libgomp1 \
        libnuma1 \
        libopenblas0 \
        libstdc++6 \
    && rm -rf /var/lib/apt/lists/*

# ROCm shared libraries and data files taken from the builder image, plus the
# compiled server binary.
COPY --from=builder /opt/rocm/lib/ /opt/rocm/lib/
COPY --from=builder /opt/rocm/share/ /opt/rocm/share/
COPY --from=builder /build/build/bin/llama-server /usr/local/bin/llama-server

# Register /opt/rocm/lib with the dynamic linker.
RUN echo /opt/rocm/lib > /etc/ld.so.conf.d/rocm.conf && ldconfig

# gfx906 defaults; the Compose service may override these.
ENV HSA_OVERRIDE_GFX_VERSION=9.0.6 \
    HCC_AMDGPU_TARGET=gfx906 \
    HSA_ENABLE_SDMA=0

EXPOSE 8080
ENTRYPOINT ["/usr/local/bin/llama-server"]
|
||||
@@ -1,135 +0,0 @@
|
||||
#!/usr/bin/env bash
set -euo pipefail

# ── Hermes Worker Provisioner ──────────────────────────────
# Adds a new Paperclip Hermes worker to the ai compose stack.
#
# Usage:
#   ./provision-hermes-worker.sh <name> <discord_bot_token_var>
#
# Example:
#   ./provision-hermes-worker.sh worker-1 WORKER_1_DISCORD_BOT_TOKEN
#
# Arguments:
#   <name>                   Compose service / container name; also used as
#                            the volume directory name.
#   <discord_bot_token_var>  NAME of the environment variable holding the
#                            Discord bot token (not the token itself).
#
# The script only ADDS content: a new service block is inserted just before
# the top-level `networks:` key of compose.yml. Existing lines, including
# commented ones, are never modified or removed.
#
# Post-provision steps (manual):
#   1. Add secrets to agenix .env file
#   2. systemctl restart ai_stack.service
#   3. Configure Paperclip agent
# ─────────────────────────────────────────────────────────────

NAME="${1:?Usage: $0 <name> <discord_bot_token_var>}"
TOKEN_VAR="${2:?Usage: $0 <name> <discord_bot_token_var>}"

# Both values end up in a regex, a filesystem path and generated YAML, so
# reject anything that is not a plain identifier.
if ! [[ "$NAME" =~ ^[A-Za-z0-9][A-Za-z0-9_.-]*$ ]]; then
  echo "❌ Invalid worker name '${NAME}' (allowed: letters, digits, '_', '.', '-')" >&2
  exit 1
fi
if ! [[ "$TOKEN_VAR" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]]; then
  echo "❌ Invalid variable name '${TOKEN_VAR}' (expected e.g. WORKER_1_DISCORD_BOT_TOKEN)" >&2
  exit 1
fi

# ── Paths ───────────────────────────────────────────────────
# compose.yml lives one directory above this script.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
COMPOSE_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
COMPOSE_FILE="${COMPOSE_DIR}/compose.yml"

# Each Hermes worker gets its own volume on the NFS HoardingCow
VOLUME_BASE="/mnt/HoardingCow_docker_data/Hermes"
VOLUME_DIR="${VOLUME_BASE}/${NAME}"

# The Hermes container runs as UID 10000 (hermes user from Dockerfile)
HERMES_UID=10000

# ── Validation ──────────────────────────────────────────────
if ! [ -f "$COMPOSE_FILE" ]; then
  echo "❌ compose.yml not found at $COMPOSE_FILE" >&2
  exit 1
fi

# Refuse to add a duplicate. Any indented `<name>:` key counts; commented
# lines start with `#` and are ignored. Dots in the name are matched literally.
NAME_RE="${NAME//./[.]}"
if grep -Eq "^[[:space:]]+${NAME_RE}:" "$COMPOSE_FILE"; then
  echo "❌ Service '${NAME}' already exists in ${COMPOSE_FILE}" >&2
  exit 1
fi
|
||||
|
||||
# ── Generate unique API key ─────────────────────────────────
# Used by Paperclip to authenticate against this worker's
# Hermes API server (/v1/chat/completions)
API_KEY="pc_worker_$(openssl rand -hex 16)"

# ── Find next available API port ────────────────────────────
# Workers get sequential ports starting at 8650.
# Scans compose.yml for existing API_SERVER_PORT values and picks the next
# one. Commented-out entries are counted too, so a port shown in a commented
# template stays reserved.
BASE_PORT=8650
MAX_PORT=0
while IFS= read -r match; do
  # Keep only the digits, so the parse does not depend on the exact spacing
  # between the colon and the quoted value.
  port="${match//[^0-9]/}"
  if [ -n "$port" ] && [ "$port" -gt "$MAX_PORT" ]; then
    MAX_PORT="$port"
  fi
done < <(grep -oE 'API_SERVER_PORT:[[:space:]]*"[0-9]+"' "$COMPOSE_FILE" 2>/dev/null || true)

NEW_PORT=$((MAX_PORT + 1))
if [ "$NEW_PORT" -lt "$BASE_PORT" ]; then
  NEW_PORT=$BASE_PORT
fi

if [ "$NEW_PORT" -gt 65535 ]; then
  echo "❌ No free API port left (next candidate ${NEW_PORT} exceeds 65535)" >&2
  exit 1
fi
|
||||
|
||||
# ── Create volume directory (on NFS) ────────────────────────
echo "📁 Creating volume directory: ${VOLUME_DIR}"
mkdir -p "$VOLUME_DIR"

# Hand the directory to the Hermes UID so the container can write its
# config, sessions and skills. Best effort: a failure only prints a warning.
if command -v chown &>/dev/null; then
  if ! chown -R "${HERMES_UID}:${HERMES_UID}" "$VOLUME_DIR" 2>/dev/null; then
    echo "⚠ Could not chown ${VOLUME_DIR} — run with sudo if needed"
  fi
fi

# rwxr-xr-x so the contents can be inspected when debugging; errors ignored.
chmod 755 "$VOLUME_DIR" 2>/dev/null || :
|
||||
|
||||
# ── Append service to compose.yml ───────────────────────────
echo "📝 Appending service '${NAME}' to compose.yml ..."

# The YAML merge key is shallow: a service-level `environment:` replaces the
# template's environment mapping. When the compose file exposes a nested
# `&hermes-worker-env` anchor, merge it so the worker keeps the defaults.
ENV_MERGE=0
if grep -q '&hermes-worker-env' "$COMPOSE_FILE"; then
  ENV_MERGE=1
else
  echo "⚠ No '&hermes-worker-env' anchor in compose.yml — the worker's environment will contain ONLY the three keys written below" >&2
fi

# Build the new file next to the original so the final rename stays on one
# filesystem; copy the original first so its mode is carried over.
TMPFILE="$(mktemp "${COMPOSE_FILE}.XXXXXX")"
trap 'rm -f "$TMPFILE"' EXIT
cp -p "$COMPOSE_FILE" "$TMPFILE"

# Values are passed through the environment instead of `awk -v`, which would
# interpret backslash escapes in them.
# NOTE(review): API_SERVER_KEY is written to compose.yml in clear text —
# consider referencing an env-file variable instead.
if ! W_NAME="$NAME" W_PORT="$NEW_PORT" W_KEY="$API_KEY" \
     W_TOKEN="$TOKEN_VAR" W_VOLUME="$VOLUME_DIR" W_ENV_MERGE="$ENV_MERGE" \
     awk '
  BEGIN {
    name = ENVIRON["W_NAME"]; port = ENVIRON["W_PORT"]; key = ENVIRON["W_KEY"]
    token = ENVIRON["W_TOKEN"]; volume = ENVIRON["W_VOLUME"]
    env_merge = ENVIRON["W_ENV_MERGE"]
  }
  # Insert the new service block just before the top-level networks: key.
  # Service keys sit at 2 spaces, their properties at 4, nested values at 6.
  /^networks:/ && !inserted {
    print ""
    print "  " name ":"
    print "    <<: *hermes-worker"
    print "    container_name: " name
    print "    environment:"
    if (env_merge == "1") print "      <<: *hermes-worker-env"
    print "      API_SERVER_PORT: \"" port "\""
    print "      API_SERVER_KEY: \"" key "\""
    print "      DISCORD_BOT_TOKEN: ${" token "}"
    print "    volumes:"
    print "      - " volume ":/opt/data"
    print ""
    inserted = 1
  }
  { print }
  # Non-zero exit when there was nowhere to insert the block.
  END { exit inserted ? 0 : 3 }
' "$COMPOSE_FILE" > "$TMPFILE"; then
  echo "❌ Could not insert service: no top-level 'networks:' key found in ${COMPOSE_FILE}" >&2
  exit 1
fi

mv "$TMPFILE" "$COMPOSE_FILE"
trap - EXIT
|
||||
|
||||
# ── Done ────────────────────────────────────────────────────
# Print the success banner and the manual follow-up steps in one go.
cat <<EOF

✅ Worker '${NAME}' provisioned successfully

────────────────────────────────────────────
 NEXT STEPS
────────────────────────────────────────────

1. Add secrets to the agenix .env stack file:

 # ${NAME}
 ${TOKEN_VAR}=<paste-discord-bot-token-here>

2. Restart the AI stack:

 systemctl restart ai_stack.service

3. In Paperclip, create an agent with HTTP adapter:

 Endpoint: http://${NAME}:${NEW_PORT}/v1/chat/completions
 API Key: ${API_KEY}

────────────────────────────────────────────
EOF
|
||||
@@ -8,13 +8,10 @@ services:
|
||||
- USER_GID=1000
|
||||
- GITEA__server__ROOT_URL=https://code.lazyworkhorse.net
|
||||
- GITEA__actions__ENABLED=true
|
||||
- GITEA__actions__DEFAULT_ACTIONS_URL=off
|
||||
- SSH_PORT=2222
|
||||
- SSH_LISTEN_PORT=2222
|
||||
# Enable Gitea Actions (act_runner required on host)
|
||||
- GITEA__actions__ENABLED=true
|
||||
# Don't fetch actions from GitHub (offline mode + local only)
|
||||
- GITEA__actions__DEFAULT_ACTIONS_URL=off
|
||||
volumes:
|
||||
- /mnt/HoardingCow_docker_data/Gitea:/data
|
||||
networks:
|
||||
|
||||
Reference in New Issue
Block a user