Compare commits

..

1 Commits

Author SHA1 Message Date
3c92d93366 feat: add llama-cpp-hermes service with ROCm 6.1 + gfx906 support
Some checks failed
Build Hermes agent / build (pull_request) Has been cancelled
Build ollama (gfx906) / build (pull_request) Has been cancelled
- Add custom llama.cpp Dockerfile with ROCm 6.1 + gfx906 (MI50) build
- Add llama-cpp-hermes service serving Hermes 4.3 on dual MI50 GPUs
- Strip GPU devices/ROCm env from ollama service (CPU-only for embeddings)

Hermes 4.3 runs at ~19 t/s on dual MI50s with 160K context.
2026-06-11 11:41:42 -04:00
5 changed files with 172 additions and 147 deletions

125
ai/compose.yml Executable file → Normal file
View File

@@ -28,22 +28,17 @@ services:
hermes: hermes:
build: build:
context: ./hermes context: ./hermes
args: ssh:
HERMES_PLUGIN_URLS: "git+https://code.lazyworkhorse.net/gortium/hermes-piper-plugin.git;git+https://code.lazyworkhorse.net/gortium/hermes-identity-plugin.git" - default
container_name: hermes container_name: hermes
entrypoint: ["/bin/bash", "-c", entrypoint: ["/bin/bash", "-c",
"bash /opt/data/hermes-tools/install.sh && bash /usr/local/bin/run-multi-gateways.sh && exec /usr/bin/tini -g -- /opt/hermes/docker/entrypoint.sh \"$@\"", "bash /opt/data/hermes-tools/install.sh && exec /usr/bin/tini -g -- /opt/hermes/docker/entrypoint.sh \"$@\"",
"hermes-entrypoint"] "hermes-entrypoint"]
restart: always restart: always
# Gateway run enables the internal API server on port 8642 # Gateway run enables the internal API server on port 8642
command: gateway run command: gateway run
environment: environment:
- OLLAMA_HOST=http://ollama:11434 - OLLAMA_HOST=http://ollama:11434
- HERMES_DASHBOARD=1
# Multi-profile: comma-separated list of profiles to run as gateways.
# The entrypoint reads this and starts one gateway per profile.
# Add profiles here when they exist on disk (e.g. default,researcher,writer)
- HERMES_PROFILES=ashley,claire,finn,matt,paul
- API_SERVER_ENABLED=true - API_SERVER_ENABLED=true
- API_SERVER_PORT=8642 - API_SERVER_PORT=8642
- API_SERVER_HOST=0.0.0.0 - API_SERVER_HOST=0.0.0.0
@@ -71,30 +66,6 @@ services:
- "26" - "26"
networks: networks:
- ai_backend - ai_backend
- ai_net
labels:
- "traefik.enable=true"
- "traefik.docker.network=ai_net"
# Router for HTTP + redirection to HTTPS
- "traefik.http.routers.hermes-web-http.rule=Host(`hermes.lazyworkhorse.net`)"
- "traefik.http.routers.hermes-web-http.entrypoints=web"
- "traefik.http.routers.hermes-web-http.middlewares=redirect-to-https"
# Router for HTTPS with TLS — protected by Authelia
- "traefik.http.routers.hermes-web-https.rule=Host(`hermes.lazyworkhorse.net`)"
- "traefik.http.routers.hermes-web-https.entrypoints=websecure"
- "traefik.http.routers.hermes-web-https.tls=true"
- "traefik.http.routers.hermes-web-https.tls.certresolver=njalla"
- "traefik.http.routers.hermes-web-https.middlewares=hermes-auth"
# Authelia forwardAuth
- "traefik.http.middlewares.hermes-auth.forwardauth.address=http://authelia:9091/api/verify?rd=https://auth.lazyworkhorse.net/"
- "traefik.http.middlewares.hermes-auth.forwardauth.trustforwardheader=true"
- "traefik.http.middlewares.hermes-auth.forwardauth.authresponseheaders=X-Forwarded-User,X-Forwarded-Groups"
# Service Loadbalancer (dashboard port 9119)
- "traefik.http.services.hermes-web.loadbalancer.server.port=9119"
syncthing: syncthing:
image: syncthing/syncthing:latest image: syncthing/syncthing:latest
@@ -141,22 +112,7 @@ services:
- /mnt/HoardingCow_docker_data/Ollama/ollama:/root/.ollama - /mnt/HoardingCow_docker_data/Ollama/ollama:/root/.ollama
environment: environment:
- OLLAMA_VULKAN=0 - OLLAMA_VULKAN=0
- HSA_OVERRIDE_GFX_VERSION=9.0.6
- HCC_AMDGPU_TARGET=gfx906
- HIP_VISIBLE_DEVICES=0,1
- ROCR_VISIBLE_DEVICES=0,1
- HSA_ENABLE_SDMA=0
- OLLAMA_HOST=0.0.0.0 - OLLAMA_HOST=0.0.0.0
- OLLAMA_DEBUG=1
- OLLAMA_FLASH_ATTENTION=1
- OLLAMA_NUM_PARALLEL=2
devices:
# Map the render nodes and KFD for ROCm to work inside the container
- /dev/kfd:/dev/kfd
- /dev/dri:/dev/dri
group_add:
- "303"
- "26"
networks: networks:
ai_net: ai_net:
@@ -166,47 +122,40 @@ networks:
driver: bridge driver: bridge
name: ai_backend name: ai_backend
# llama_cpp_devstral: llama-cpp-hermes:
# image: ghcr.io/ggml-org/llama.cpp:server-rocm image: llama-cpp:rocm-gfx906
# container_name: llama_cpp_devstral container_name: llama-cpp-hermes
# restart: unless-stopped restart: unless-stopped
# networks: networks:
# - ai_backend - ai_backend
# ports: ports:
# - "8300:8080" - "127.0.0.1:8300:8080"
# ipc: host ipc: host
# devices: devices:
# - "/dev/kfd:/dev/kfd" - /dev/kfd:/dev/kfd
# - "/dev/dri:/dev/dri" - /dev/dri:/dev/dri
# group_add: group_add:
# - "303" # video - "303"
# - "26" # render - "26"
# environment: environment:
# HSA_OVERRIDE_GFX_VERSION: 9.0.6 - HSA_OVERRIDE_GFX_VERSION=9.0.6
# HIP_VISIBLE_DEVICES: 0,1 - HSA_ENABLE_SDMA=0
# LLAMA_CACHE: /models - HIP_VISIBLE_DEVICES=0,1
# volumes: - LLAMA_CACHE=/models
# - /mnt/HoardingCow_docker_data/Llama_cpp/models:/models volumes:
# - /mnt/HoardingCow_docker_data/Llama_cpp/devstral-agent.jinja:/template.jinja - /mnt/HoardingCow_docker_data/Llama_cpp/models:/models
# command: > - /mnt/HoardingCow_docker_data/Ollama/ollama/models/blobs/sha256-17823599694fa3503ef54bf748d5078c6ce881f4d01616cafa255dc05d215a08:/model.gguf:ro
# -hf unsloth/Devstral-Small-2-24B-Instruct-2512-GGUF:Devstral-Small-2-24B-Instruct-2512-Q8_0.gguf command: >
# -a devstral-2-small-llama_cpp -m /model.gguf
# --chat-template-file /template.jinja --host 0.0.0.0
# --host 0.0.0.0 --port 8080
# --port 8080 --gpu-layers 99
# --n-gpu-layers 99 --ctx-size 163840
# --ctx-size 163840 -ctk f16 -ctv f16
# --batch-size 4096 --flash-attn on
# --ubatch-size 4096 --split-mode layer
# --cache-type-k f16 --no-mmap
# --cache-type-v f16 --n-predict -1
# --cache-reuse 256
# --flash-attn on
# --context-shift
# --split-mode layer
# --no-mmap
# --n-predict -1
# --parallel 2
# vllm: # vllm:
# image: nalanzeyu/vllm-gfx906:v0.9.0-rocm6.3 # image: nalanzeyu/vllm-gfx906:v0.9.0-rocm6.3

View File

@@ -1,22 +1,45 @@
# syntax=docker/dockerfile:1 # syntax=docker/dockerfile:1
# Hermes Agent -- official image + custom plugins layered on top. # Hermes Agent -- custom fork build
# No fork needed — customizations are pip-installable plugins from Gitea. # Builds on top of official image + overlays our forked source from Gitea.
# Requires Docker BuildKit. Pass SSH agent for git clone:
# docker compose build hermes # docker compose build hermes
# Or manually: # Or manually:
# DOCKER_BUILDKIT=1 docker build --build-arg HERMES_PLUGIN_URLS="url1 url2" -t hermes-agent:custom . # DOCKER_BUILDKIT=1 docker build --ssh default -t hermes-agent:custom .
# ---------- Base: official Hermes image (system deps, npm, uv, Playwright) ---------- # ---------- Base: official Hermes image (system deps, npm, uv, Playwright) ----------
FROM nousresearch/hermes-agent:latest FROM nousresearch/hermes-agent:latest
# ---------- Plugin URLs (semicolon-separated, set via compose.yml build args) ---------- # ---------- Overlay our forked source ----------
ARG HERMES_PLUGIN_URLS="" # Uses SSH agent forwarding from the build host (no key baked into image).
# --exclude node_modules/.venv keeps the base image's pre-built layers intact.
# Only the Python source, web UI source, and config change.
RUN --mount=type=ssh \
mkdir -p /root/.ssh && \
ssh-keyscan -p 2222 code.lazyworkhorse.net >> /root/.ssh/known_hosts 2>/dev/null && \
cd /tmp && \
GIT_SSH_COMMAND='ssh -p 2222 -o StrictHostKeyChecking=no' \
git clone --depth 1 --branch main \
git@code.lazyworkhorse.net:gortium/hermes-agent.git fork && \
rsync -a --delete fork/ /opt/hermes/ \
--exclude node_modules \
--exclude .venv \
--exclude .git && \
rm -rf /tmp/fork /root/.ssh/
# ---------- Rebuild web UI ----------
# Source files changed; node_modules (from base image) reused.
RUN cd /opt/hermes && npm run build
# ---------- Reinstall Python package (editable) ----------
# Picks up source changes from our fork.
RUN . /opt/hermes/.venv/bin/activate && \
uv pip install --no-cache-dir --no-deps -e /opt/hermes
# ---------- Extra system deps ---------- # ---------- Extra system deps ----------
USER root USER root
RUN apt-get update && \ RUN apt-get update && \
apt-get install -y --no-install-recommends \ apt-get install -y --no-install-recommends \
libportaudio2 ca-certificates poppler-utils imagemagick \ libportaudio2 ca-certificates poppler-utils imagemagick \
libolm-dev \
texlive-latex-base texlive-latex-extra texlive-fonts-recommended \ texlive-latex-base texlive-latex-extra texlive-fonts-recommended \
texlive-xetex texlive-science \ texlive-xetex texlive-science \
qemu-user-static binfmt-support emacs-nox && \ qemu-user-static binfmt-support emacs-nox && \
@@ -25,10 +48,6 @@ RUN apt-get update && \
# ---------- UV ---------- # ---------- UV ----------
COPY --chmod=0755 --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/ COPY --chmod=0755 --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/
# ---------- Matrix bridge + extra pip deps ----------
RUN . /opt/hermes/.venv/bin/activate && \
uv pip install --no-cache-dir 'mautrix[encryption]' openai
# ---------- Piper TTS ---------- # ---------- Piper TTS ----------
RUN . /opt/hermes/.venv/bin/activate && \ RUN . /opt/hermes/.venv/bin/activate && \
uv pip install --no-cache-dir piper-tts sounddevice numpy && \ uv pip install --no-cache-dir piper-tts sounddevice numpy && \
@@ -56,22 +75,9 @@ os.remove(tgz)
print('himalaya v1.2.0 installed') print('himalaya v1.2.0 installed')
PYEOF PYEOF
# ---------- Install custom plugins from URLs ---------- # ---------- Install himalaya-ro wrapper ----------
# HERMES_PLUGIN_URLS is a semicolon-separated list of pip-installable COPY --chmod=0755 himalaya-ro.sh /usr/local/bin/himalaya-ro
# package URLs (e.g. git+https:// or direct .tar.gz archives from Gitea).
# Each plugin is installed into the Hermes venv.
RUN if [ -n "$HERMES_PLUGIN_URLS" ]; then \
. /opt/hermes/.venv/bin/activate && \
IFS=';' read -ra URLS <<< "$HERMES_PLUGIN_URLS" && \
for url in "${URLS[@]}"; do \
echo "Installing plugin: $url" && \
uv pip install --no-cache-dir "$url"; \
done; \
fi
# ---------- Install multi-gateway launcher ----------
# Launches one gateway process per profile (HERMES_PROFILES env var)
COPY --chmod=0755 run-multi-gateways.sh /usr/local/bin/run-multi-gateways.sh
# ---------- Runtime ---------- # ---------- Runtime ----------
USER hermes USER hermes
@@ -82,7 +88,6 @@ ENV CHROME_EXECUTABLE=/opt/hermes/.playwright/chromium/chrome-linux/chrome
# Ensure tools directory and toolsets.py are writable by the hermes runtime user # Ensure tools directory and toolsets.py are writable by the hermes runtime user
# so custom tools can be injected from the persistent volume at startup. # so custom tools can be injected from the persistent volume at startup.
USER root
RUN chown -R hermes:hermes /opt/hermes/tools /opt/hermes/toolsets.py RUN chown -R hermes:hermes /opt/hermes/tools /opt/hermes/toolsets.py
VOLUME [ "/opt/data" ] VOLUME [ "/opt/data" ]

73
ai/hermes/himalaya-ro.sh Normal file
View File

@@ -0,0 +1,73 @@
#!/usr/bin/env bash
# ─────────────────────────────────────────────────────────────
# himalaya-ro — Read-only wrapper for himalaya
#
# Refuses the subcommands listed in BLOCKED_CMDS, appending one
# audit line per refused invocation, and passes every other
# invocation through to the real binary unchanged.
#
# Usage: himalaya-ro [options] <command> [args...]
#
# Environment:
#   HIMALAYA_BIN        path of the real himalaya binary
#                       (default: /usr/local/bin/himalaya)
#   HIMALAYA_AUDIT_LOG  file that receives audit lines
#                       (default: /var/log/himalaya-audit.log)
#
# Exit codes:
#   100  command refused by the read-only policy
#   101  HIMALAYA_BIN resolves to this wrapper (misconfiguration)
#   else whatever the real himalaya returns
#
# Install: place in PATH ahead of the real himalaya. If the wrapper
# is installed *as* `himalaya`, HIMALAYA_BIN must point at the real
# binary's new location, otherwise the wrapper would exec itself.
# ─────────────────────────────────────────────────────────────
set -o pipefail

# ── Configuration ───────────────────────────────────────────
HIMALAYA_BIN="${HIMALAYA_BIN:-/usr/local/bin/himalaya}"
AUDIT_LOG="${HIMALAYA_AUDIT_LOG:-/var/log/himalaya-audit.log}"

# Guard against an exec loop: -ef is true when both paths resolve to the
# same file (symlinks followed), i.e. the "real" binary is this script.
if [[ "$HIMALAYA_BIN" -ef "${BASH_SOURCE[0]}" ]]; then
  echo "ERROR: HIMALAYA_BIN ($HIMALAYA_BIN) points at the himalaya-ro wrapper itself." >&2
  exit 101
fi

# ── Destructive commands we block ──────────────────────────
# Entries are compared literally against pairs of adjacent words.
# NOTE(review): any alias or other mutating subcommand that himalaya
# accepts is NOT covered by this list — verify against `himalaya --help`.
BLOCKED_CMDS=(
  "message move"
  "message delete"
  "message copy"
  "flag add"
  "flag remove"
  "folder create"
  "folder delete"
  "folder rename"
  "template send"
  "account configure"
  "account delete"
)

# ── Collect the non-option words of the command line ───────
# Every argument starting with '-' (long or short option, or a bare '--')
# is dropped. Two views of the remaining words are kept, because this
# script cannot know for every option whether it consumes a value:
#   WORDS_STRICT — values of the known value-taking long options removed
#   WORDS_ALL    — nothing else removed
# Checking both makes the policy fail closed: `-a work message delete`
# is caught through WORDS_ALL, `message --account work delete` through
# WORDS_STRICT.
WORDS_ALL=()
WORDS_STRICT=()
skip_next=false
for arg in "$@"; do
  if [[ "$arg" == -* ]]; then
    case "$arg" in
      --account|--config|--output|--page|--page-size|--folder|--color|--format)
        skip_next=true ;;
      *)
        skip_next=false ;;
    esac
    continue
  fi
  WORDS_ALL+=("$arg")
  if $skip_next; then
    # This word is the value of the preceding option, not part of the command.
    skip_next=false
    continue
  fi
  WORDS_STRICT+=("$arg")
done

# Print the first BLOCKED_CMDS entry that equals two adjacent words of the
# argument list and return 0; return 1 when no adjacent pair is blocked.
find_blocked() {
  local prev="" word blocked
  for word in "$@"; do
    if [[ -n "$prev" ]]; then
      for blocked in "${BLOCKED_CMDS[@]}"; do
        if [[ "$prev $word" == "$blocked" ]]; then
          printf '%s\n' "$blocked"
          return 0
        fi
      done
    fi
    prev="$word"
  done
  return 1
}

# ── Enforce the policy ─────────────────────────────────────
BLOCKED_MATCH="$(find_blocked "${WORDS_STRICT[@]}")" \
  || BLOCKED_MATCH="$(find_blocked "${WORDS_ALL[@]}")" \
  || BLOCKED_MATCH=""

if [[ -n "$BLOCKED_MATCH" ]]; then
  TS=$(date '+%Y-%m-%d %H:%M:%S')
  # Best effort: an unwritable log must not turn the refusal into a
  # confusing shell error, but the operator should still hear about it.
  if ! { echo "[AUDIT] $TS BLOCKED: himalaya $*" >> "$AUDIT_LOG"; } 2>/dev/null; then
    echo "WARNING: could not write audit log: $AUDIT_LOG" >&2
  fi
  echo "ERROR: Command 'himalaya $BLOCKED_MATCH ...' is blocked by read-only policy." >&2
  echo "       Audit log: $AUDIT_LOG" >&2
  exit 100
fi

# ── Allow pass-through ─────────────────────────────────────
exec "$HIMALAYA_BIN" "$@"

View File

@@ -1,32 +0,0 @@
#!/bin/bash
# Multi-gateway launcher driven by the HERMES_PROFILES env var.
# HERMES_PROFILES is a comma-separated list of profile names; one gateway
# process is started in the background for every non-empty name, with its
# output appended to /opt/data/logs/gateway-<profile>.log.
# Intended to run ahead of the main entrypoint, so it returns as soon as
# the gateways have been spawned.

set -e

# Nothing to launch when no profiles were requested.
if [[ -z "${HERMES_PROFILES}" ]]; then
  echo "HERMES_PROFILES not set — skipping multi-gateway launch"
  exit 0
fi

# The hermes executable is called by its absolute path inside the venv.
HERMES_BIN="/opt/hermes/.venv/bin/hermes"
if ! [[ -x "$HERMES_BIN" ]]; then
  echo "ERROR: hermes binary not found at $HERMES_BIN"
  exit 1
fi

mkdir -p /opt/data/logs

IFS=',' read -ra requested <<< "${HERMES_PROFILES}"
for raw in "${requested[@]}"; do
  # Piping through xargs trims surrounding whitespace from the entry.
  name="$(echo "${raw}" | xargs)"
  if [[ -z "${name}" ]]; then
    continue
  fi

  echo "Starting gateway for profile: ${name}"
  # Each gateway runs as user hermes with API_SERVER_ENABLED=false and an
  # empty API_SERVER_KEY in its environment.
  nohup env API_SERVER_ENABLED=false API_SERVER_KEY= \
    gosu hermes "$HERMES_BIN" --profile "${name}" gateway run \
    >> "/opt/data/logs/gateway-${name}.log" 2>&1 &
done

echo "All gateways launched: ${HERMES_PROFILES}"

30
ai/llama-cpp/Dockerfile Normal file
View File

@@ -0,0 +1,30 @@
# Custom llama.cpp server (llama-server) built with ROCm 6.1 for gfx906 (MI50).
# Build: docker build -t llama-cpp:rocm-gfx906 .

# ---------- Stage 1: compile llama-server against the full ROCm SDK ----------
FROM rocm/dev-ubuntu-22.04:6.1.2-complete AS builder
# ca-certificates is listed explicitly so the https clone below does not rely
# on it arriving as a recommended package.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
      ca-certificates curl git build-essential pkg-config cmake make && \
    rm -rf /var/lib/apt/lists/*

# llama.cpp tag to build; override with --build-arg LLAMACPP_VERSION=<tag>.
ARG LLAMACPP_VERSION=b9596
RUN git clone --depth 1 --branch ${LLAMACPP_VERSION} https://github.com/ggml-org/llama.cpp.git /build
WORKDIR /build

ENV HIP_PATH=/opt/rocm \
    ROCM_PATH=/opt/rocm \
    PATH=/opt/rocm/bin:/opt/rocm/llvm/bin:${PATH} \
    CMAKE_PREFIX_PATH=/opt/rocm

# HIP backend only, compiled for the gfx906:xnack- target; the llama.cpp
# libraries are linked statically (BUILD_SHARED_LIBS=OFF), and only the
# llama-server target is built.
RUN mkdir build && cd build && \
    cmake .. -DGGML_HIP=ON -DCMAKE_BUILD_TYPE=Release \
      -DAMDGPU_TARGETS="gfx906:xnack-" \
      -DCMAKE_POSITION_INDEPENDENT_CODE=ON \
      -DGGML_CUDA=OFF -DGGML_VULKAN=OFF -DGGML_METAL=OFF \
      -DBUILD_SHARED_LIBS=OFF && \
    cmake --build . --target llama-server -- -j $(nproc)

# ---------- Stage 2: runtime image with the ROCm libraries and the binary ----------
FROM ubuntu:24.04
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
      ca-certificates curl libstdc++6 libgomp1 libopenblas0 \
      libnuma1 libelf1 libdrm2 libdrm-amdgpu1 && \
    rm -rf /var/lib/apt/lists/*

# ROCm shared libraries and data files are taken from the builder stage.
COPY --from=builder /opt/rocm/lib/ /opt/rocm/lib/
COPY --from=builder /opt/rocm/share/ /opt/rocm/share/
COPY --from=builder /build/build/bin/llama-server /usr/local/bin/llama-server

# Register /opt/rocm/lib with the dynamic linker.
RUN echo /opt/rocm/lib > /etc/ld.so.conf.d/rocm.conf && ldconfig

ENV HSA_OVERRIDE_GFX_VERSION=9.0.6 \
    HCC_AMDGPU_TARGET=gfx906 \
    HSA_ENABLE_SDMA=0

EXPOSE 8080
ENTRYPOINT ["/usr/local/bin/llama-server"]