Compare commits
1 Commits
feat/honch
...
3c92d93366
| Author | SHA1 | Date | |
|---|---|---|---|
| 3c92d93366 |
242
ai/compose.yml
242
ai/compose.yml
@@ -44,7 +44,7 @@ services:
|
|||||||
- API_SERVER_HOST=0.0.0.0
|
- API_SERVER_HOST=0.0.0.0
|
||||||
- API_SERVER_KEY=hermes_local_key
|
- API_SERVER_KEY=hermes_local_key
|
||||||
- GATEWAY_ALLOW_ALL_USERS=true
|
- GATEWAY_ALLOW_ALL_USERS=true
|
||||||
- OPENROUTER_API_KEY=${OPEN...KEY}
|
- OPENROUTER_API_KEY=${OPENROUTER_API_KEY}
|
||||||
# ROCm for GPU-accelerated faster-whisper STT
|
# ROCm for GPU-accelerated faster-whisper STT
|
||||||
- HSA_OVERRIDE_GFX_VERSION=9.0.6
|
- HSA_OVERRIDE_GFX_VERSION=9.0.6
|
||||||
- HCC_AMDGPU_TARGET=gfx906
|
- HCC_AMDGPU_TARGET=gfx906
|
||||||
@@ -112,166 +112,7 @@ services:
|
|||||||
- /mnt/HoardingCow_docker_data/Ollama/ollama:/root/.ollama
|
- /mnt/HoardingCow_docker_data/Ollama/ollama:/root/.ollama
|
||||||
environment:
|
environment:
|
||||||
- OLLAMA_VULKAN=0
|
- OLLAMA_VULKAN=0
|
||||||
- HSA_OVERRIDE_GFX_VERSION=9.0.6
|
|
||||||
- HCC_AMDGPU_TARGET=gfx906
|
|
||||||
- HIP_VISIBLE_DEVICES=0,1
|
|
||||||
- ROCR_VISIBLE_DEVICES=0,1
|
|
||||||
- HSA_ENABLE_SDMA=0
|
|
||||||
- OLLAMA_HOST=0.0.0.0
|
- OLLAMA_HOST=0.0.0.0
|
||||||
- OLLAMA_DEBUG=1
|
|
||||||
- OLLAMA_FLASH_ATTENTION=1
|
|
||||||
- OLLAMA_NUM_PARALLEL=2
|
|
||||||
devices:
|
|
||||||
# Map the render nodes and KFD for ROCm to work inside the container
|
|
||||||
- /dev/kfd:/dev/kfd
|
|
||||||
- /dev/dri:/dev/dri
|
|
||||||
group_add:
|
|
||||||
- "303"
|
|
||||||
- "26"
|
|
||||||
|
|
||||||
paperclip-db:
|
|
||||||
image: postgres:17-alpine
|
|
||||||
container_name: paperclip-db
|
|
||||||
restart: always
|
|
||||||
environment:
|
|
||||||
POSTGRES_USER: paperclip
|
|
||||||
POSTGRES_PASSWORD: ${PAPERCLIP_DB_PASSWORD:?PAPERCLIP_DB_PASSWORD must be set}
|
|
||||||
POSTGRES_DB: paperclip
|
|
||||||
healthcheck:
|
|
||||||
test: ["CMD-SHELL", "pg_isready -U paperclip -d paperclip"]
|
|
||||||
interval: 5s
|
|
||||||
timeout: 5s
|
|
||||||
retries: 10
|
|
||||||
volumes:
|
|
||||||
- /mnt/HoardingCow_docker_data/Paperclip/pgdata:/var/lib/postgresql/data
|
|
||||||
networks:
|
|
||||||
- ai_backend
|
|
||||||
|
|
||||||
paperclip:
|
|
||||||
image: ghcr.io/paperclipai/paperclip:v2026.517.0
|
|
||||||
container_name: paperclip
|
|
||||||
restart: always
|
|
||||||
ports:
|
|
||||||
- "127.0.0.1:3100:3100"
|
|
||||||
environment:
|
|
||||||
- HOST=0.0.0.0
|
|
||||||
- PORT=3100
|
|
||||||
- SERVE_UI=true
|
|
||||||
- DATABASE_URL=postgres://paperclip:***@paperclip-db:5432/paperclip
|
|
||||||
- BETTER_AUTH_SECRET=${PAPE...CRET must be set}
|
|
||||||
- PAPERCLIP_PUBLIC_URL=https://paperclip.lazyworkhorse.net
|
|
||||||
- PAPERCLIP_DEPLOYMENT_MODE=authenticated
|
|
||||||
- PAPERCLIP_DEPLOYMENT_EXPOSURE=private
|
|
||||||
volumes:
|
|
||||||
- /mnt/HoardingCow_docker_data/Paperclip/data:/paperclip
|
|
||||||
depends_on:
|
|
||||||
paperclip-db:
|
|
||||||
condition: service_healthy
|
|
||||||
networks:
|
|
||||||
- ai_net
|
|
||||||
- ai_backend
|
|
||||||
labels:
|
|
||||||
- "traefik.enable=true"
|
|
||||||
- "traefik.docker.network=ai_net"
|
|
||||||
|
|
||||||
- "traefik.http.routers.paperclip-http.rule=Host(`paperclip.lazyworkhorse.net`)"
|
|
||||||
- "traefik.http.routers.paperclip-http.entrypoints=web"
|
|
||||||
- "traefik.http.routers.paperclip-http.middlewares=redirect-to-https"
|
|
||||||
|
|
||||||
- "traefik.http.routers.paperclip-https.rule=Host(`paperclip.lazyworkhorse.net`)"
|
|
||||||
- "traefik.http.routers.paperclip-https.entrypoints=websecure"
|
|
||||||
- "traefik.http.routers.paperclip-https.tls=true"
|
|
||||||
- "traefik.http.routers.paperclip-https.tls.certresolver=njalla"
|
|
||||||
|
|
||||||
- "traefik.http.services.paperclip.loadbalancer.server.port=3100"
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# Honcho — Memory infrastructure for stateful AI agents
|
|
||||||
# Self-hosted memory server with pgvector for embedding storage.
|
|
||||||
# Defaults to Ollama for embeddings; configure LLM provider for full deriver
|
|
||||||
# and summarization support.
|
|
||||||
#
|
|
||||||
# API port: 8000
|
|
||||||
# Web: https://honcho.lazyworkhorse.net
|
|
||||||
# Docs: https://github.com/plastic-labs/honcho
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
honcho-db:
|
|
||||||
image: pgvector/pgvector:pg17-trixie
|
|
||||||
container_name: honcho-db
|
|
||||||
restart: unless-stopped
|
|
||||||
environment:
|
|
||||||
POSTGRES_DB: honcho
|
|
||||||
POSTGRES_USER: honcho
|
|
||||||
POSTGRES_PASSWORD: ${HONCHO_DB_PASSWORD:?HONCHO_DB_PASSWORD must be set}
|
|
||||||
healthcheck:
|
|
||||||
test: ["CMD-SHELL", "pg_isready -U honcho -d honcho"]
|
|
||||||
interval: 5s
|
|
||||||
timeout: 5s
|
|
||||||
retries: 10
|
|
||||||
volumes:
|
|
||||||
- /mnt/HoardingCow_docker_data/Honcho/pgdata:/var/lib/postgresql/data
|
|
||||||
- ./honcho/init.sql:/docker-entrypoint-initdb.d/init.sql
|
|
||||||
networks:
|
|
||||||
- ai_backend
|
|
||||||
|
|
||||||
honcho:
|
|
||||||
build:
|
|
||||||
context: ./honcho
|
|
||||||
dockerfile: Dockerfile
|
|
||||||
container_name: honcho
|
|
||||||
restart: unless-stopped
|
|
||||||
ports:
|
|
||||||
- "127.0.0.1:8000:8000"
|
|
||||||
depends_on:
|
|
||||||
honcho-db:
|
|
||||||
condition: service_healthy
|
|
||||||
environment:
|
|
||||||
DB_CONNECTION_URI: postgresql+psycopg://honcho:${HONCHO_DB_PASSWORD:?HONCHO_DB_PASSWORD must be set}@honcho-db:5432/honcho
|
|
||||||
LOG_LEVEL: INFO
|
|
||||||
LLM_OPENAI_API_KEY: ${LLM_OPENAI_API_KEY:-ollama}
|
|
||||||
volumes:
|
|
||||||
- /mnt/HoardingCow_docker_data/Honcho/config.toml:/app/config.toml
|
|
||||||
networks:
|
|
||||||
- ai_backend
|
|
||||||
- ai_net
|
|
||||||
labels:
|
|
||||||
- "traefik.enable=true"
|
|
||||||
- "traefik.docker.network=ai_net"
|
|
||||||
|
|
||||||
- "traefik.http.routers.honcho-http.rule=Host(`honcho.lazyworkhorse.net`)"
|
|
||||||
- "traefik.http.routers.honcho-http.entrypoints=web"
|
|
||||||
- "traefik.http.routers.honcho-http.middlewares=redirect-to-https"
|
|
||||||
|
|
||||||
- "traefik.http.routers.honcho-https.rule=Host(`honcho.lazyworkhorse.net`)"
|
|
||||||
- "traefik.http.routers.honcho-https.entrypoints=websecure"
|
|
||||||
- "traefik.http.routers.honcho-https.tls=true"
|
|
||||||
- "traefik.http.routers.honcho-https.tls.certresolver=njalla"
|
|
||||||
|
|
||||||
- "traefik.http.services.honcho.loadbalancer.server.port=8000"
|
|
||||||
|
|
||||||
holographic-memory:
|
|
||||||
build:
|
|
||||||
context: ./holographic-memory
|
|
||||||
image: holographic-memory:latest
|
|
||||||
container_name: holographic-memory
|
|
||||||
restart: unless-stopped
|
|
||||||
ports:
|
|
||||||
- "127.0.0.1:8100:8100"
|
|
||||||
environment:
|
|
||||||
- HOLOGRAPHIC_DB_PATH=/data/holographic/memory_store.db
|
|
||||||
- HOLOGRAPHIC_PORT=8100
|
|
||||||
- HOLOGRAPHIC_DEFAULT_TRUST=0.5
|
|
||||||
volumes:
|
|
||||||
- /mnt/HoardingCow_docker_data/HolographicMemory:/data/holographic
|
|
||||||
networks:
|
|
||||||
- ai_backend
|
|
||||||
healthcheck:
|
|
||||||
test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://127.0.0.1:8100/health')"]
|
|
||||||
interval: 30s
|
|
||||||
timeout: 5s
|
|
||||||
retries: 3
|
|
||||||
start_period: 10s
|
|
||||||
|
|
||||||
networks:
|
networks:
|
||||||
ai_net:
|
ai_net:
|
||||||
@@ -281,47 +122,40 @@ networks:
|
|||||||
driver: bridge
|
driver: bridge
|
||||||
name: ai_backend
|
name: ai_backend
|
||||||
|
|
||||||
# llama_cpp_devstral:
|
llama-cpp-hermes:
|
||||||
# image: ghcr.io/ggml-org/llama.cpp:server-rocm
|
image: llama-cpp:rocm-gfx906
|
||||||
# container_name: llama_cpp_devstral
|
container_name: llama-cpp-hermes
|
||||||
# restart: unless-stopped
|
restart: unless-stopped
|
||||||
# networks:
|
networks:
|
||||||
# - ai_backend
|
- ai_backend
|
||||||
# ports:
|
ports:
|
||||||
# - "8300:8080"
|
- "127.0.0.1:8300:8080"
|
||||||
# ipc: host
|
ipc: host
|
||||||
# devices:
|
devices:
|
||||||
# - "/dev/kfd:/dev/kfd"
|
- /dev/kfd:/dev/kfd
|
||||||
# - "/dev/dri:/dev/dri"
|
- /dev/dri:/dev/dri
|
||||||
# group_add:
|
group_add:
|
||||||
# - "303" # video
|
- "303"
|
||||||
# - "26" # render
|
- "26"
|
||||||
# environment:
|
environment:
|
||||||
# HSA_OVERRIDE_GFX_VERSION: 9.0.6
|
- HSA_OVERRIDE_GFX_VERSION=9.0.6
|
||||||
# HIP_VISIBLE_DEVICES: 0,1
|
- HSA_ENABLE_SDMA=0
|
||||||
# LLAMA_CACHE: /models
|
- HIP_VISIBLE_DEVICES=0,1
|
||||||
# volumes:
|
- LLAMA_CACHE=/models
|
||||||
# - /mnt/HoardingCow_docker_data/Llama_cpp/models:/models
|
volumes:
|
||||||
# - /mnt/HoardingCow_docker_data/Llama_cpp/devstral-agent.jinja:/template.jinja
|
- /mnt/HoardingCow_docker_data/Llama_cpp/models:/models
|
||||||
# command: >
|
- /mnt/HoardingCow_docker_data/Ollama/ollama/models/blobs/sha256-17823599694fa3503ef54bf748d5078c6ce881f4d01616cafa255dc05d215a08:/model.gguf:ro
|
||||||
# -hf unsloth/Devstral-Small-2-24B-Instruct-2512-GGUF:Devstral-Small-2-24B-Instruct-2512-Q8_0.gguf
|
command: >
|
||||||
# -a devstral-2-small-llama_cpp
|
-m /model.gguf
|
||||||
# --chat-template-file /template.jinja
|
--host 0.0.0.0
|
||||||
# --host 0.0.0.0
|
--port 8080
|
||||||
# --port 8080
|
--gpu-layers 99
|
||||||
# --n-gpu-layers 99
|
--ctx-size 163840
|
||||||
# --ctx-size 163840
|
-ctk f16 -ctv f16
|
||||||
# --batch-size 4096
|
--flash-attn on
|
||||||
# --ubatch-size 4096
|
--split-mode layer
|
||||||
# --cache-type-k f16
|
--no-mmap
|
||||||
# --cache-type-v f16
|
--n-predict -1
|
||||||
# --cache-reuse 256
|
|
||||||
# --flash-attn on
|
|
||||||
# --context-shift
|
|
||||||
# --split-mode layer
|
|
||||||
# --no-mmap
|
|
||||||
# --n-predict -1
|
|
||||||
# --parallel 2
|
|
||||||
|
|
||||||
# vllm:
|
# vllm:
|
||||||
# image: nalanzeyu/vllm-gfx906:v0.9.0-rocm6.3
|
# image: nalanzeyu/vllm-gfx906:v0.9.0-rocm6.3
|
||||||
@@ -424,8 +258,8 @@ networks:
|
|||||||
# - /home/gortium/infra:/data/workspace/infra
|
# - /home/gortium/infra:/data/workspace/infra
|
||||||
# environment:
|
# environment:
|
||||||
# - TZ=America/Toronto
|
# - TZ=America/Toronto
|
||||||
# - OPENCLAW_GATEWAY_TOKEN=${OPEN...KEN}
|
# - OPENCLAW_GATEWAY_TOKEN=${OPENCLAW_GATEWAY_TOKEN}
|
||||||
# - OPENROUTER_API_KEY=${OPEN...KEY}
|
# - OPENROUTER_API_KEY=${OPENROUTER_API_KEY}
|
||||||
# # Point to the sidecar browser
|
# # Point to the sidecar browser
|
||||||
# - BROWSER_CDP_URL=http://openclaw-browser:9222
|
# - BROWSER_CDP_URL=http://openclaw-browser:9222
|
||||||
# - BROWSER_EVALUATE_ENABLED=true
|
# - BROWSER_EVALUATE_ENABLED=true
|
||||||
@@ -470,7 +304,7 @@ networks:
|
|||||||
# - PGID=1000
|
# - PGID=1000
|
||||||
# - PUBLIC_KEY_FILE=/config/ssh/authorized_keys
|
# - PUBLIC_KEY_FILE=/config/ssh/authorized_keys
|
||||||
# - SUDO_ACCESS=false
|
# - SUDO_ACCESS=false
|
||||||
# - PASSWORD_ACCESS=***
|
# - PASSWORD_ACCESS=false
|
||||||
# volumes:
|
# volumes:
|
||||||
# - /mnt/HoardingCow_docker_data/openclaw/ssh-config:/config
|
# - /mnt/HoardingCow_docker_data/openclaw/ssh-config:/config
|
||||||
# - /home/gortium/infra:/data/workspace/infra:ro
|
# - /home/gortium/infra:/data/workspace/infra:ro
|
||||||
|
|||||||
@@ -79,26 +79,6 @@ PYEOF
|
|||||||
COPY --chmod=0755 himalaya-ro.sh /usr/local/bin/himalaya-ro
|
COPY --chmod=0755 himalaya-ro.sh /usr/local/bin/himalaya-ro
|
||||||
|
|
||||||
|
|
||||||
# ---------- Install 7-Zip (7zz) for CHM extraction ----------
|
|
||||||
RUN /opt/hermes/.venv/bin/python3 /dev/stdin << 'PYEOF'
|
|
||||||
import urllib.request, tarfile, os, shutil, subprocess
|
|
||||||
url = 'https://github.com/ip7z/7zip/releases/download/26.01/7z2601-linux-x64.tar.xz'
|
|
||||||
xz = '/tmp/7z2601-linux-x64.tar.xz'
|
|
||||||
urllib.request.urlretrieve(url, xz)
|
|
||||||
os.makedirs('/tmp/7z', exist_ok=True)
|
|
||||||
with tarfile.open(xz, 'r:xz') as t:
|
|
||||||
t.extractall('/tmp/7z')
|
|
||||||
shutil.move('/tmp/7z/7zz', '/usr/local/bin/7zz')
|
|
||||||
os.chmod('/usr/local/bin/7zz', 0o755)
|
|
||||||
shutil.rmtree('/tmp/7z', ignore_errors=True)
|
|
||||||
os.remove(xz)
|
|
||||||
# Verify
|
|
||||||
result = subprocess.run(['/usr/local/bin/7zz'], capture_output=True, text=True)
|
|
||||||
assert result.returncode == 0, f'7zz verify failed: {result.stderr}'
|
|
||||||
print('7-Zip 26.01 installed successfully')
|
|
||||||
PYEOF
|
|
||||||
|
|
||||||
|
|
||||||
# ---------- Runtime ----------
|
# ---------- Runtime ----------
|
||||||
USER hermes
|
USER hermes
|
||||||
ENV HERMES_HOME=/opt/data
|
ENV HERMES_HOME=/opt/data
|
||||||
|
|||||||
@@ -1,72 +0,0 @@
|
|||||||
# Honcho — Memory infrastructure for stateful AI agents
|
|
||||||
# Builds the Honcho FastAPI server from the official GitHub repository.
|
|
||||||
#
|
|
||||||
# Usage:
|
|
||||||
# docker compose build honcho
|
|
||||||
# docker compose up honcho
|
|
||||||
#
|
|
||||||
# Reference: https://github.com/plastic-labs/honcho
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# Stage 1 — clone source & install dependencies
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
FROM python:3.13-slim-bookworm AS builder
|
|
||||||
|
|
||||||
RUN apt-get update && apt-get install -y git && rm -rf /var/lib/apt/lists/*
|
|
||||||
|
|
||||||
COPY --from=ghcr.io/astral-sh/uv:0.9.24 /uv /bin/uv
|
|
||||||
|
|
||||||
WORKDIR /src
|
|
||||||
RUN git clone --depth 1 --branch main https://github.com/plastic-labs/honcho.git .
|
|
||||||
|
|
||||||
ENV UV_COMPILE_BYTECODE=1
|
|
||||||
ENV UV_LINK_MODE=copy
|
|
||||||
|
|
||||||
# Install project dependencies (frozen from lockfile, no dev)
|
|
||||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
|
||||||
uv sync --frozen --no-install-project --no-group dev
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# Stage 2 — runtime image
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
FROM python:3.13-slim-bookworm AS runtime
|
|
||||||
|
|
||||||
COPY --from=ghcr.io/astral-sh/uv:0.9.24 /uv /bin/uv
|
|
||||||
|
|
||||||
RUN apt-get update && \
|
|
||||||
apt-get install -y --no-install-recommends ca-certificates && \
|
|
||||||
rm -rf /var/lib/apt/lists/*
|
|
||||||
|
|
||||||
WORKDIR /app
|
|
||||||
|
|
||||||
ENV UV_COMPILE_BYTECODE=1
|
|
||||||
ENV UV_LINK_MODE=copy
|
|
||||||
ENV PYTHONDONTWRITEBYTECODE=1
|
|
||||||
ENV PYTHONUNBUFFERED=1
|
|
||||||
ENV PATH="/app/.venv/bin:$PATH"
|
|
||||||
ENV HOME=/app
|
|
||||||
ENV UV_CACHE_DIR=/tmp/uv-cache
|
|
||||||
|
|
||||||
# Copy the dependency layer from the builder
|
|
||||||
COPY --from=builder /src/uv.lock /src/pyproject.toml /app/
|
|
||||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
|
||||||
uv sync --frozen --no-group dev
|
|
||||||
|
|
||||||
# Copy application source and config
|
|
||||||
COPY --from=builder /src/src/ /app/src/
|
|
||||||
COPY --from=builder /src/migrations/ /app/migrations/
|
|
||||||
COPY --from=builder /src/scripts/ /app/scripts/
|
|
||||||
COPY --from=builder /src/docker/ /app/docker/
|
|
||||||
COPY --from=builder /src/alembic.ini /app/alembic.ini
|
|
||||||
|
|
||||||
# Create non-root user
|
|
||||||
RUN addgroup --system app && \
|
|
||||||
adduser --system --ingroup app app && \
|
|
||||||
mkdir -p /tmp/uv-cache && \
|
|
||||||
chown -R app:app /app /tmp/uv-cache
|
|
||||||
|
|
||||||
USER app
|
|
||||||
EXPOSE 8000
|
|
||||||
|
|
||||||
# The entrypoint.sh script runs database migrations then starts the FastAPI server
|
|
||||||
ENTRYPOINT ["sh", "docker/entrypoint.sh"]
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
CREATE EXTENSION IF NOT EXISTS vector;
|
|
||||||
30
ai/llama-cpp/Dockerfile
Normal file
30
ai/llama-cpp/Dockerfile
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
# llama-cpp-rocm6/Dockerfile
|
||||||
|
# Custom llama.cpp server with ROCm 6.1 + gfx906 (MI50) support.
|
||||||
|
# Build: docker build -t llama-cpp:rocm-gfx906 .
|
||||||
|
|
||||||
|
FROM rocm/dev-ubuntu-22.04:6.1.2-complete AS builder
|
||||||
|
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y curl git build-essential pkg-config cmake make && rm -rf /var/lib/apt/lists/*
|
||||||
|
ARG LLAMACPP_VERSION=b9596
|
||||||
|
RUN git clone --depth 1 --branch ${LLAMACPP_VERSION} https://github.com/ggml-org/llama.cpp.git /build
|
||||||
|
WORKDIR /build
|
||||||
|
ENV HIP_PATH=/opt/rocm ROCM_PATH=/opt/rocm PATH=/opt/rocm/bin:/opt/rocm/llvm/bin:${PATH} CMAKE_PREFIX_PATH=/opt/rocm
|
||||||
|
RUN mkdir build && cd build && \
|
||||||
|
cmake .. -DGGML_HIP=ON -DCMAKE_BUILD_TYPE=Release \
|
||||||
|
-DAMDGPU_TARGETS="gfx906:xnack-" \
|
||||||
|
-DCMAKE_POSITION_INDEPENDENT_CODE=ON \
|
||||||
|
-DGGML_CUDA=OFF -DGGML_VULKAN=OFF -DGGML_METAL=OFF \
|
||||||
|
-DBUILD_SHARED_LIBS=OFF && \
|
||||||
|
cmake --build . --target llama-server -- -j $(nproc)
|
||||||
|
|
||||||
|
FROM ubuntu:24.04
|
||||||
|
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
|
||||||
|
ca-certificates curl libstdc++6 libgomp1 libopenblas0 \
|
||||||
|
libnuma1 libelf1 libdrm2 libdrm-amdgpu1 \
|
||||||
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
COPY --from=builder /opt/rocm/lib/ /opt/rocm/lib/
|
||||||
|
COPY --from=builder /opt/rocm/share/ /opt/rocm/share/
|
||||||
|
COPY --from=builder /build/build/bin/llama-server /usr/local/bin/llama-server
|
||||||
|
RUN echo /opt/rocm/lib > /etc/ld.so.conf.d/rocm.conf && ldconfig
|
||||||
|
ENV HSA_OVERRIDE_GFX_VERSION=9.0.6 HCC_AMDGPU_TARGET=gfx906 HSA_ENABLE_SDMA=0
|
||||||
|
EXPOSE 8080
|
||||||
|
ENTRYPOINT ["/usr/local/bin/llama-server"]
|
||||||
@@ -1,93 +0,0 @@
|
|||||||
# Honcho Configuration
|
|
||||||
# Pre-configured for self-hosted deployment with Ollama embeddings.
|
|
||||||
# Mount this file at /app/config.toml in the Honcho container.
|
|
||||||
#
|
|
||||||
# Environment variables override these values at runtime
|
|
||||||
# (e.g. DB_CONNECTION_URI, DERIVER_*).
|
|
||||||
|
|
||||||
[app]
|
|
||||||
LOG_LEVEL = "INFO"
|
|
||||||
NAMESPACE = "honcho"
|
|
||||||
SESSION_OBSERVERS_LIMIT = 10
|
|
||||||
GET_CONTEXT_MAX_TOKENS = 16384
|
|
||||||
EMBED_MESSAGES = true
|
|
||||||
|
|
||||||
[db]
|
|
||||||
# Connection URI is set via environment variable DB_CONNECTION_URI
|
|
||||||
SCHEMA = "public"
|
|
||||||
POOL_SIZE = 10
|
|
||||||
MAX_OVERFLOW = 20
|
|
||||||
POOL_TIMEOUT = 30
|
|
||||||
POOL_RECYCLE = 300
|
|
||||||
POOL_PRE_PING = true
|
|
||||||
POOL_USE_LIFO = true
|
|
||||||
SQL_DEBUG = false
|
|
||||||
|
|
||||||
[auth]
|
|
||||||
USE_AUTH = false
|
|
||||||
|
|
||||||
[llm]
|
|
||||||
DEFAULT_MAX_TOKENS = 4096
|
|
||||||
|
|
||||||
[embedding]
|
|
||||||
VECTOR_DIMENSIONS = 768
|
|
||||||
MAX_INPUT_TOKENS = 8192
|
|
||||||
MAX_TOKENS_PER_REQUEST = 2048
|
|
||||||
|
|
||||||
[embedding.model_config]
|
|
||||||
transport = "openai"
|
|
||||||
model = "nomic-embed-text:latest"
|
|
||||||
|
|
||||||
[embedding.model_config.overrides]
|
|
||||||
base_url = "http://ollama:11434/v1"
|
|
||||||
# Ollama does not require an API key; env var must be set to non-empty string
|
|
||||||
api_key_env = "LLM_OPENAI_API_KEY"
|
|
||||||
|
|
||||||
[deriver]
|
|
||||||
ENABLED = false
|
|
||||||
WORKERS = 1
|
|
||||||
POLLING_SLEEP_INTERVAL_SECONDS = 1.0
|
|
||||||
STALE_SESSION_TIMEOUT_MINUTES = 5
|
|
||||||
DEDUPLICATE = true
|
|
||||||
LOG_OBSERVATIONS = false
|
|
||||||
|
|
||||||
[deriver.model_config]
|
|
||||||
transport = "openai"
|
|
||||||
model = "qwen3.6:27b-q4_K_M"
|
|
||||||
|
|
||||||
[deriver.model_config.overrides]
|
|
||||||
base_url = "http://ollama:11434/v1"
|
|
||||||
api_key_env = "LLM_OPENAI_API_KEY"
|
|
||||||
|
|
||||||
[summary]
|
|
||||||
ENABLED = false
|
|
||||||
|
|
||||||
[summary.model_config]
|
|
||||||
transport = "openai"
|
|
||||||
model = "qwen3.6:27b-q4_K_M"
|
|
||||||
|
|
||||||
[summary.model_config.overrides]
|
|
||||||
base_url = "http://ollama:11434/v1"
|
|
||||||
api_key_env = "LLM_OPENAI_API_KEY"
|
|
||||||
|
|
||||||
[dream]
|
|
||||||
ENABLED = false
|
|
||||||
|
|
||||||
[dialectic]
|
|
||||||
MAX_OUTPUT_TOKENS = 4096
|
|
||||||
MAX_INPUT_TOKENS = 16384
|
|
||||||
|
|
||||||
[cache]
|
|
||||||
ENABLED = false
|
|
||||||
|
|
||||||
[vector_store]
|
|
||||||
TYPE = "pgvector"
|
|
||||||
|
|
||||||
[metrics]
|
|
||||||
ENABLED = false
|
|
||||||
|
|
||||||
[telemetry]
|
|
||||||
ENABLED = false
|
|
||||||
|
|
||||||
[sentry]
|
|
||||||
ENABLED = false
|
|
||||||
31
env/.env.example.honcho
vendored
31
env/.env.example.honcho
vendored
@@ -1,31 +0,0 @@
|
|||||||
# Honcho Environment Variables
|
|
||||||
# Copy this file to your .env (at the compose root or docker-compose working directory)
|
|
||||||
# and fill in the secrets.
|
|
||||||
#
|
|
||||||
# cp env/.env.example.honcho .env
|
|
||||||
#
|
|
||||||
# Then reference it from compose.yml:
|
|
||||||
# env_file:
|
|
||||||
# - path: .env
|
|
||||||
# required: true
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# Database
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# PostgreSQL connection string for Honcho.
|
|
||||||
# The password must match HONCHO_DB_PASSWORD below.
|
|
||||||
HONCHO_DB_PASSWORD=change_me_to_a_strong_random_password
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# LLM Provider
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# Ollama does not require a real API key, but the env var must be set to a
|
|
||||||
# non-empty string for the OpenAI-compatible client to connect.
|
|
||||||
LLM_OPENAI_API_KEY=ollama
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# Honcho Server
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# Honcho will pick up DB_CONNECTION_URI from the compose environment.
|
|
||||||
# You can override additional settings here if needed.
|
|
||||||
# LOG_LEVEL=INFO
|
|
||||||
Reference in New Issue
Block a user