Compare commits

..

1 Commits

Author SHA1 Message Date
c39174f0fe feat: add 7zz for CHM documentation extraction
Some checks failed
Build Hermes agent / build (pull_request) Has been cancelled
Download a static 7-Zip binary (7zz) at Docker build time for extracting Microsoft Compiled HTML Help (.chm) files. This follows the same pattern as the existing Himalaya CLI installation. The 7zz download link is scraped from 7-zip.org/download.html at build time.
2026-05-13 16:27:32 -04:00
4 changed files with 44 additions and 72 deletions

View File

@@ -1,23 +0,0 @@
# Toolbox image: Debian with curl, poppler-utils, ImageMagick and uv.

# uv image tag to copy the binary from. The default keeps the previous
# behaviour; pass --build-arg UV_VERSION=<x.y.z> for a reproducible build.
ARG UV_VERSION=latest
FROM ghcr.io/astral-sh/uv:${UV_VERSION} AS uv

FROM debian:13.4

# Install curl, poppler-utils and ImageMagick.
# ca-certificates is listed explicitly: with --no-install-recommends curl does
# not pull it in, and HTTPS requests would fail certificate verification.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        imagemagick \
        poppler-utils && \
    rm -rf /var/lib/apt/lists/*

# Install uv (Python package manager); debian:13.4 doesn't ship it.
# The upstream uv image stores the binary at /uv, not /usr/local/bin/uv.
COPY --from=uv /uv /usr/local/bin/uv
RUN uv --version

# Verify all expected tools are available.
# Presence is checked with `command -v` first, because piping a missing tool
# into `head` would still exit 0 and hide the failure. The version lines that
# follow are informational only.
RUN set -eu; \
    for tool in convert curl identify pdfinfo pdftoppm pdftotext; do \
        command -v "$tool" >/dev/null || { echo "missing tool: $tool" >&2; exit 1; }; \
    done; \
    curl --version; \
    pdftotext -v 2>&1 | head -1; \
    pdfinfo -v 2>&1 | head -1; \
    pdftoppm -v 2>&1 | head -1; \
    convert --version | head -1; \
    identify --version | head -1

CMD ["/bin/bash"]

View File

@@ -54,10 +54,6 @@ services:
- TZ=America/Montreal - TZ=America/Montreal
volumes: volumes:
- /mnt/HoardingCow_docker_data/Hermes/data:/opt/data - /mnt/HoardingCow_docker_data/Hermes/data:/opt/data
# Syncthing-shared org files — read-only view of user's agenda
- /mnt/HoardingCow_docker_data/Syncthing/telos-ro:/opt/data/telos-ro:ro
# Syncthing-shared inbox — write tasks here, they sync to user's laptop
- /mnt/HoardingCow_docker_data/Syncthing/telos-rw:/opt/data/telos-rw:rw
devices: devices:
- /dev/kfd:/dev/kfd - /dev/kfd:/dev/kfd
- /dev/dri:/dev/dri - /dev/dri:/dev/dri
@@ -67,35 +63,6 @@ services:
networks: networks:
- ai_backend - ai_backend
# Syncthing file-synchronisation service.
syncthing:
# NOTE(review): image tag is floating (:latest); consider pinning a version.
image: syncthing/syncthing:latest
container_name: syncthing
hostname: syncthing
restart: always
# Ports published on the host. 8384 is also the port Traefik forwards to
# (see the loadbalancer label below). 22000 and 21027/udp are presumably the
# Syncthing sync and discovery ports — confirm against the Syncthing docs.
ports:
- "8384:8384"
- "22000:22000"
- "21027:21027/udp"
environment:
- TZ=America/Montreal
# Host directories mounted into the container: Syncthing's own config plus
# the two shared folders (telos-ro and telos-rw).
volumes:
- /mnt/HoardingCow_docker_data/Syncthing/config:/var/syncthing/config
- /mnt/HoardingCow_docker_data/Syncthing/telos-ro:/telos-ro
- /mnt/HoardingCow_docker_data/Syncthing/telos-rw:/telos-rw
networks:
- ai_backend
- ai_net
# Traefik routing for syncthing.lazyworkhorse.net: the "web" router applies
# the redirect-to-https middleware, the "websecure" router terminates TLS
# using the "njalla" certificate resolver, and traffic is sent to port 8384.
labels:
- "traefik.enable=true"
- "traefik.http.routers.syncthing-http.rule=Host(`syncthing.lazyworkhorse.net`)"
- "traefik.http.routers.syncthing-http.entrypoints=web"
- "traefik.http.routers.syncthing-http.middlewares=redirect-to-https"
- "traefik.http.routers.syncthing-https.rule=Host(`syncthing.lazyworkhorse.net`)"
- "traefik.http.routers.syncthing-https.entrypoints=websecure"
- "traefik.http.routers.syncthing-https.tls=true"
- "traefik.http.routers.syncthing-https.tls.certresolver=njalla"
- "traefik.http.services.syncthing.loadbalancer.server.port=8384"
ollama: ollama:
build: build:
context: ./ollama context: ./ollama
@@ -129,22 +96,6 @@ services:
- "303" - "303"
- "26" - "26"
# PostgreSQL 17 database container for Paperclip.
paperclip-db:
image: postgres:17
container_name: paperclip-db
restart: always
environment:
# Password is substituted from the PAPERCLIP_DB_PASSWORD variable at
# compose time; it is not stored in this file.
- POSTGRES_PASSWORD=${PAPERCLIP_DB_PASSWORD}
# Runs pg_isready as user "postgres" every 5s (5s timeout, up to 10 retries).
healthcheck:
test: ["CMD-SHELL", "pg_isready -U postgres"]
interval: 5s
timeout: 5s
retries: 10
# Database files are kept on the host so they survive container re-creation.
volumes:
- /mnt/HoardingCow_docker_data/Paperclip/db:/var/lib/postgresql/data
# Attached to ai_backend only; no host ports are published in this block.
networks:
- ai_backend
networks: networks:
ai_net: ai_net:
external: true external: true

View File

@@ -78,6 +78,47 @@ PYEOF
# ---------- Install himalaya-ro wrapper ---------- # ---------- Install himalaya-ro wrapper ----------
COPY --chmod=0755 himalaya-ro.sh /usr/local/bin/himalaya-ro COPY --chmod=0755 himalaya-ro.sh /usr/local/bin/himalaya-ro
# ---------- Install 7-Zip for CHM extraction ----------
# Installs the standalone 7zz binary into /usr/local/bin. The release archive
# name is scraped from 7-zip.org/download.html, so the installed version is
# whichever Linux x64 archive that page lists first at build time.
# NOTE(review): the download is neither version-pinned nor checksum-verified;
# consider pinning a release and checking its SHA-256 for reproducible builds.
RUN /opt/hermes/.venv/bin/python3 /dev/stdin <<'PYEOF'
import io
import os
import re
import shutil
import subprocess
import tarfile
import urllib.request

BASE_URL = 'https://7-zip.org/'
HEADERS = {'User-Agent': 'Mozilla/5.0'}
TARGET = '/usr/local/bin/7zz'


def fetch(url, timeout):
    """Return the body of a GET request as bytes, closing the connection."""
    req = urllib.request.Request(url, headers=HEADERS)
    with urllib.request.urlopen(req, timeout=timeout) as resp:
        return resp.read()


# Scrape 7-zip.org for the Linux x64 archive link.
html = fetch(BASE_URL + 'download.html', 15).decode('utf-8', errors='replace')
links = re.findall(r'href="(a/7z[\d]+-linux-x64\.tar\.xz)"', html)
if not links:
    raise RuntimeError("Could not find 7z download link")

dl_url = f'https://7-zip.org/{links[0]}'
print(f'Downloading 7z from {dl_url}...')
archive = fetch(dl_url, 30)

# Unpack in memory with the standard library and copy out only the 7zz
# member: no external xz/tar binary is needed and nothing is left in /tmp.
with tarfile.open(fileobj=io.BytesIO(archive), mode='r:xz') as tar:
    member = next(
        (m for m in tar.getmembers()
         if m.isfile() and os.path.basename(m.name) == '7zz'),
        None,
    )
    # Fail the build instead of silently producing an image without 7zz.
    if member is None:
        raise RuntimeError(f'7zz binary not found in {dl_url}')
    with tar.extractfile(member) as src, open(TARGET, 'wb') as dst:
        shutil.copyfileobj(src, dst)
os.chmod(TARGET, 0o755)
print(f'7zz installed from {member.name}')

# Verify: the binary must actually run and print its banner.
r = subprocess.run([TARGET], capture_output=True, text=True)
banner = r.stdout.strip()
if '7-Zip' not in banner:
    raise RuntimeError(
        f'7zz did not run correctly (exit {r.returncode}): {r.stderr.strip()[:200]}'
    )
print(f'7-Zip {banner[:60]}')
PYEOF
# ---------- Runtime ---------- # ---------- Runtime ----------
USER hermes USER hermes

View File

@@ -8,10 +8,13 @@ services:
- USER_GID=1000 - USER_GID=1000
- GITEA__server__ROOT_URL=https://code.lazyworkhorse.net - GITEA__server__ROOT_URL=https://code.lazyworkhorse.net
- GITEA__actions__ENABLED=true - GITEA__actions__ENABLED=true
- GITEA__actions__DEFAULT_ACTIONS_URL=off
- SSH_PORT=2222 - SSH_PORT=2222
- SSH_LISTEN_PORT=2222 - SSH_LISTEN_PORT=2222
# Enable Gitea Actions (act_runner required on host) # Enable Gitea Actions (act_runner required on host)
- GITEA__actions__ENABLED=true - GITEA__actions__ENABLED=true
# Don't fetch actions from GitHub (offline mode + local only)
- GITEA__actions__DEFAULT_ACTIONS_URL=off
volumes: volumes:
- /mnt/HoardingCow_docker_data/Gitea:/data - /mnt/HoardingCow_docker_data/Gitea:/data
networks: networks: