Merge pull request 'refactor: split ai/ into hermes/ and ollama/ directories with gfx906 build' (#19) from feat/ollama-gfx906 into master

Reviewed-on: #19
2026-05-11 01:26:11 +00:00
parent 0580603f27 6b82a26c25
commit f8c2f864de
5 changed files with 112 additions and 4 deletions
--- a/ai/compose.yml
+++ b/ai/compose.yml
@@ -26,7 +26,7 @@ services:
  #     - "traefik.http.routers.webui-https.tls.certresolver=njalla"

  hermes:
-    build: ./
+    build: ./hermes
    container_name: hermes
    restart: always
    # Gateway run enables the internal API server on port 8642
@@ -58,9 +58,11 @@ services:
      - ai_backend

  ollama:
-    image: ollama/ollama:latest
+    build:
+      context: ./ollama
+      dockerfile: Dockerfile
+    image: ollama/ollama:rocm-gfx906
    container_name: ollama
-    privileged: true
    tty: true
    restart: always 
    ports:
@@ -78,7 +80,7 @@ services:
      - HSA_ENABLE_SDMA=0 
      - OLLAMA_HOST=0.0.0.0
      - OLLAMA_DEBUG=1
-      - OLLAMA_FLASH_ATTENTION=0
+      - OLLAMA_FLASH_ATTENTION=1
      - OLLAMA_NUM_PARALLEL=2
    devices:
      # Map the render nodes and KFD for ROCm to work inside the container
--- a/ai/hermes/Dockerfile
+++ b/ai/hermes/Dockerfile
--- a/ai/hermes/fix-permissions.sh
+++ b/ai/hermes/fix-permissions.sh
--- a/ai/hermes/patch_tts_tool.py
+++ b/ai/hermes/patch_tts_tool.py
--- a/ai/ollama/Dockerfile
+++ b/ai/ollama/Dockerfile
@@ -0,0 +1,106 @@
+# ai/ollama/Dockerfile
+#
+# Custom ollama image with ROCm 6.1 + gfx906 (MI50) support.
+# The official ollama/rocm image ships ROCm 7.2 which dropped gfx906.
+# This uses v0.23.2's native CMake build system with AMDGPU_TARGETS including gfx906.
+#
+# Build: docker build -t ollama/ollama:rocm-gfx906 ai/ollama
+
+FROM rocm/dev-ubuntu-22.04:6.1.2-complete AS builder
+
+# Pinned versions of the build toolchain (CMake, Ninja, Go); override with --build-arg
+ARG CMAKEVERSION=3.31.2
+ARG NINJAVERSION=1.12.1
+ARG GOLANG_VERSION=1.22.0
+
+# Base packages used to fetch the toolchain and compile the native code
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
+        build-essential \
+        ccache \
+        curl \
+        git \
+        pkg-config \
+        unzip \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install CMake from official binaries. Download to a file first (same pattern as the
+# Ninja step): /bin/sh has no pipefail, so in `curl | tar` a curl error would not be
+# the exit status of the step. The archive is removed in the same layer.
+RUN curl -fsSL -o /tmp/cmake.tar.gz \
+    https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-x86_64.tar.gz \
+    && tar xzf /tmp/cmake.tar.gz -C /usr/local --strip-components 1 \
+    && rm /tmp/cmake.tar.gz
+
+# Install Ninja: fetch the release zip, unpack it into /usr/local/bin, drop the archive
+RUN curl -fsSL \
+    https://github.com/ninja-build/ninja/releases/download/v${NINJAVERSION}/ninja-linux.zip \
+    -o /tmp/ninja.zip && unzip /tmp/ninja.zip -d /usr/local/bin && rm /tmp/ninja.zip
+
+# Install Go. Download to a file before extracting: /bin/sh has no pipefail, so in
+# `curl | tar` a curl error would not be the exit status of the step.
+RUN curl -fsSL -o /tmp/go.tar.gz https://go.dev/dl/go${GOLANG_VERSION}.linux-amd64.tar.gz \
+    && tar xzf /tmp/go.tar.gz -C /usr/local \
+    && rm /tmp/go.tar.gz
+# Put the Go toolchain ahead of everything else on PATH
+ENV PATH=/usr/local/go/bin:$PATH
+
+# Shallow-clone the requested ollama tag into /build and work from there
+ARG OLLAMA_VERSION=v0.23.2
+RUN git clone --depth 1 --branch ${OLLAMA_VERSION} https://github.com/ollama/ollama.git /build
+WORKDIR /build
+
+# ROCm locations, CMake generator and linker flags shared by the build steps below
+ENV HIP_PATH=/opt/rocm \
+    ROCM_PATH=/opt/rocm \
+    CMAKE_GENERATOR=Ninja \
+    LDFLAGS=-s
+
+# Step 1: Build CPU backends with GCC (no ROCm preset)
+# Configures into build-cpu, builds only the ggml-cpu target, then installs the
+# "CPU" component (stripped) under the /build/dist prefix.
+# Pre-set CMAKE_HIP_COMPILER="" to prevent check_language(HIP) from
+# finding a HIP compiler (it searches /opt/rocm even without PATH).
+# The configure command also runs with an explicit PATH that has no /opt/rocm
+# entry, to prevent find_program from finding hipcc. The PATH override applies
+# to that first cmake invocation only.
+# The trailing find/head just prints up to 20 installed paths into the build log.
+RUN mkdir -p build-cpu && \
+    PATH=/usr/local/go/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin \
+    cmake -B build-cpu -DCMAKE_BUILD_TYPE=Release \
+      -DCMAKE_HIP_COMPILER="" \
+      -DCMAKE_INSTALL_PREFIX=/build/dist && \
+    cmake --build build-cpu --target ggml-cpu -- -l $(nproc) && \
+    cmake --install build-cpu --component CPU --strip && \
+    echo "=== CPU install ===" && \
+    (find /build/dist/lib/ollama -type f -o -type l 2>&1 | head -20 || echo "empty")
+
+# Step 2: Build HIP backend with ROCm preset + gfx906 target only
+# Configures into build-hip, builds only the ggml-hip target, then installs the
+# "HIP" component (stripped) and prints up to 20 installed paths into the build log.
+# The ROCm 6 preset enables HIP language detection (enable_language(HIP))
+# which ensures GPU kernels are properly compiled for gfx906.
+# OLLAMA_RUNNER_DIR=rocm from the preset, so HIP goes to lib/ollama/rocm/
+# Need CMAKE_PREFIX_PATH so find_package(hip) finds hip-config.cmake
+# at /opt/rocm/lib/cmake/hip/hip-config.cmake.
+# NOTE(review): unlike step 1, no CMAKE_INSTALL_PREFIX is passed here; presumably
+# the preset directs the install to /build/dist (where find looks) - confirm.
+RUN mkdir -p build-hip && \
+    cmake -B build-hip \
+      --preset 'ROCm 6' \
+      -DAMDGPU_TARGETS="gfx906:xnack-" \
+      -DCMAKE_BUILD_TYPE=Release \
+      -DCMAKE_PREFIX_PATH="/opt/rocm" && \
+    cmake --build build-hip --target ggml-hip -- -l $(nproc) && \
+    cmake --install build-hip --component HIP --strip && \
+    echo "=== HIP install ===" && \
+    find /build/dist/lib/ollama -type f -o -type l | head -20
+
+# Step 3: compile the ollama Go binary with cgo enabled (GCC does the CGo linking);
+# the checked-out tag is stamped into version.Version
+ENV CGO_ENABLED=1
+RUN go build -trimpath \
+    -ldflags="-X=github.com/ollama/ollama/version.Version=${OLLAMA_VERSION}" \
+    -o /build/dist/ollama .
+
+# ---------- Runtime image ----------
+FROM ubuntu:24.04
+
+# System libraries installed alongside the copied ollama/ROCm artifacts
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
+        ca-certificates \
+        curl \
+        libgomp1 \
+        libopenblas0 \
+        libstdc++6 \
+        libvulkan1 \
+    && rm -rf /var/lib/apt/lists/*
+
+# ollama binary and every built backend (CPU + HIP) from the builder stage
+# CPU install:  /build/dist/lib/ollama/libggml-*.so
+# HIP install:  /build/dist/lib/ollama/rocm/libggml-hip.so
+COPY --from=builder /build/dist/lib/ollama/ /usr/lib/ollama/
+COPY --from=builder /build/dist/ollama /usr/bin/ollama
+
+# ROCm 6.1 runtime libraries and shared data from the builder stage;
+# ggml-hip resolves the libraries at runtime via LD_LIBRARY_PATH
+COPY --from=builder /opt/rocm/share/ /opt/rocm/share/
+COPY --from=builder /opt/rocm/lib/ /opt/rocm/lib/
+
+# Refresh the dynamic linker cache after the library copies
+RUN ldconfig
+
+# Library search path: ROCm runtime first, then the HIP and CPU ggml backends
+ENV LD_LIBRARY_PATH=/opt/rocm/lib:/usr/lib/ollama/rocm:/usr/lib/ollama
+ENV HSA_OVERRIDE_GFX_VERSION=9.0.6
+ENV HCC_AMDGPU_TARGET=gfx906
+ENV HSA_ENABLE_SDMA=0
+# Ollama binds to 127.0.0.1 by default, which makes the exposed port unreachable
+# from outside the container; listen on all interfaces unless the caller overrides
+# OLLAMA_HOST (compose/`docker run -e` values still take precedence).
+ENV OLLAMA_HOST=0.0.0.0:11434
+
+EXPOSE 11434
+# Use the path the binary is copied to rather than relying on the /bin -> /usr/bin symlink
+ENTRYPOINT ["/usr/bin/ollama"]
+CMD ["serve"]