diff --git a/ai/compose.yml b/ai/compose.yml index b8590dc..2e565ae 100644 --- a/ai/compose.yml +++ b/ai/compose.yml @@ -26,7 +26,7 @@ services: # - "traefik.http.routers.webui-https.tls.certresolver=njalla" hermes: - build: ./ + build: ./hermes container_name: hermes restart: always # Gateway run enables the internal API server on port 8642 @@ -58,9 +58,11 @@ services: - ai_backend ollama: - image: ollama/ollama:latest + build: + context: ./ollama + dockerfile: Dockerfile + image: ollama/ollama:rocm-gfx906 container_name: ollama - privileged: true tty: true restart: always ports: @@ -78,7 +80,7 @@ services: - HSA_ENABLE_SDMA=0 - OLLAMA_HOST=0.0.0.0 - OLLAMA_DEBUG=1 - - OLLAMA_FLASH_ATTENTION=0 + - OLLAMA_FLASH_ATTENTION=1 - OLLAMA_NUM_PARALLEL=2 devices: # Map the render nodes and KFD for ROCm to work inside the container diff --git a/ai/Dockerfile b/ai/hermes/Dockerfile similarity index 100% rename from ai/Dockerfile rename to ai/hermes/Dockerfile diff --git a/ai/fix-permissions.sh b/ai/hermes/fix-permissions.sh similarity index 100% rename from ai/fix-permissions.sh rename to ai/hermes/fix-permissions.sh diff --git a/ai/patch_tts_tool.py b/ai/hermes/patch_tts_tool.py similarity index 100% rename from ai/patch_tts_tool.py rename to ai/hermes/patch_tts_tool.py diff --git a/ai/ollama/Dockerfile b/ai/ollama/Dockerfile new file mode 100644 index 0000000..438e607 --- /dev/null +++ b/ai/ollama/Dockerfile @@ -0,0 +1,106 @@ +# ai/ollama/Dockerfile +# +# Custom ollama image with ROCm 6.1 + gfx906 (MI50) support. +# The official ollama/rocm image ships ROCm 7.2 which dropped gfx906. +# This uses ollama v0.23.2's native CMake build system with AMDGPU_TARGETS including gfx906. 
+#
+# Build: docker build -t ollama/ollama:rocm-gfx906 ai/ollama
+
+FROM rocm/dev-ubuntu-22.04:6.1.2-complete AS builder
+
+# Pinned versions of the build tools fetched below (CMake, Ninja, Go).
+ARG CMAKEVERSION=3.31.2
+ARG NINJAVERSION=1.12.1
+ARG GOLANG_VERSION=1.22.0
+
+# ca-certificates is listed explicitly so HTTPS downloads keep working without recommended packages.
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+    build-essential ca-certificates ccache curl git pkg-config unzip \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install CMake from official binaries. Download to a file before extracting: in a
+# `curl | tar` pipeline under /bin/sh the exit status is tar's, so a curl failure is not reported.
+RUN curl -fsSL -o /tmp/cmake.tar.gz \
+    https://github.com/Kitware/CMake/releases/download/v${CMAKEVERSION}/cmake-${CMAKEVERSION}-linux-x86_64.tar.gz \
+    && tar xzf /tmp/cmake.tar.gz -C /usr/local --strip-components 1 && rm /tmp/cmake.tar.gz
+
+# Install Ninja
+RUN curl -fsSL -o /tmp/ninja.zip \
+    https://github.com/ninja-build/ninja/releases/download/v${NINJAVERSION}/ninja-linux.zip \
+    && unzip /tmp/ninja.zip -d /usr/local/bin && rm /tmp/ninja.zip
+
+# Install Go (downloaded to a file first, for the same reason as CMake)
+RUN curl -fsSL -o /tmp/go.tar.gz https://go.dev/dl/go${GOLANG_VERSION}.linux-amd64.tar.gz \
+    && tar xzf /tmp/go.tar.gz -C /usr/local && rm /tmp/go.tar.gz
+ENV PATH=/usr/local/go/bin:$PATH
+
+ARG OLLAMA_VERSION=v0.23.2
+RUN git clone --depth 1 --branch ${OLLAMA_VERSION} https://github.com/ollama/ollama.git /build
+WORKDIR /build
+
+# ROCm paths and build defaults shared by the CMake steps below
+ENV HIP_PATH=/opt/rocm ROCM_PATH=/opt/rocm CMAKE_GENERATOR=Ninja LDFLAGS=-s
+
+# Step 1: Build CPU backends with GCC (no ROCm preset)
+# Pre-set CMAKE_HIP_COMPILER="" to prevent check_language(HIP) from
+# finding a HIP compiler (it searches /opt/rocm even without PATH).
+# Remove /opt/rocm from PATH to prevent find_program from finding hipcc.
+RUN PATH=/usr/local/go/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin \
+    cmake -B build-cpu -DCMAKE_BUILD_TYPE=Release \
+        -DCMAKE_HIP_COMPILER="" \
+        -DCMAKE_INSTALL_PREFIX=/build/dist && \
+    cmake --build build-cpu --target ggml-cpu -- -l "$(nproc)" && \
+    cmake --install build-cpu --component CPU --strip && \
+    echo "=== CPU install ===" && \
+    (find /build/dist/lib/ollama -type f -o -type l 2>&1 | head -20 || echo "empty")
+
+# Step 2: Build HIP backend with ROCm preset + gfx906 target only
+# The ROCm 6 preset enables HIP language detection (enable_language(HIP))
+# which ensures GPU kernels are properly compiled for gfx906.
+# OLLAMA_RUNNER_DIR=rocm from the preset, so HIP goes to lib/ollama/rocm/
+# Need CMAKE_PREFIX_PATH so find_package(hip) finds hip-config.cmake
+# at /opt/rocm/lib/cmake/hip/hip-config.cmake.
+# CMAKE_INSTALL_PREFIX is passed explicitly (as in Step 1) so the install lands in /build/dist, which the runtime stage copies from.
+RUN cmake -B build-hip \
+        --preset 'ROCm 6' \
+        -DAMDGPU_TARGETS="gfx906:xnack-" \
+        -DCMAKE_BUILD_TYPE=Release \
+        -DCMAKE_INSTALL_PREFIX=/build/dist \
+        -DCMAKE_PREFIX_PATH="/opt/rocm" && \
+    cmake --build build-hip --target ggml-hip -- -l "$(nproc)" && \
+    cmake --install build-hip --component HIP --strip && \
+    echo "=== HIP install ===" && \
+    find /build/dist/lib/ollama -type f -o -type l | head -20
+
+# Step 3: Build Go binary (GCC for CGo linking); the tag's leading "v" is dropped from the embedded version.
+ENV CGO_ENABLED=1
+RUN go build -trimpath -ldflags="-X=github.com/ollama/ollama/version.Version=${OLLAMA_VERSION#v}" -o /build/dist/ollama .
+
+# ---------- Runtime image ----------
+FROM ubuntu:24.04
+
+# NOTE(review): recommended packages are left enabled here; the ROCm libraries copied below may need system
+# libraries (e.g. libdrm, libelf, libnuma) that are not installed explicitly - verify with ldd before tightening.
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
+    ca-certificates curl libgomp1 libopenblas0 libstdc++6 libvulkan1 \
+    && rm -rf /var/lib/apt/lists/*
+
+# ROCm 6.1 runtime libraries and data from the builder; ggml-hip needs them at runtime via LD_LIBRARY_PATH.
+COPY --from=builder /opt/rocm/lib/ /opt/rocm/lib/
+COPY --from=builder /opt/rocm/share/ /opt/rocm/share/
+
+# ollama binary plus all backends installed under /build/dist/lib/ollama
+# (CPU: libggml-*.so, HIP: rocm/libggml-hip.so).
+COPY --from=builder /build/dist/ollama /usr/bin/ollama
+COPY --from=builder /build/dist/lib/ollama/ /usr/lib/ollama/
+
+RUN ldconfig
+
+# Library search path plus the gfx906 (MI50) runtime settings.
+ENV LD_LIBRARY_PATH=/opt/rocm/lib:/usr/lib/ollama/rocm:/usr/lib/ollama \
+    HSA_OVERRIDE_GFX_VERSION=9.0.6 HCC_AMDGPU_TARGET=gfx906 HSA_ENABLE_SDMA=0
+
+EXPOSE 11434
+# Use the path the binary is copied to above rather than relying on the /bin -> /usr/bin symlink.
+ENTRYPOINT ["/usr/bin/ollama"]
+CMD ["serve"]