Add a llama.cpp ROCm (gfx906) server image and a compose service that runs it.

Review notes (text before the first "diff --git" line is ignored by git apply):
* Line structure and YAML indentation were reconstructed, assuming a 2-space
  indent; verify the ai/compose.yml context lines against the target file
  before applying.
* NOTE(review): the second ai/compose.yml hunk adds "llama-cpp-hermes" below
  the top-level "networks:" key named in its hunk header, so the entry appears
  to sit inside the networks mapping rather than under "services:". Confirm
  with "docker compose config" before merging.
* ai/llama-cpp/Dockerfile no longer matches the originally recorded blob, so
  its "index" line is omitted.

diff --git a/ai/compose.yml b/ai/compose.yml
index 1db7831..3db4fe6 100644
--- a/ai/compose.yml
+++ b/ai/compose.yml
@@ -112,22 +112,7 @@ services:
       - /mnt/HoardingCow_docker_data/Ollama/ollama:/root/.ollama
     environment:
       - OLLAMA_VULKAN=0
-      - HSA_OVERRIDE_GFX_VERSION=9.0.6
-      - HCC_AMDGPU_TARGET=gfx906
-      - HIP_VISIBLE_DEVICES=0,1
-      - ROCR_VISIBLE_DEVICES=0,1
-      - HSA_ENABLE_SDMA=0
       - OLLAMA_HOST=0.0.0.0
-      - OLLAMA_DEBUG=1
-      - OLLAMA_FLASH_ATTENTION=1
-      - OLLAMA_NUM_PARALLEL=2
-    devices:
-      # Map the render nodes and KFD for ROCm to work inside the container
-      - /dev/kfd:/dev/kfd
-      - /dev/dri:/dev/dri
-    group_add:
-      - "303"
-      - "26"
 
 networks:
   ai_net:
@@ -137,47 +122,40 @@ networks:
     driver: bridge
     name: ai_backend
 
-  # llama_cpp_devstral:
-  #   image: ghcr.io/ggml-org/llama.cpp:server-rocm
-  #   container_name: llama_cpp_devstral
-  #   restart: unless-stopped
-  #   networks:
-  #     - ai_backend
-  #   ports:
-  #     - "8300:8080"
-  #   ipc: host
-  #   devices:
-  #     - "/dev/kfd:/dev/kfd"
-  #     - "/dev/dri:/dev/dri"
-  #   group_add:
-  #     - "303" # video
-  #     - "26" # render
-  #   environment:
-  #     HSA_OVERRIDE_GFX_VERSION: 9.0.6
-  #     HIP_VISIBLE_DEVICES: 0,1
-  #     LLAMA_CACHE: /models
-  #   volumes:
-  #     - /mnt/HoardingCow_docker_data/Llama_cpp/models:/models
-  #     - /mnt/HoardingCow_docker_data/Llama_cpp/devstral-agent.jinja:/template.jinja
-  #   command: >
-  #     -hf unsloth/Devstral-Small-2-24B-Instruct-2512-GGUF:Devstral-Small-2-24B-Instruct-2512-Q8_0.gguf
-  #     -a devstral-2-small-llama_cpp
-  #     --chat-template-file /template.jinja
-  #     --host 0.0.0.0
-  #     --port 8080
-  #     --n-gpu-layers 99
-  #     --ctx-size 163840
-  #     --batch-size 4096
-  #     --ubatch-size 4096
-  #     --cache-type-k f16
-  #     --cache-type-v f16
-  #     --cache-reuse 256
-  #     --flash-attn on
-  #     --context-shift
-  #     --split-mode layer
-  #     --no-mmap
-  #     --n-predict -1
-  #     --parallel 2
+  llama-cpp-hermes:
+    image: llama-cpp:rocm-gfx906
+    container_name: llama-cpp-hermes
+    restart: unless-stopped
+    networks:
+      - ai_backend
+    ports:
+      - "127.0.0.1:8300:8080"
+    ipc: host
+    devices:
+      - /dev/kfd:/dev/kfd
+      - /dev/dri:/dev/dri
+    group_add:
+      - "303"
+      - "26"
+    environment:
+      - HSA_OVERRIDE_GFX_VERSION=9.0.6
+      - HSA_ENABLE_SDMA=0
+      - HIP_VISIBLE_DEVICES=0,1
+      - LLAMA_CACHE=/models
+    volumes:
+      - /mnt/HoardingCow_docker_data/Llama_cpp/models:/models
+      - /mnt/HoardingCow_docker_data/Ollama/ollama/models/blobs/sha256-17823599694fa3503ef54bf748d5078c6ce881f4d01616cafa255dc05d215a08:/model.gguf:ro
+    command: >
+      -m /model.gguf
+      --host 0.0.0.0
+      --port 8080
+      --gpu-layers 99
+      --ctx-size 163840
+      -ctk f16 -ctv f16
+      --flash-attn on
+      --split-mode layer
+      --no-mmap
+      --n-predict -1
 
   # vllm:
   #   image: nalanzeyu/vllm-gfx906:v0.9.0-rocm6.3
diff --git a/ai/llama-cpp/Dockerfile b/ai/llama-cpp/Dockerfile
new file mode 100644
--- /dev/null
+++ b/ai/llama-cpp/Dockerfile
@@ -0,0 +1,48 @@
+# ai/llama-cpp/Dockerfile
+# Custom llama.cpp server with ROCm 6.1 + gfx906 (MI50) support.
+# Build: docker build -t llama-cpp:rocm-gfx906 .
+
+# Build stage: compiles llama-server with the HIP backend inside the full ROCm dev image.
+FROM rocm/dev-ubuntu-22.04:6.1.2-complete AS builder
+# --no-install-recommends keeps the layer lean; ca-certificates is listed
+# explicitly so the HTTPS clone below does not rely on a recommended package.
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+        build-essential \
+        ca-certificates \
+        cmake \
+        curl \
+        git \
+        make \
+        pkg-config \
+    && rm -rf /var/lib/apt/lists/*
+# llama.cpp git tag to build; override with --build-arg LLAMACPP_VERSION=TAG.
+ARG LLAMACPP_VERSION=b9596
+RUN git clone --depth 1 --branch "${LLAMACPP_VERSION}" https://github.com/ggml-org/llama.cpp.git /build
+WORKDIR /build
+ENV HIP_PATH=/opt/rocm ROCM_PATH=/opt/rocm PATH=/opt/rocm/bin:/opt/rocm/llvm/bin:${PATH} CMAKE_PREFIX_PATH=/opt/rocm
+# Configure and build out-of-tree with -S/-B rather than "mkdir build && cd build".
+# Only the HIP backend is enabled, restricted to the gfx906:xnack- target.
+RUN cmake -S . -B build -DGGML_HIP=ON -DCMAKE_BUILD_TYPE=Release \
+        -DAMDGPU_TARGETS="gfx906:xnack-" \
+        -DCMAKE_POSITION_INDEPENDENT_CODE=ON \
+        -DGGML_CUDA=OFF -DGGML_VULKAN=OFF -DGGML_METAL=OFF \
+        -DBUILD_SHARED_LIBS=OFF && \
+    cmake --build build --target llama-server -j "$(nproc)"
+
+# Runtime stage: plain Ubuntu plus the ROCm libraries and the llama-server binary.
+FROM ubuntu:24.04 AS runtime
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+        ca-certificates curl libstdc++6 libgomp1 libopenblas0 \
+        libnuma1 libelf1 libdrm2 libdrm-amdgpu1 \
+    && rm -rf /var/lib/apt/lists/*
+COPY --from=builder /opt/rocm/lib/ /opt/rocm/lib/
+COPY --from=builder /opt/rocm/share/ /opt/rocm/share/
+COPY --from=builder /build/build/bin/llama-server /usr/local/bin/llama-server
+# Register the copied ROCm libraries with the dynamic linker.
+RUN echo /opt/rocm/lib > /etc/ld.so.conf.d/rocm.conf && ldconfig
+ENV HSA_OVERRIDE_GFX_VERSION=9.0.6 HCC_AMDGPU_TARGET=gfx906 HSA_ENABLE_SDMA=0
+EXPOSE 8080
+# Probe llama-server's /health endpoint on the exposed port; the long start period leaves time for model loading.
+HEALTHCHECK --interval=30s --timeout=5s --start-period=300s --retries=3 \
+    CMD curl -fsS http://127.0.0.1:8080/health || exit 1
+ENTRYPOINT ["/usr/local/bin/llama-server"]