[assets] update docker files (#8176)

2025-05-27 18:15:23 +08:00
parent 46ccf84aaa
commit 00974a3169
11 changed files with 187 additions and 253 deletions
--- a/docker/docker-rocm/Dockerfile
+++ b/docker/docker-rocm/Dockerfile
@@ -1,21 +1,23 @@
-FROM hardandheavy/transformers-rocm:2.2.0
+# Base image, see https://hub.docker.com/r/rocm/pytorch/tags (only this ARG may precede FROM)
+ARG BASE_IMAGE=rocm/pytorch:rocm6.4.1_ubuntu22.04_py3.10_pytorch_release_2.6.0
+FROM "${BASE_IMAGE}"
+
+# Installation arguments: declared after FROM so they are in scope for the RUN/ENV steps below
+ARG PIP_INDEX=https://pypi.org/simple
+ARG EXTRAS=metrics
+ARG INSTALL_FLASHATTN=false
+ARG HTTP_PROXY=""
+ARG PYTORCH_INDEX=https://download.pytorch.org/whl/rocm6.3

 # Define environments
-ENV MAX_JOBS=4
+ENV MAX_JOBS=16
 ENV FLASH_ATTENTION_FORCE_BUILD=TRUE
 ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
-
-# Define installation arguments
-ARG INSTALL_BNB=false
-ARG INSTALL_VLLM=false
-ARG INSTALL_DEEPSPEED=false
-ARG INSTALL_FLASHATTN=false
-ARG INSTALL_LIGER_KERNEL=false
-ARG INSTALL_HQQ=false
-ARG INSTALL_PYTORCH=true
-ARG PIP_INDEX=https://pypi.org/simple
-ARG HTTP_PROXY=
-ARG PYTORCH_INDEX=https://download.pytorch.org/whl/nightly/rocm6.3
+ENV DEBIAN_FRONTEND=noninteractive
+ENV NODE_OPTIONS=""
+ENV PIP_ROOT_USER_ACTION=ignore
+ENV http_proxy="${HTTP_PROXY}"
+ENV https_proxy="${HTTP_PROXY}"

 # Use Bash instead of default /bin/sh
 SHELL ["/bin/bash", "-c"]
@@ -23,83 +25,47 @@ SHELL ["/bin/bash", "-c"]
 # Set the working directory
 WORKDIR /app

-# Set http proxy
-RUN if [ -n "$HTTP_PROXY" ]; then \
-        echo "Configuring proxy..."; \
-        export http_proxy=$HTTP_PROXY; \
-        export https_proxy=$HTTP_PROXY; \
-    fi
+# Change pip source
+RUN pip config set global.index-url "${PIP_INDEX}" && \
+    pip config set global.extra-index-url "${PIP_INDEX}" && \
+    python -m pip install --upgrade pip
+
+# Reinstall PyTorch from the ROCm wheel index (PYTORCH_INDEX); skip the pip cache to keep the layer small
+RUN pip uninstall -y torch torchvision torchaudio && \
+    pip install --no-cache-dir --pre torch torchvision torchaudio --index-url "${PYTORCH_INDEX}"

 # Install the requirements
 COPY requirements.txt /app
-RUN pip config set global.index-url "$PIP_INDEX" && \
-    pip config set global.extra-index-url "$PIP_INDEX" && \
-    python -m pip install --upgrade pip && \
-    if [ -n "$HTTP_PROXY" ]; then \
-        python -m pip install --proxy=$HTTP_PROXY -r requirements.txt; \
-    else \
-        python -m pip install -r requirements.txt; \
-    fi
+RUN pip install --no-cache-dir -r requirements.txt

 # Copy the rest of the application into the image
 COPY . /app

-# Install the LLaMA Factory
-RUN EXTRA_PACKAGES="metrics"; \
-    if [ "$INSTALL_BNB" == "true" ]; then \
-        EXTRA_PACKAGES="${EXTRA_PACKAGES},bitsandbytes"; \
-    fi; \
-    if [ "$INSTALL_VLLM" == "true" ]; then \
-        EXTRA_PACKAGES="${EXTRA_PACKAGES},vllm"; \
-    fi; \
-    if [ "$INSTALL_DEEPSPEED" == "true" ]; then \
-        EXTRA_PACKAGES="${EXTRA_PACKAGES},deepspeed"; \
-    fi; \
-    if [ "$INSTALL_LIGER_KERNEL" == "true" ]; then \
-        EXTRA_PACKAGES="${EXTRA_PACKAGES},liger-kernel"; \
-    fi; \
-    if [ "$INSTALL_HQQ" == "true" ]; then \
-        EXTRA_PACKAGES="${EXTRA_PACKAGES},hqq"; \
-    fi; \
-    if [ -n "$HTTP_PROXY" ]; then \
-        pip install --proxy=$HTTP_PROXY -e ".[$EXTRA_PACKAGES]"; \
-    else \
-        pip install -e ".[$EXTRA_PACKAGES]"; \
-    fi
-
-# Reinstall pytorch
-# This is necessary to ensure that the correct version of PyTorch is installed
-RUN if [ "$INSTALL_PYTORCH" == "true" ]; then \
-        pip uninstall -y torch torchvision torchaudio && \
-        pip install --pre torch torchvision torchaudio --index-url "$PYTORCH_INDEX"; \
-    fi
+# Install LLaMA Factory
+RUN pip install --no-cache-dir -e ".[${EXTRAS}]" --no-build-isolation

 # Rebuild flash attention
-RUN pip uninstall -y transformer-engine flash-attn && \
-    if [ "$INSTALL_FLASHATTN" == "true" ]; then \
+RUN if [ "$INSTALL_FLASHATTN" == "true" ]; then \
        pip uninstall -y ninja && \
-        if [ -n "$HTTP_PROXY" ]; then \
-            pip install --proxy=$HTTP_PROXY ninja && \
-            pip install --proxy=$HTTP_PROXY --no-cache-dir flash-attn --no-build-isolation; \
-        else \
-            pip install ninja && \
-            pip install --no-cache-dir flash-attn --no-build-isolation; \
-        fi; \
-    fi
-
-# Unset http proxy
-RUN if [ -n "$HTTP_PROXY" ]; then \
-        unset http_proxy; \
-        unset https_proxy; \
+        pip install --no-cache-dir ninja && \
+        pip install --no-cache-dir flash-attn --no-build-isolation; \
    fi

 # Set up volumes
-VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/app/data", "/app/output" ]
+VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/root/.cache/openmind", "/app/data", "/app/output" ]

-# Expose port 7860 for the LLaMA Board
+# Expose port 7860 for LLaMA Board
 ENV GRADIO_SERVER_PORT 7860
 EXPOSE 7860

-# Expose port 8000 for the API service
+# Expose port 8000 for API service
 ENV API_PORT 8000
 EXPOSE 8000
+
+# Clear the build-time proxy for the runtime image (ENV cannot unset a variable; these become empty strings)
+ENV http_proxy=
+ENV https_proxy=
+
+# Reset pip config
+RUN pip config unset global.index-url && \
+    pip config unset global.extra-index-url
--- a/docker/docker-rocm/docker-compose.yml
+++ b/docker/docker-rocm/docker-compose.yml
@@ -4,15 +4,8 @@ services:
      dockerfile: ./docker/docker-rocm/Dockerfile
      context: ../..
      args:
-        INSTALL_BNB: "false"
-        INSTALL_VLLM: "false"
-        INSTALL_DEEPSPEED: "false"
-        INSTALL_FLASHATTN: "false"
-        INSTALL_LIGER_KERNEL: "false"
-        INSTALL_PYTORCH: "true"
-        INSTALL_HQQ: "false"
        PIP_INDEX: https://pypi.org/simple
-        PYTORCH_INDEX: https://download.pytorch.org/whl/nightly/rocm6.3
+        EXTRAS: metrics
    container_name: llamafactory
    volumes:
      - ../../hf_cache:/root/.cache/huggingface
@@ -20,13 +13,12 @@ services:
      - ../../om_cache:/root/.cache/openmind
      - ../../data:/app/data
      - ../../output:/app/output
-      - ../../saves:/app/saves
    ports:
      - "7860:7860"
      - "8000:8000"
    ipc: host
    tty: true
-    shm_size: "16gb"
+    # shm_size: "16gb"  # ipc: host is set
    stdin_open: true
    command: bash
    devices: