update docker files

1. add docker-npu (Dockerfile and docker-compose.yml); see the sketch below the commit metadata
2. move the CUDA Docker files to docker-cuda, with small changes to adapt to the new path


Former-commit-id: 5431c1f18aadb072208efe7fd8e36fdcfbf807c2
MengqingCao
2024-06-24 10:57:36 +00:00
parent 826d7808b4
commit af2607de1a
6 changed files with 187 additions and 19 deletions
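The docker-npu files themselves are not part of this excerpt. As a rough sketch only — the device and mount paths below are the usual Ascend driver defaults, assumed here rather than taken from the commit — an NPU compose service drops the NVIDIA deploy block and instead passes the davinci devices and driver paths through directly:

services:
  llamafactory:
    # build, ports, cache mounts, etc. as in the CUDA file below
    devices:
      - /dev/davinci0
      - /dev/davinci_manager
      - /dev/devmm_svm
      - /dev/hisi_hdc
    volumes:
      - /usr/local/dcmi:/usr/local/dcmi
      - /usr/local/bin/npu-smi:/usr/local/bin/npu-smi
      - /usr/local/Ascend/driver:/usr/local/Ascend/driver
      - /etc/ascend_install.info:/etc/ascend_install.info
    restart: unless-stopped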

docker/docker-cuda/Dockerfile

@@ -0,0 +1,49 @@
# Use the NVIDIA official image with PyTorch 2.3.0
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-24-02.html
FROM nvcr.io/nvidia/pytorch:24.02-py3
# Define installation arguments
ARG INSTALL_BNB=false
ARG INSTALL_VLLM=false
ARG INSTALL_DEEPSPEED=false
ARG PIP_INDEX=https://pypi.org/simple
# Set the working directory
WORKDIR /app/LLaMA-Factory
RUN cd /app && \
    git config --global http.version HTTP/1.1 && \
    git clone https://github.com/hiyouga/LLaMA-Factory.git && \
    cd /app/LLaMA-Factory
# Install the requirements
RUN pip config set global.index-url $PIP_INDEX
RUN python -m pip install --upgrade pip
RUN python -m pip install -r requirements.txt
# Install LLaMA Factory with the selected optional extras
# (transformer-engine and flash-attn ship preinstalled in the NGC base image and are removed after the install)
RUN EXTRA_PACKAGES="metrics"; \
    if [ "$INSTALL_BNB" = "true" ]; then \
        EXTRA_PACKAGES="${EXTRA_PACKAGES},bitsandbytes"; \
    fi; \
    if [ "$INSTALL_VLLM" = "true" ]; then \
        EXTRA_PACKAGES="${EXTRA_PACKAGES},vllm"; \
    fi; \
    if [ "$INSTALL_DEEPSPEED" = "true" ]; then \
        EXTRA_PACKAGES="${EXTRA_PACKAGES},deepspeed"; \
    fi; \
    pip install -e ".[$EXTRA_PACKAGES]" && \
    pip uninstall -y transformer-engine flash-attn
# Set up volumes
VOLUME [ "/root/.cache/huggingface/", "/app/data", "/app/output" ]
# Expose port 7860 for the LLaMA Board
EXPOSE 7860
# Expose port 8000 for the API service
EXPOSE 8000
# Launch LLaMA Board
CMD [ "llamafactory-cli", "webui" ]

docker/docker-cuda/docker-compose.yml

@@ -0,0 +1,30 @@
services:
  llamafactory:
    build:
      dockerfile: Dockerfile
      context: .
      args:
        INSTALL_BNB: false
        INSTALL_VLLM: false
        INSTALL_DEEPSPEED: false
        PIP_INDEX: https://pypi.org/simple
    container_name: llamafactory
    volumes:
      - ../../hf_cache:/root/.cache/huggingface/
      - ../../data:/app/LLaMA-Factory/data
      - ../../output:/app/LLaMA-Factory/output
    ports:
      - "7860:7860"
      - "8000:8000"
    ipc: host
    tty: true
    stdin_open: true
    command: bash
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: "all"
              capabilities: [gpu]
    restart: unless-stopped
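With both files in the same directory, a typical workflow is to start the service detached and attach a shell, since the container idles on bash per the command above:

docker compose up -d
docker compose exec llamafactory bash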