Compare commits

12 Commits

| SHA1 |
|---|
| 0f1e592326 |
| 4d7dd0330d |
| ea2ca2777f |
| 4b2b92fd9a |
| 784088db3f |
| 0ecf0d51e3 |
| 041ecd0de1 |
| d812249db7 |
| 88528f1a87 |
| 82533114a7 |
| 6d9fbb3fa9 |
| 9953ae3d03 |
`.github/workflows/label_issue.yml` (vendored, new file, 17 lines)

```yaml
name: label_issue

on:
  issues:
    types:
      - opened

jobs:
  label_issue:
    runs-on: ubuntu-latest

    steps:
      - env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          ISSUE_URL: ${{ github.event.issue.html_url }}
        run: |
          gh issue edit $ISSUE_URL --add-label "pending"
```
`.github/workflows/tests.yml` (vendored, 10 changed lines)

```diff
@@ -9,6 +9,8 @@ on:
       - "requirements.txt"
       - ".github/workflows/*.yml"
   pull_request:
+    types:
+      - review_requested
     branches:
       - main
     paths:
@@ -19,21 +21,27 @@ on:
 jobs:
   tests:
     runs-on: ubuntu-latest
+
     steps:
-      - uses: actions/checkout@v4
+      - name: Checkout
+        uses: actions/checkout@v4
+
       - name: Set up Python
         uses: actions/setup-python@v5
         with:
           python-version: "3.8"
           cache: "pip"
           cache-dependency-path: "setup.py"
+
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
           python -m pip install .[torch,dev]
+
       - name: Check quality
         run: |
           make style && make quality
+
       - name: Test with pytest
         run: |
           make test
```
`Dockerfile` (38 changed lines)

```diff
@@ -1,14 +1,44 @@
-FROM nvcr.io/nvidia/pytorch:24.01-py3
+# Use the NVIDIA official image with PyTorch 2.3.0
+# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-24-02.html
+FROM nvcr.io/nvidia/pytorch:24.02-py3
 
+# Define installation arguments
+ARG INSTALL_BNB=false
+ARG INSTALL_VLLM=false
+ARG INSTALL_DEEPSPEED=false
+ARG PIP_INDEX=https://pypi.org/simple
+
+# Set the working directory
 WORKDIR /app
 
+# Install the requirements
 COPY requirements.txt /app/
-RUN pip install -r requirements.txt
+RUN pip config set global.index-url $PIP_INDEX
+RUN python -m pip install --upgrade pip
+RUN python -m pip install -r requirements.txt
 
+# Copy the rest of the application into the image
 COPY . /app/
-RUN pip install -e .[metrics,bitsandbytes,qwen]
 
+# Install the LLaMA Factory
+RUN EXTRA_PACKAGES="metrics"; \
+    if [ "$INSTALL_BNB" = "true" ]; then \
+        EXTRA_PACKAGES="${EXTRA_PACKAGES},bitsandbytes"; \
+    fi; \
+    if [ "$INSTALL_VLLM" = "true" ]; then \
+        EXTRA_PACKAGES="${EXTRA_PACKAGES},vllm"; \
+    fi; \
+    if [ "$INSTALL_DEEPSPEED" = "true" ]; then \
+        EXTRA_PACKAGES="${EXTRA_PACKAGES},deepspeed"; \
+    fi; \
+    pip install -e .[$EXTRA_PACKAGES] && \
+    pip uninstall -y transformer-engine
+
+# Set up volumes
 VOLUME [ "/root/.cache/huggingface/", "/app/data", "/app/output" ]
 
+# Expose port 7860 for the LLaMA Board
 EXPOSE 7860
-CMD [ "llamafactory-cli", "webui" ]
+
+# Expose port 8000 for the API service
+EXPOSE 8000
```
`README.md` (38 changed lines)

````diff
@@ -335,7 +335,7 @@ huggingface-cli login
 ```bash
 git clone --depth 1 https://github.com/hiyouga/LLaMA-Factory.git
 cd LLaMA-Factory
-pip install -e '.[torch,metrics]'
+pip install -e ".[torch,metrics]"
 ```
 
 Extra dependencies available: torch, torch_npu, metrics, deepspeed, bitsandbytes, vllm, galore, badam, gptq, awq, aqlm, qwen, modelscope, quality
@@ -405,9 +405,9 @@ Please refer to [data/README.md](data/README.md) for checking the details about
 Use the following 3 commands to run LoRA **fine-tuning**, **inference** and **merging** of the Llama3-8B-Instruct model, respectively.
 
 ```bash
-CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml
-CUDA_VISIBLE_DEVICES=0 llamafactory-cli chat examples/inference/llama3_lora_sft.yaml
-CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml
+llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml
+llamafactory-cli chat examples/inference/llama3_lora_sft.yaml
+llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml
 ```
 
 See [examples/README.md](examples/README.md) for advanced usage (including distributed training).
@@ -417,33 +417,33 @@ See [examples/README.md](examples/README.md) for advanced usage (including distr
 
 ### Fine-Tuning with LLaMA Board GUI (powered by [Gradio](https://github.com/gradio-app/gradio))
 
-#### Use local environment
-
 ```bash
-CUDA_VISIBLE_DEVICES=0 GRADIO_SHARE=1 llamafactory-cli webui
+llamafactory-cli webui
 ```
 
-</details>
-
-#### Use Docker
+### Build Docker
 
 ```bash
-docker build -f ./Dockerfile -t llama-factory:latest .
-docker run --gpus=all \
+docker build -f ./Dockerfile \
+    --build-arg INSTALL_BNB=false \
+    --build-arg INSTALL_VLLM=false \
+    --build-arg INSTALL_DEEPSPEED=false \
+    --build-arg PIP_INDEX=https://pypi.org/simple \
+    -t llamafactory:latest .
+
+docker run -it --gpus=all \
     -v ./hf_cache:/root/.cache/huggingface/ \
     -v ./data:/app/data \
     -v ./output:/app/output \
     -p 7860:7860 \
+    -p 8000:8000 \
     --shm-size 16G \
-    --name llama_factory \
-    -d llama-factory:latest
+    --name llamafactory \
+    llamafactory:latest
 ```
 
-#### Use Docker Compose
-
-```bash
-docker compose -f ./docker-compose.yml up -d
-```
+> [!TIP]
+> Use Docker Compose to build image via `docker compose up -d`.
 
 <details><summary>Details about volume</summary>
 
````
`README_zh.md` (36 changed lines)

````diff
@@ -335,7 +335,7 @@ huggingface-cli login
 ```bash
 git clone --depth 1 https://github.com/hiyouga/LLaMA-Factory.git
 cd LLaMA-Factory
-pip install -e '.[torch,metrics]'
+pip install -e ".[torch,metrics]"
 ```
 
 可选的额外依赖项:torch、torch_npu、metrics、deepspeed、bitsandbytes、vllm、galore、badam、gptq、awq、aqlm、qwen、modelscope、quality
@@ -405,9 +405,9 @@ Docker 镜像:
 下面三行命令分别对 Llama3-8B-Instruct 模型进行 LoRA **微调**、**推理**和**合并**。
 
 ```bash
-CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml
-CUDA_VISIBLE_DEVICES=0 llamafactory-cli chat examples/inference/llama3_lora_sft.yaml
-CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml
+llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml
+llamafactory-cli chat examples/inference/llama3_lora_sft.yaml
+llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml
 ```
 
 高级用法请参考 [examples/README_zh.md](examples/README_zh.md)(包括多 GPU 微调)。
@@ -417,31 +417,33 @@ CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_lora_s
 
 ### LLaMA Board 可视化微调(由 [Gradio](https://github.com/gradio-app/gradio) 驱动)
 
-#### 使用本地环境
-
 ```bash
-CUDA_VISIBLE_DEVICES=0 GRADIO_SHARE=1 llamafactory-cli webui
+llamafactory-cli webui
 ```
 
-#### 使用 Docker
+### 构建 Docker
 
 ```bash
-docker build -f ./Dockerfile -t llama-factory:latest .
-docker run --gpus=all \
+docker build -f ./Dockerfile \
+    --build-arg INSTALL_BNB=false \
+    --build-arg INSTALL_VLLM=false \
+    --build-arg INSTALL_DEEPSPEED=false \
+    --build-arg PIP_INDEX=https://pypi.org/simple \
+    -t llamafactory:latest .
+
+docker run -it --gpus=all \
     -v ./hf_cache:/root/.cache/huggingface/ \
     -v ./data:/app/data \
    -v ./output:/app/output \
     -p 7860:7860 \
+    -p 8000:8000 \
     --shm-size 16G \
-    --name llama_factory \
-    -d llama-factory:latest
+    --name llamafactory \
+    llamafactory:latest
 ```
 
-#### 使用 Docker Compose
-
-```bash
-docker compose -f ./docker-compose.yml up -d
-```
+> [!TIP]
+> 通过 `docker compose up -d` 使用 Docker Compose 构建镜像。
 
 <details><summary>数据卷详情</summary>
 
````
`docker-compose.yml`

```diff
@@ -1,17 +1,23 @@
 version: '3.8'
 
 services:
-  llama-factory:
+  llamafactory:
     build:
       dockerfile: Dockerfile
       context: .
-    container_name: llama_factory
+      args:
+        INSTALL_BNB: false
+        INSTALL_VLLM: false
+        INSTALL_DEEPSPEED: false
+        PIP_INDEX: https://pypi.org/simple
+    container_name: llamafactory
     volumes:
       - ./hf_cache:/root/.cache/huggingface/
       - ./data:/app/data
       - ./output:/app/output
     ports:
       - "7860:7860"
+      - "8000:8000"
     ipc: host
     deploy:
       resources:
```
Changes to the `Evaluator` class:

```diff
@@ -26,9 +26,7 @@ class Evaluator:
         self.template = get_template_and_fix_tokenizer(self.tokenizer, self.data_args.template)
         self.model = load_model(self.tokenizer, self.model_args, finetuning_args)
         self.eval_template = get_eval_template(self.eval_args.lang)
-        self.choice_inputs = [
-            self.tokenizer.encode(self.eval_template.prefix + ch, add_special_tokens=False)[-1] for ch in CHOICES
-        ]
+        self.choice_inputs = [self.tokenizer.encode(ch, add_special_tokens=False)[-1] for ch in CHOICES]
 
     @torch.inference_mode()
     def batch_inference(self, batch_input: Dict[str, torch.Tensor]) -> List[str]:
```
Changes to the evaluation templates (`EvalTemplate`):

```diff
@@ -10,7 +10,6 @@ class EvalTemplate:
     system: str
     choice: str
     answer: str
-    prefix: str
 
     def _parse_example(self, example: Dict[str, str]) -> Tuple[str, str]:
         r"""
@@ -42,8 +41,8 @@ class EvalTemplate:
 eval_templates: Dict[str, "EvalTemplate"] = {}
 
 
-def _register_eval_template(name: str, system: str, choice: str, answer: str, prefix: str) -> None:
-    eval_templates[name] = EvalTemplate(system=system, choice=choice, answer=answer, prefix=prefix)
+def _register_eval_template(name: str, system: str, choice: str, answer: str) -> None:
+    eval_templates[name] = EvalTemplate(system=system, choice=choice, answer=answer)
 
 
 def get_eval_template(name: str) -> "EvalTemplate":
@@ -56,8 +55,7 @@ _register_eval_template(
     name="en",
     system="The following are multiple choice questions (with answers) about {subject}.\n\n",
     choice="\n{choice}. {content}",
-    answer="\nAnswer: ",
-    prefix=" ",
+    answer="\nAnswer:",
 )
 
 
@@ -66,5 +64,4 @@ _register_eval_template(
     system="以下是中国关于{subject}考试的单项选择题,请选出其中的正确答案。\n\n",
     choice="\n{choice}. {content}",
     answer="\n答案:",
-    prefix=" ",
 )
```
Changes to the environment utilities (`print_env`, `VERSION`):

```diff
@@ -6,13 +6,10 @@ import peft
 import torch
 import transformers
 import trl
-from transformers.integrations import is_deepspeed_available
-from transformers.utils import is_bitsandbytes_available, is_torch_cuda_available, is_torch_npu_available
-
-from .packages import is_vllm_available
+from transformers.utils import is_torch_cuda_available, is_torch_npu_available
 
 
-VERSION = "0.8.0"
+VERSION = "0.8.1"
 
 
 def print_env() -> None:
@@ -37,19 +34,25 @@ def print_env() -> None:
         info["NPU type"] = torch.npu.get_device_name()
         info["CANN version"] = torch.version.cann
 
-    if is_deepspeed_available():
+    try:
         import deepspeed  # type: ignore
 
         info["DeepSpeed version"] = deepspeed.__version__
+    except Exception:
+        pass
 
-    if is_bitsandbytes_available():
+    try:
         import bitsandbytes
 
         info["Bitsandbytes version"] = bitsandbytes.__version__
+    except Exception:
+        pass
 
-    if is_vllm_available():
+    try:
         import vllm
 
         info["vLLM version"] = vllm.__version__
+    except Exception:
+        pass
 
     print("\n" + "\n".join(["- {}: {}".format(key, value) for key, value in info.items()]) + "\n")
```
Changes to the LoRA adapter setup (`_setup_lora_tuning`):

```diff
@@ -209,6 +209,7 @@ def _setup_lora_tuning(
             "lora_alpha": finetuning_args.lora_alpha,
             "lora_dropout": finetuning_args.lora_dropout,
             "use_rslora": finetuning_args.use_rslora,
+            "use_dora": finetuning_args.use_dora,
             "modules_to_save": finetuning_args.additional_target,
         }
 
@@ -218,7 +219,6 @@ def _setup_lora_tuning(
             lora_config = LoraConfig(
                 task_type=TaskType.CAUSAL_LM,
                 inference_mode=False,
-                use_dora=finetuning_args.use_dora,
                 **peft_kwargs,
             )
             model = get_peft_model(model, lora_config)
```
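The two hunks above move `use_dora` into the shared `peft_kwargs` dictionary instead of passing it to `LoraConfig` separately. A minimal sketch of the resulting call shape, with illustrative values standing in for the real `finetuning_args` fields (assumes a `peft` release that supports `use_rslora` and `use_dora`):

```python
from peft import LoraConfig, TaskType

# Illustrative stand-ins for finetuning_args; the real values come from the training config.
peft_kwargs = {
    "r": 8,
    "lora_alpha": 16,
    "lora_dropout": 0.0,
    "use_rslora": False,
    "use_dora": False,  # now carried alongside the other shared LoRA kwargs
    "modules_to_save": None,
}

# The config is built the same way as before, just with one more key in the dict.
lora_config = LoraConfig(task_type=TaskType.CAUSAL_LM, inference_mode=False, **peft_kwargs)
print(lora_config)
```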
Changes to the patched LLaMA attention functions:

```diff
@@ -96,7 +96,8 @@ def llama_attention_forward(
             (
                 attn_output[:, :, : self.num_heads // 2],
                 attn_output[:, :, self.num_heads // 2 :].roll(groupsz // 2, dims=1),
-            )
+            ),
+            dim=2,
         )
 
     attn_output = attn_output.reshape(bsz, q_len, self.hidden_size)
@@ -194,7 +195,8 @@ def llama_flash_attention_2_forward(
             (
                 attn_output[:, :, : self.num_heads // 2],
                 attn_output[:, :, self.num_heads // 2 :].roll(groupsz // 2, dims=1),
-            )
+            ),
+            dim=2,
         )
 
     attn_output = attn_output.reshape(bsz, q_len, self.hidden_size).contiguous()
@@ -293,7 +295,8 @@ def llama_sdpa_attention_forward(
             (
                 attn_output[:, :, : self.num_heads // 2],
                 attn_output[:, :, self.num_heads // 2 :].roll(groupsz // 2, dims=1),
-            )
+            ),
+            dim=2,
         )
 
     attn_output = attn_output.reshape(bsz, q_len, self.hidden_size)
@@ -303,7 +306,7 @@ def llama_sdpa_attention_forward(
 
 
 def _apply_llama_patch() -> None:
-    require_version("transformers==4.40.2", "To fix: pip install transformers==4.40.2")
+    require_version("transformers==4.41.2", "To fix: pip install transformers==4.41.2")
     LlamaAttention.forward = llama_attention_forward
     LlamaFlashAttention2.forward = llama_flash_attention_2_forward
     LlamaSdpaAttention.forward = llama_sdpa_attention_forward
```
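These attention hunks add an explicit `dim=2` to the `torch.cat` call that stitches the two shifted head groups back together. A self-contained sketch, using made-up shapes rather than the actual attention tensors, of why the argument matters: `torch.cat` defaults to concatenating along dim 0.

```python
import torch

# Stand-in tensor shaped like (batch, seq_len, num_heads).
x = torch.ones(2, 3, 4)
a, b = x[:, :, :2], x[:, :, 2:]

# With dim=2 the two half-head groups are rejoined along the head axis.
assert torch.cat((a, b), dim=2).shape == (2, 3, 4)

# Without it, torch.cat concatenates along dim 0 and doubles the batch axis instead.
assert torch.cat((a, b)).shape == (4, 3, 2)
```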
Changes to the supervised dataset test (`test_supervised`):

```diff
@@ -1,4 +1,5 @@
 import os
+import random
 
 import pytest
 from datasets import load_dataset
@@ -8,17 +9,17 @@ from llamafactory.hparams import get_train_args
 from llamafactory.model import load_tokenizer
 
 
-TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-LlamaForCausalLM")
+TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
 
-TRAINING_ARGS = {
+TRAIN_ARGS = {
     "model_name_or_path": TINY_LLAMA,
     "stage": "sft",
     "do_train": True,
     "finetuning_type": "full",
-    "dataset": "llamafactory/tiny_dataset",
+    "dataset": "llamafactory/tiny-supervised-dataset",
     "dataset_dir": "ONLINE",
     "template": "llama3",
-    "cutoff_len": 1024,
+    "cutoff_len": 8192,
     "overwrite_cache": True,
     "output_dir": "dummy_dir",
     "overwrite_output_dir": True,
@@ -26,19 +27,24 @@ TRAINING_ARGS = {
 }
 
 
-@pytest.mark.parametrize("test_num", [5])
-def test_supervised(test_num: int):
-    model_args, data_args, training_args, _, _ = get_train_args(TRAINING_ARGS)
+@pytest.mark.parametrize("num_samples", [10])
+def test_supervised(num_samples: int):
+    model_args, data_args, training_args, _, _ = get_train_args(TRAIN_ARGS)
     tokenizer_module = load_tokenizer(model_args)
     tokenizer = tokenizer_module["tokenizer"]
     tokenized_data = get_dataset(model_args, data_args, training_args, stage="sft", **tokenizer_module)
 
-    original_data = load_dataset(TRAINING_ARGS["dataset"], split="train")
-    for test_idx in range(test_num):
-        decode_result = tokenizer.decode(tokenized_data["input_ids"][test_idx])
+    original_data = load_dataset(TRAIN_ARGS["dataset"], split="train")
+    indexes = random.choices(range(len(original_data)), k=num_samples)
+    for index in indexes:
+        decoded_result = tokenizer.decode(tokenized_data["input_ids"][index])
+        prompt = original_data[index]["instruction"]
+        if original_data[index]["input"]:
+            prompt += "\n" + original_data[index]["input"]
+
         messages = [
-            {"role": "user", "content": original_data[test_idx]["instruction"]},
-            {"role": "assistant", "content": original_data[test_idx]["output"]},
+            {"role": "user", "content": prompt},
+            {"role": "assistant", "content": original_data[index]["output"]},
         ]
         templated_result = tokenizer.apply_chat_template(messages, tokenize=False)
-        assert decode_result == templated_result
+        assert decoded_result == templated_result
```
`tests/eval/test_eval_template.py` (new file, 77 lines)

```python
from llamafactory.eval.template import get_eval_template


def test_eval_template_en():
    support_set = [
        {
            "question": "Fewshot question",
            "A": "Fewshot1",
            "B": "Fewshot2",
            "C": "Fewshot3",
            "D": "Fewshot4",
            "answer": "B",
        }
    ]
    example = {
        "question": "Target question",
        "A": "Target1",
        "B": "Target2",
        "C": "Target3",
        "D": "Target4",
        "answer": "C",
    }
    template = get_eval_template(name="en")
    messages = template.format_example(example, support_set=support_set, subject_name="SubName")
    assert messages == [
        {
            "role": "user",
            "content": (
                "The following are multiple choice questions (with answers) about SubName.\n\n"
                "Fewshot question\nA. Fewshot1\nB. Fewshot2\nC. Fewshot3\nD. Fewshot4\nAnswer:"
            ),
        },
        {"role": "assistant", "content": "B"},
        {
            "role": "user",
            "content": "Target question\nA. Target1\nB. Target2\nC. Target3\nD. Target4\nAnswer:",
        },
        {"role": "assistant", "content": "C"},
    ]


def test_eval_template_zh():
    support_set = [
        {
            "question": "示例问题",
            "A": "示例答案1",
            "B": "示例答案2",
            "C": "示例答案3",
            "D": "示例答案4",
            "answer": "B",
        }
    ]
    example = {
        "question": "目标问题",
        "A": "目标答案1",
        "B": "目标答案2",
        "C": "目标答案3",
        "D": "目标答案4",
        "answer": "C",
    }
    template = get_eval_template(name="zh")
    messages = template.format_example(example, support_set=support_set, subject_name="主题")
    assert messages == [
        {
            "role": "user",
            "content": (
                "以下是中国关于主题考试的单项选择题,请选出其中的正确答案。\n\n"
                "示例问题\nA. 示例答案1\nB. 示例答案2\nC. 示例答案3\nD. 示例答案4\n答案:"
            ),
        },
        {"role": "assistant", "content": "B"},
        {
            "role": "user",
            "content": "目标问题\nA. 目标答案1\nB. 目标答案2\nC. 目标答案3\nD. 目标答案4\n答案:",
        },
        {"role": "assistant", "content": "C"},
    ]
```
Changes to the attention test (`test_attention`):

```diff
@@ -6,7 +6,12 @@ from llamafactory.hparams import get_infer_args
 from llamafactory.model import load_model, load_tokenizer
 
 
-TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-LlamaForCausalLM")
+TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
+
+INFER_ARGS = {
+    "model_name_or_path": TINY_LLAMA,
+    "template": "llama3",
+}
 
 
 def test_attention():
@@ -23,13 +28,7 @@ def test_attention():
         "fa2": "LlamaFlashAttention2",
     }
     for requested_attention in attention_available:
-        model_args, _, finetuning_args, _ = get_infer_args(
-            {
-                "model_name_or_path": TINY_LLAMA,
-                "template": "llama2",
-                "flash_attn": requested_attention,
-            }
-        )
+        model_args, _, finetuning_args, _ = get_infer_args({"flash_attn": requested_attention, **INFER_ARGS})
         tokenizer_module = load_tokenizer(model_args)
         model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args)
         for module in model.modules():
```
Changes to the freeze-tuning tests:

```diff
@@ -6,14 +6,14 @@ from llamafactory.hparams import get_train_args
 from llamafactory.model import load_model, load_tokenizer
 
 
-TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-LlamaForCausalLM")
+TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
 
-TRAINING_ARGS = {
+TRAIN_ARGS = {
     "model_name_or_path": TINY_LLAMA,
     "stage": "sft",
     "do_train": True,
     "finetuning_type": "freeze",
-    "dataset": "llamafactory/tiny_dataset",
+    "dataset": "llamafactory/tiny-supervised-dataset",
     "dataset_dir": "ONLINE",
     "template": "llama3",
     "cutoff_len": 1024,
@@ -25,12 +25,7 @@ TRAINING_ARGS = {
 
 
 def test_freeze_all_modules():
-    model_args, _, _, finetuning_args, _ = get_train_args(
-        {
-            "freeze_trainable_layers": 1,
-            **TRAINING_ARGS,
-        }
-    )
+    model_args, _, _, finetuning_args, _ = get_train_args({"freeze_trainable_layers": 1, **TRAIN_ARGS})
     tokenizer_module = load_tokenizer(model_args)
     model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True)
     for name, param in model.named_parameters():
@@ -44,11 +39,7 @@ def test_freeze_all_modules():
 
 def test_freeze_extra_modules():
     model_args, _, _, finetuning_args, _ = get_train_args(
-        {
-            "freeze_trainable_layers": 1,
-            "freeze_extra_modules": "embed_tokens,lm_head",
-            **TRAINING_ARGS,
-        }
+        {"freeze_trainable_layers": 1, "freeze_extra_modules": "embed_tokens,lm_head", **TRAIN_ARGS}
     )
     tokenizer_module = load_tokenizer(model_args)
     model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True)
```
Changes to the full-tuning test (`test_full`):

```diff
@@ -6,14 +6,14 @@ from llamafactory.hparams import get_train_args
 from llamafactory.model import load_model, load_tokenizer
 
 
-TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-LlamaForCausalLM")
+TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
 
-TRAINING_ARGS = {
+TRAIN_ARGS = {
     "model_name_or_path": TINY_LLAMA,
     "stage": "sft",
     "do_train": True,
     "finetuning_type": "full",
-    "dataset": "llamafactory/tiny_dataset",
+    "dataset": "llamafactory/tiny-supervised-dataset",
     "dataset_dir": "ONLINE",
     "template": "llama3",
     "cutoff_len": 1024,
@@ -25,7 +25,7 @@ TRAINING_ARGS = {
 
 
 def test_full():
-    model_args, _, _, finetuning_args, _ = get_train_args(TRAINING_ARGS)
+    model_args, _, _, finetuning_args, _ = get_train_args(TRAIN_ARGS)
     tokenizer_module = load_tokenizer(model_args)
     model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True)
     for param in model.parameters():
```
`tests/model/test_lora.py` (new file, 63 lines)

```python
import os

import torch

from llamafactory.hparams import get_train_args
from llamafactory.model import load_model, load_tokenizer


TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")

TRAIN_ARGS = {
    "model_name_or_path": TINY_LLAMA,
    "stage": "sft",
    "do_train": True,
    "finetuning_type": "lora",
    "dataset": "llamafactory/tiny-supervised-dataset",
    "dataset_dir": "ONLINE",
    "template": "llama3",
    "cutoff_len": 1024,
    "overwrite_cache": True,
    "output_dir": "dummy_dir",
    "overwrite_output_dir": True,
    "fp16": True,
}


def test_lora_all_modules():
    model_args, _, _, finetuning_args, _ = get_train_args({"lora_target": "all", **TRAIN_ARGS})
    tokenizer_module = load_tokenizer(model_args)
    model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True)
    linear_modules = set()
    for name, param in model.named_parameters():
        if any(module in name for module in ["lora_A", "lora_B"]):
            linear_modules.add(name.split(".lora_", maxsplit=1)[0].split(".")[-1])
            assert param.requires_grad is True
            assert param.dtype == torch.float32
        else:
            assert param.requires_grad is False
            assert param.dtype == torch.float16

    assert linear_modules == {"q_proj", "k_proj", "v_proj", "o_proj", "up_proj", "gate_proj", "down_proj"}


def test_lora_extra_modules():
    model_args, _, _, finetuning_args, _ = get_train_args(
        {"lora_target": "all", "additional_target": "embed_tokens,lm_head", **TRAIN_ARGS}
    )
    tokenizer_module = load_tokenizer(model_args)
    model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True)
    extra_modules = set()
    for name, param in model.named_parameters():
        if any(module in name for module in ["lora_A", "lora_B"]):
            assert param.requires_grad is True
            assert param.dtype == torch.float32
        elif "modules_to_save" in name:
            extra_modules.add(name.split(".modules_to_save", maxsplit=1)[0].split(".")[-1])
            assert param.requires_grad is True
            assert param.dtype == torch.float32
        else:
            assert param.requires_grad is False
            assert param.dtype == torch.float16

    assert extra_modules == {"embed_tokens", "lm_head"}
```