Compare commits

12 commits

| Author | SHA1 | Date |
|---|---|---|
| | 0f1e592326 | |
| | 4d7dd0330d | |
| | ea2ca2777f | |
| | 4b2b92fd9a | |
| | 784088db3f | |
| | 0ecf0d51e3 | |
| | 041ecd0de1 | |
| | d812249db7 | |
| | 88528f1a87 | |
| | 82533114a7 | |
| | 6d9fbb3fa9 | |
| | 9953ae3d03 | |
.github/workflows/label_issue.yml (vendored, new file, 17 changes)

@@ -0,0 +1,17 @@
name: label_issue

on:
  issues:
    types:
      - opened

jobs:
  label_issue:
    runs-on: ubuntu-latest

    steps:
      - env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          ISSUE_URL: ${{ github.event.issue.html_url }}
        run: |
          gh issue edit $ISSUE_URL --add-label "pending"
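The single step above just calls the GitHub CLI against the issue that triggered the event. Run by hand it would look roughly like this (a sketch; the repository URL, issue number, and token are illustrative placeholders):

```bash
# Manual equivalent of the workflow step (issue URL and token are placeholders)
export GH_TOKEN="<token with write access to issues>"
gh issue edit https://github.com/OWNER/REPO/issues/123 --add-label "pending"
```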
.github/workflows/tests.yml (vendored, 10 changes)

@@ -9,6 +9,8 @@ on:
      - "requirements.txt"
      - ".github/workflows/*.yml"
  pull_request:
    types:
      - review_requested
    branches:
      - main
    paths:
@@ -19,21 +21,27 @@ on:
jobs:
  tests:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4
      - name: Checkout
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.8"
          cache: "pip"
          cache-dependency-path: "setup.py"

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          python -m pip install .[torch,dev]

      - name: Check quality
        run: |
          make style && make quality

      - name: Test with pytest
        run: |
          make test
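The CI steps above can be reproduced locally before opening a pull request; a minimal sketch, assuming the repository's Makefile exposes the same `style`, `quality`, and `test` targets used in the workflow:

```bash
# Local equivalent of the workflow's install, lint, and test steps
python -m pip install --upgrade pip
python -m pip install .[torch,dev]
make style && make quality
make test
```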
Dockerfile (38 changes)

@@ -1,14 +1,44 @@
FROM nvcr.io/nvidia/pytorch:24.01-py3
# Use the NVIDIA official image with PyTorch 2.3.0
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-24-02.html
FROM nvcr.io/nvidia/pytorch:24.02-py3

# Define installation arguments
ARG INSTALL_BNB=false
ARG INSTALL_VLLM=false
ARG INSTALL_DEEPSPEED=false
ARG PIP_INDEX=https://pypi.org/simple

# Set the working directory
WORKDIR /app

# Install the requirements
COPY requirements.txt /app/
RUN pip install -r requirements.txt
RUN pip config set global.index-url $PIP_INDEX
RUN python -m pip install --upgrade pip
RUN python -m pip install -r requirements.txt

# Copy the rest of the application into the image
COPY . /app/
RUN pip install -e .[metrics,bitsandbytes,qwen]

# Install the LLaMA Factory
RUN EXTRA_PACKAGES="metrics"; \
    if [ "$INSTALL_BNB" = "true" ]; then \
        EXTRA_PACKAGES="${EXTRA_PACKAGES},bitsandbytes"; \
    fi; \
    if [ "$INSTALL_VLLM" = "true" ]; then \
        EXTRA_PACKAGES="${EXTRA_PACKAGES},vllm"; \
    fi; \
    if [ "$INSTALL_DEEPSPEED" = "true" ]; then \
        EXTRA_PACKAGES="${EXTRA_PACKAGES},deepspeed"; \
    fi; \
    pip install -e .[$EXTRA_PACKAGES] && \
    pip uninstall -y transformer-engine

# Set up volumes
VOLUME [ "/root/.cache/huggingface/", "/app/data", "/app/output" ]

# Expose port 7860 for the LLaMA Board
EXPOSE 7860

CMD [ "llamafactory-cli", "webui" ]
# Expose port 8000 for the API service
EXPOSE 8000
README.md (38 changes)

@@ -335,7 +335,7 @@ huggingface-cli login
```bash
git clone --depth 1 https://github.com/hiyouga/LLaMA-Factory.git
cd LLaMA-Factory
pip install -e '.[torch,metrics]'
pip install -e ".[torch,metrics]"
```

Extra dependencies available: torch, torch_npu, metrics, deepspeed, bitsandbytes, vllm, galore, badam, gptq, awq, aqlm, qwen, modelscope, quality
@@ -405,9 +405,9 @@ Please refer to [data/README.md](data/README.md) for checking the details about
Use the following 3 commands to run LoRA **fine-tuning**, **inference** and **merging** of the Llama3-8B-Instruct model, respectively.

```bash
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml
CUDA_VISIBLE_DEVICES=0 llamafactory-cli chat examples/inference/llama3_lora_sft.yaml
CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml
llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml
llamafactory-cli chat examples/inference/llama3_lora_sft.yaml
llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml
```

See [examples/README.md](examples/README.md) for advanced usage (including distributed training).
@@ -417,33 +417,33 @@ See [examples/README.md](examples/README.md) for advanced usage (including distr

### Fine-Tuning with LLaMA Board GUI (powered by [Gradio](https://github.com/gradio-app/gradio))

#### Use local environment

```bash
CUDA_VISIBLE_DEVICES=0 GRADIO_SHARE=1 llamafactory-cli webui
llamafactory-cli webui
```

</details>

#### Use Docker
### Build Docker

```bash
docker build -f ./Dockerfile -t llama-factory:latest .
docker run --gpus=all \
docker build -f ./Dockerfile \
    --build-arg INSTALL_BNB=false \
    --build-arg INSTALL_VLLM=false \
    --build-arg INSTALL_DEEPSPEED=false \
    --build-arg PIP_INDEX=https://pypi.org/simple \
    -t llamafactory:latest .

docker run -it --gpus=all \
    -v ./hf_cache:/root/.cache/huggingface/ \
    -v ./data:/app/data \
    -v ./output:/app/output \
    -p 7860:7860 \
    -p 8000:8000 \
    --shm-size 16G \
    --name llama_factory \
    -d llama-factory:latest
    --name llamafactory \
    llamafactory:latest
```

#### Use Docker Compose

```bash
docker compose -f ./docker-compose.yml up -d
```
> [!TIP]
> Use Docker Compose to build image via `docker compose up -d`.

<details><summary>Details about volume</summary>
README_zh.md (36 changes)

@@ -335,7 +335,7 @@ huggingface-cli login
```bash
git clone --depth 1 https://github.com/hiyouga/LLaMA-Factory.git
cd LLaMA-Factory
pip install -e '.[torch,metrics]'
pip install -e ".[torch,metrics]"
```

Optional extra dependencies: torch, torch_npu, metrics, deepspeed, bitsandbytes, vllm, galore, badam, gptq, awq, aqlm, qwen, modelscope, quality
@@ -405,9 +405,9 @@ Docker image:
The following three commands run LoRA **fine-tuning**, **inference**, and **merging** of the Llama3-8B-Instruct model, respectively.

```bash
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml
CUDA_VISIBLE_DEVICES=0 llamafactory-cli chat examples/inference/llama3_lora_sft.yaml
CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml
llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml
llamafactory-cli chat examples/inference/llama3_lora_sft.yaml
llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml
```

See [examples/README_zh.md](examples/README_zh.md) for advanced usage (including multi-GPU fine-tuning).
@@ -417,31 +417,33 @@ CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_lora_s

### Fine-Tuning with LLaMA Board GUI (powered by [Gradio](https://github.com/gradio-app/gradio))

#### Use local environment

```bash
CUDA_VISIBLE_DEVICES=0 GRADIO_SHARE=1 llamafactory-cli webui
llamafactory-cli webui
```

#### Use Docker
### Build Docker

```bash
docker build -f ./Dockerfile -t llama-factory:latest .
docker run --gpus=all \
docker build -f ./Dockerfile \
    --build-arg INSTALL_BNB=false \
    --build-arg INSTALL_VLLM=false \
    --build-arg INSTALL_DEEPSPEED=false \
    --build-arg PIP_INDEX=https://pypi.org/simple \
    -t llamafactory:latest .

docker run -it --gpus=all \
    -v ./hf_cache:/root/.cache/huggingface/ \
    -v ./data:/app/data \
    -v ./output:/app/output \
    -p 7860:7860 \
    -p 8000:8000 \
    --shm-size 16G \
    --name llama_factory \
    -d llama-factory:latest
    --name llamafactory \
    llamafactory:latest
```

#### Use Docker Compose

```bash
docker compose -f ./docker-compose.yml up -d
```
> [!TIP]
> Build the image with Docker Compose via `docker compose up -d`.

<details><summary>Details about volumes</summary>

docker-compose.yml
@@ -1,17 +1,23 @@
version: '3.8'

services:
  llama-factory:
  llamafactory:
    build:
      dockerfile: Dockerfile
      context: .
    container_name: llama_factory
      args:
        INSTALL_BNB: false
        INSTALL_VLLM: false
        INSTALL_DEEPSPEED: false
        PIP_INDEX: https://pypi.org/simple
    container_name: llamafactory
    volumes:
      - ./hf_cache:/root/.cache/huggingface/
      - ./data:/app/data
      - ./output:/app/output
    ports:
      - "7860:7860"
      - "8000:8000"
    ipc: host
    deploy:
      resources:
@@ -26,9 +26,7 @@ class Evaluator:
        self.template = get_template_and_fix_tokenizer(self.tokenizer, self.data_args.template)
        self.model = load_model(self.tokenizer, self.model_args, finetuning_args)
        self.eval_template = get_eval_template(self.eval_args.lang)
        self.choice_inputs = [
            self.tokenizer.encode(self.eval_template.prefix + ch, add_special_tokens=False)[-1] for ch in CHOICES
        ]
        self.choice_inputs = [self.tokenizer.encode(ch, add_special_tokens=False)[-1] for ch in CHOICES]

    @torch.inference_mode()
    def batch_inference(self, batch_input: Dict[str, torch.Tensor]) -> List[str]:
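The change above stops prepending the few-shot `prefix` (a leading space) when looking up the candidate-answer token ids, matching the template change below where `"\nAnswer: "` becomes `"\nAnswer:"`. A minimal sketch of why the two lookups give different ids; gpt2 is used only as an illustrative tokenizer, any BPE tokenizer that merges a leading space shows the same effect:

```python
# Sketch: the token id used to score a choice depends on whether the leading
# space is part of the encoded string.
from transformers import AutoTokenizer

CHOICES = ["A", "B", "C", "D"]
tokenizer = AutoTokenizer.from_pretrained("gpt2")  # illustrative tokenizer

# Old lookup: encode prefix (" ") + choice and keep the last sub-token.
old_ids = [tokenizer.encode(" " + ch, add_special_tokens=False)[-1] for ch in CHOICES]

# New lookup: encode the bare letter and keep its last sub-token.
new_ids = [tokenizer.encode(ch, add_special_tokens=False)[-1] for ch in CHOICES]

print(old_ids)  # ids for " A", " B", ...
print(new_ids)  # ids for "A", "B", ...
```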
@@ -10,7 +10,6 @@ class EvalTemplate:
    system: str
    choice: str
    answer: str
    prefix: str

    def _parse_example(self, example: Dict[str, str]) -> Tuple[str, str]:
        r"""
@@ -42,8 +41,8 @@ class EvalTemplate:
eval_templates: Dict[str, "EvalTemplate"] = {}


def _register_eval_template(name: str, system: str, choice: str, answer: str, prefix: str) -> None:
    eval_templates[name] = EvalTemplate(system=system, choice=choice, answer=answer, prefix=prefix)
def _register_eval_template(name: str, system: str, choice: str, answer: str) -> None:
    eval_templates[name] = EvalTemplate(system=system, choice=choice, answer=answer)


def get_eval_template(name: str) -> "EvalTemplate":
@@ -56,8 +55,7 @@ _register_eval_template(
    name="en",
    system="The following are multiple choice questions (with answers) about {subject}.\n\n",
    choice="\n{choice}. {content}",
    answer="\nAnswer: ",
    prefix=" ",
    answer="\nAnswer:",
)
@@ -66,5 +64,4 @@ _register_eval_template(
    system="以下是中国关于{subject}考试的单项选择题,请选出其中的正确答案。\n\n",
    choice="\n{choice}. {content}",
    answer="\n答案:",
    prefix=" ",
)
@@ -6,13 +6,10 @@ import peft
import torch
import transformers
import trl
from transformers.integrations import is_deepspeed_available
from transformers.utils import is_bitsandbytes_available, is_torch_cuda_available, is_torch_npu_available

from .packages import is_vllm_available
from transformers.utils import is_torch_cuda_available, is_torch_npu_available


VERSION = "0.8.0"
VERSION = "0.8.1"


def print_env() -> None:
@@ -37,19 +34,25 @@ def print_env() -> None:
        info["NPU type"] = torch.npu.get_device_name()
        info["CANN version"] = torch.version.cann

    if is_deepspeed_available():
    try:
        import deepspeed  # type: ignore

        info["DeepSpeed version"] = deepspeed.__version__
    except Exception:
        pass

    if is_bitsandbytes_available():
    try:
        import bitsandbytes

        info["Bitsandbytes version"] = bitsandbytes.__version__
    except Exception:
        pass

    if is_vllm_available():
    try:
        import vllm

        info["vLLM version"] = vllm.__version__
    except Exception:
        pass

    print("\n" + "\n".join(["- {}: {}".format(key, value) for key, value in info.items()]) + "\n")
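Each optional backend (DeepSpeed, bitsandbytes, vLLM) is now probed with a local try/except import instead of the transformers availability helpers. A standalone sketch of the same pattern; the helper name below is illustrative, not from the repository:

```python
# Sketch of the optional-dependency reporting pattern used above (helper name is illustrative).
from typing import Dict


def collect_optional_versions(packages: Dict[str, str]) -> Dict[str, str]:
    """Try to import each package and record its version, silently skipping missing ones."""
    info = {}
    for label, module_name in packages.items():
        try:
            module = __import__(module_name)
            info[label] = module.__version__
        except Exception:
            pass
    return info


print(collect_optional_versions({"DeepSpeed version": "deepspeed", "vLLM version": "vllm"}))
```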
@@ -209,6 +209,7 @@ def _setup_lora_tuning(
            "lora_alpha": finetuning_args.lora_alpha,
            "lora_dropout": finetuning_args.lora_dropout,
            "use_rslora": finetuning_args.use_rslora,
            "use_dora": finetuning_args.use_dora,
            "modules_to_save": finetuning_args.additional_target,
        }
@@ -218,7 +219,6 @@ def _setup_lora_tuning(
            lora_config = LoraConfig(
                task_type=TaskType.CAUSAL_LM,
                inference_mode=False,
                use_dora=finetuning_args.use_dora,
                **peft_kwargs,
            )
            model = get_peft_model(model, lora_config)
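`use_dora` now travels through `peft_kwargs` together with the new `use_rslora` and `modules_to_save` entries instead of being passed to `LoraConfig` separately. A minimal sketch of the resulting call, assuming a peft release with rsLoRA and DoRA support; the rank, targets, and dropout values are illustrative:

```python
# Sketch: building a LoraConfig from kwargs like those collected above (values are illustrative).
from peft import LoraConfig, TaskType

peft_kwargs = {
    "r": 8,
    "target_modules": ["q_proj", "v_proj"],
    "lora_alpha": 16,
    "lora_dropout": 0.0,
    "use_rslora": False,
    "use_dora": True,  # now part of peft_kwargs rather than a separate argument
    "modules_to_save": None,
}

lora_config = LoraConfig(task_type=TaskType.CAUSAL_LM, inference_mode=False, **peft_kwargs)
# get_peft_model(model, lora_config) is then applied to the loaded base model.
```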
@@ -96,7 +96,8 @@ def llama_attention_forward(
                (
                    attn_output[:, :, : self.num_heads // 2],
                    attn_output[:, :, self.num_heads // 2 :].roll(groupsz // 2, dims=1),
                )
                ),
                dim=2,
            )

        attn_output = attn_output.reshape(bsz, q_len, self.hidden_size)
@@ -194,7 +195,8 @@ def llama_flash_attention_2_forward(
                (
                    attn_output[:, :, : self.num_heads // 2],
                    attn_output[:, :, self.num_heads // 2 :].roll(groupsz // 2, dims=1),
                )
                ),
                dim=2,
            )

        attn_output = attn_output.reshape(bsz, q_len, self.hidden_size).contiguous()
@@ -293,7 +295,8 @@ def llama_sdpa_attention_forward(
                (
                    attn_output[:, :, : self.num_heads // 2],
                    attn_output[:, :, self.num_heads // 2 :].roll(groupsz // 2, dims=1),
                )
                ),
                dim=2,
            )

        attn_output = attn_output.reshape(bsz, q_len, self.hidden_size)
@@ -303,7 +306,7 @@ def llama_sdpa_attention_forward(


def _apply_llama_patch() -> None:
    require_version("transformers==4.40.2", "To fix: pip install transformers==4.40.2")
    require_version("transformers==4.41.2", "To fix: pip install transformers==4.41.2")
    LlamaAttention.forward = llama_attention_forward
    LlamaFlashAttention2.forward = llama_flash_attention_2_forward
    LlamaSdpaAttention.forward = llama_sdpa_attention_forward
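The three patched forwards all perform the same un-shift for shift-short attention: the second half of the heads is rolled back along the sequence dimension before the heads are concatenated again. A self-contained sketch on a dummy tensor (not from the repository; shapes are illustrative):

```python
# Sketch of the un-shift performed above: roll the second half of the heads
# back along the sequence axis, then concatenate the two halves on the head axis.
import torch

bsz, q_len, num_heads, head_dim, groupsz = 1, 8, 4, 2, 4
attn_output = torch.randn(bsz, q_len, num_heads, head_dim)

attn_output = torch.cat(
    (
        attn_output[:, :, : num_heads // 2],                              # untouched half of the heads
        attn_output[:, :, num_heads // 2 :].roll(groupsz // 2, dims=1),  # shifted half rolled back
    ),
    dim=2,
)
print(attn_output.shape)  # torch.Size([1, 8, 4, 2])
```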
@@ -1,4 +1,5 @@
import os
import random

import pytest
from datasets import load_dataset
@@ -8,17 +9,17 @@ from llamafactory.hparams import get_train_args
from llamafactory.model import load_tokenizer


TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-LlamaForCausalLM")
TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")

TRAINING_ARGS = {
TRAIN_ARGS = {
    "model_name_or_path": TINY_LLAMA,
    "stage": "sft",
    "do_train": True,
    "finetuning_type": "full",
    "dataset": "llamafactory/tiny_dataset",
    "dataset": "llamafactory/tiny-supervised-dataset",
    "dataset_dir": "ONLINE",
    "template": "llama3",
    "cutoff_len": 1024,
    "cutoff_len": 8192,
    "overwrite_cache": True,
    "output_dir": "dummy_dir",
    "overwrite_output_dir": True,
@@ -26,19 +27,24 @@ TRAINING_ARGS = {
}


@pytest.mark.parametrize("test_num", [5])
def test_supervised(test_num: int):
    model_args, data_args, training_args, _, _ = get_train_args(TRAINING_ARGS)
@pytest.mark.parametrize("num_samples", [10])
def test_supervised(num_samples: int):
    model_args, data_args, training_args, _, _ = get_train_args(TRAIN_ARGS)
    tokenizer_module = load_tokenizer(model_args)
    tokenizer = tokenizer_module["tokenizer"]
    tokenized_data = get_dataset(model_args, data_args, training_args, stage="sft", **tokenizer_module)

    original_data = load_dataset(TRAINING_ARGS["dataset"], split="train")
    for test_idx in range(test_num):
        decode_result = tokenizer.decode(tokenized_data["input_ids"][test_idx])
    original_data = load_dataset(TRAIN_ARGS["dataset"], split="train")
    indexes = random.choices(range(len(original_data)), k=num_samples)
    for index in indexes:
        decoded_result = tokenizer.decode(tokenized_data["input_ids"][index])
        prompt = original_data[index]["instruction"]
        if original_data[index]["input"]:
            prompt += "\n" + original_data[index]["input"]

        messages = [
            {"role": "user", "content": original_data[test_idx]["instruction"]},
            {"role": "assistant", "content": original_data[test_idx]["output"]},
            {"role": "user", "content": prompt},
            {"role": "assistant", "content": original_data[index]["output"]},
        ]
        templated_result = tokenizer.apply_chat_template(messages, tokenize=False)
        assert decode_result == templated_result
        assert decoded_result == templated_result
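`apply_chat_template` renders the message list with the tokenizer's built-in chat template, which is what the decoded training sample is compared against. A standalone sketch, assuming access to the Hugging Face Hub for the tiny test model named in the diff; the message contents are illustrative:

```python
# Sketch: rendering a user/assistant pair with a tokenizer's chat template.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("llamafactory/tiny-random-Llama-3")
messages = [
    {"role": "user", "content": "instruction text"},
    {"role": "assistant", "content": "reference output"},
]
print(tokenizer.apply_chat_template(messages, tokenize=False))
```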
tests/eval/test_eval_template.py (new file, 77 changes)

@@ -0,0 +1,77 @@
from llamafactory.eval.template import get_eval_template


def test_eval_template_en():
    support_set = [
        {
            "question": "Fewshot question",
            "A": "Fewshot1",
            "B": "Fewshot2",
            "C": "Fewshot3",
            "D": "Fewshot4",
            "answer": "B",
        }
    ]
    example = {
        "question": "Target question",
        "A": "Target1",
        "B": "Target2",
        "C": "Target3",
        "D": "Target4",
        "answer": "C",
    }
    template = get_eval_template(name="en")
    messages = template.format_example(example, support_set=support_set, subject_name="SubName")
    assert messages == [
        {
            "role": "user",
            "content": (
                "The following are multiple choice questions (with answers) about SubName.\n\n"
                "Fewshot question\nA. Fewshot1\nB. Fewshot2\nC. Fewshot3\nD. Fewshot4\nAnswer:"
            ),
        },
        {"role": "assistant", "content": "B"},
        {
            "role": "user",
            "content": "Target question\nA. Target1\nB. Target2\nC. Target3\nD. Target4\nAnswer:",
        },
        {"role": "assistant", "content": "C"},
    ]


def test_eval_template_zh():
    support_set = [
        {
            "question": "示例问题",
            "A": "示例答案1",
            "B": "示例答案2",
            "C": "示例答案3",
            "D": "示例答案4",
            "answer": "B",
        }
    ]
    example = {
        "question": "目标问题",
        "A": "目标答案1",
        "B": "目标答案2",
        "C": "目标答案3",
        "D": "目标答案4",
        "answer": "C",
    }
    template = get_eval_template(name="zh")
    messages = template.format_example(example, support_set=support_set, subject_name="主题")
    assert messages == [
        {
            "role": "user",
            "content": (
                "以下是中国关于主题考试的单项选择题,请选出其中的正确答案。\n\n"
                "示例问题\nA. 示例答案1\nB. 示例答案2\nC. 示例答案3\nD. 示例答案4\n答案:"
            ),
        },
        {"role": "assistant", "content": "B"},
        {
            "role": "user",
            "content": "目标问题\nA. 目标答案1\nB. 目标答案2\nC. 目标答案3\nD. 目标答案4\n答案:",
        },
        {"role": "assistant", "content": "C"},
    ]
@@ -6,7 +6,12 @@ from llamafactory.hparams import get_infer_args
from llamafactory.model import load_model, load_tokenizer


TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-LlamaForCausalLM")
TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")

INFER_ARGS = {
    "model_name_or_path": TINY_LLAMA,
    "template": "llama3",
}


def test_attention():
@@ -23,13 +28,7 @@ def test_attention():
        "fa2": "LlamaFlashAttention2",
    }
    for requested_attention in attention_available:
        model_args, _, finetuning_args, _ = get_infer_args(
            {
                "model_name_or_path": TINY_LLAMA,
                "template": "llama2",
                "flash_attn": requested_attention,
            }
        )
        model_args, _, finetuning_args, _ = get_infer_args({"flash_attn": requested_attention, **INFER_ARGS})
        tokenizer_module = load_tokenizer(model_args)
        model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args)
        for module in model.modules():
@@ -6,14 +6,14 @@ from llamafactory.hparams import get_train_args
from llamafactory.model import load_model, load_tokenizer


TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-LlamaForCausalLM")
TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")

TRAINING_ARGS = {
TRAIN_ARGS = {
    "model_name_or_path": TINY_LLAMA,
    "stage": "sft",
    "do_train": True,
    "finetuning_type": "freeze",
    "dataset": "llamafactory/tiny_dataset",
    "dataset": "llamafactory/tiny-supervised-dataset",
    "dataset_dir": "ONLINE",
    "template": "llama3",
    "cutoff_len": 1024,
@@ -25,12 +25,7 @@ TRAINING_ARGS = {


def test_freeze_all_modules():
    model_args, _, _, finetuning_args, _ = get_train_args(
        {
            "freeze_trainable_layers": 1,
            **TRAINING_ARGS,
        }
    )
    model_args, _, _, finetuning_args, _ = get_train_args({"freeze_trainable_layers": 1, **TRAIN_ARGS})
    tokenizer_module = load_tokenizer(model_args)
    model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True)
    for name, param in model.named_parameters():
@@ -44,11 +39,7 @@ def test_freeze_all_modules():

def test_freeze_extra_modules():
    model_args, _, _, finetuning_args, _ = get_train_args(
        {
            "freeze_trainable_layers": 1,
            "freeze_extra_modules": "embed_tokens,lm_head",
            **TRAINING_ARGS,
        }
        {"freeze_trainable_layers": 1, "freeze_extra_modules": "embed_tokens,lm_head", **TRAIN_ARGS}
    )
    tokenizer_module = load_tokenizer(model_args)
    model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True)

@@ -6,14 +6,14 @@ from llamafactory.hparams import get_train_args
from llamafactory.model import load_model, load_tokenizer


TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-LlamaForCausalLM")
TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")

TRAINING_ARGS = {
TRAIN_ARGS = {
    "model_name_or_path": TINY_LLAMA,
    "stage": "sft",
    "do_train": True,
    "finetuning_type": "full",
    "dataset": "llamafactory/tiny_dataset",
    "dataset": "llamafactory/tiny-supervised-dataset",
    "dataset_dir": "ONLINE",
    "template": "llama3",
    "cutoff_len": 1024,
@@ -25,7 +25,7 @@ TRAINING_ARGS = {


def test_full():
    model_args, _, _, finetuning_args, _ = get_train_args(TRAINING_ARGS)
    model_args, _, _, finetuning_args, _ = get_train_args(TRAIN_ARGS)
    tokenizer_module = load_tokenizer(model_args)
    model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True)
    for param in model.parameters():
tests/model/test_lora.py (new file, 63 changes)

@@ -0,0 +1,63 @@
import os

import torch

from llamafactory.hparams import get_train_args
from llamafactory.model import load_model, load_tokenizer


TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")

TRAIN_ARGS = {
    "model_name_or_path": TINY_LLAMA,
    "stage": "sft",
    "do_train": True,
    "finetuning_type": "lora",
    "dataset": "llamafactory/tiny-supervised-dataset",
    "dataset_dir": "ONLINE",
    "template": "llama3",
    "cutoff_len": 1024,
    "overwrite_cache": True,
    "output_dir": "dummy_dir",
    "overwrite_output_dir": True,
    "fp16": True,
}


def test_lora_all_modules():
    model_args, _, _, finetuning_args, _ = get_train_args({"lora_target": "all", **TRAIN_ARGS})
    tokenizer_module = load_tokenizer(model_args)
    model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True)
    linear_modules = set()
    for name, param in model.named_parameters():
        if any(module in name for module in ["lora_A", "lora_B"]):
            linear_modules.add(name.split(".lora_", maxsplit=1)[0].split(".")[-1])
            assert param.requires_grad is True
            assert param.dtype == torch.float32
        else:
            assert param.requires_grad is False
            assert param.dtype == torch.float16

    assert linear_modules == {"q_proj", "k_proj", "v_proj", "o_proj", "up_proj", "gate_proj", "down_proj"}


def test_lora_extra_modules():
    model_args, _, _, finetuning_args, _ = get_train_args(
        {"lora_target": "all", "additional_target": "embed_tokens,lm_head", **TRAIN_ARGS}
    )
    tokenizer_module = load_tokenizer(model_args)
    model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True)
    extra_modules = set()
    for name, param in model.named_parameters():
        if any(module in name for module in ["lora_A", "lora_B"]):
            assert param.requires_grad is True
            assert param.dtype == torch.float32
        elif "modules_to_save" in name:
            extra_modules.add(name.split(".modules_to_save", maxsplit=1)[0].split(".")[-1])
            assert param.requires_grad is True
            assert param.dtype == torch.float32
        else:
            assert param.requires_grad is False
            assert param.dtype == torch.float16

    assert extra_modules == {"embed_tokens", "lm_head"}
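The new test modules can be run locally; a minimal sketch using the file paths shown in this diff, with the `TINY_LLAMA` override being optional and shown only as an example:

```bash
# Run the new eval-template and LoRA tests locally (requires Hub access for the tiny model/dataset)
pip install -e .[torch,dev]
TINY_LLAMA=llamafactory/tiny-random-Llama-3 pytest tests/eval/test_eval_template.py tests/model/test_lora.py
```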