12 Commits

Author SHA1 Message Date
hiyouga
0f1e592326 release v0.8.1
Former-commit-id: 875a34f492701d1c644facbe9ede411af2931513
2024-06-11 00:44:26 +08:00
hiyouga
4d7dd0330d fix #4160
The split heads should be concatenated in dim=2


Former-commit-id: 4b3f247f270d44df9fe226cfe0dabfb7fcd2deda
2024-06-11 00:37:17 +08:00
hiyouga
ea2ca2777f fix #4145
Fix the docker image


Former-commit-id: a9838281156fe870bfcde5d1f7afc15264fd4aad
2024-06-11 00:19:17 +08:00
hiyouga
4b2b92fd9a update evaluator
Former-commit-id: bb8661e62481ff7027b8969f3d8a6a17290c9da3
2024-06-10 23:56:00 +08:00
hiyouga
784088db3f fix #2666
Former-commit-id: f121d5c4f94af9f165132c4309cb9bdc8217d985
2024-06-10 21:24:15 +08:00
hoshi-hiyouga
0ecf0d51e3 Merge pull request #4167 from yzoaim/branch
fix README

Former-commit-id: 1a877b0fbf54478dbf905fb3e84bd079a55bb725
2024-06-10 16:24:33 +08:00
-.-
041ecd0de1 fix README
Former-commit-id: fa30028c0b83c38610b596209493a748b8ca0928
2024-06-08 23:51:56 +08:00
hiyouga
d812249db7 add pr ci
Former-commit-id: 9b05bb8540b946d0c74bf804bcafc4a785d22c47
2024-06-08 21:25:35 +08:00
hiyouga
88528f1a87 Update tests.yml
Former-commit-id: e90f0cc30d6bb819246ccc08935c39e714c179a1
2024-06-08 21:15:36 +08:00
hiyouga
82533114a7 update git workflows
Former-commit-id: 5a3f26bc53433caa98b2a66294becaf156280a4c
2024-06-08 21:11:32 +08:00
hiyouga
6d9fbb3fa9 fix llamafactory-cli env
Former-commit-id: b0515e5f42831b67d1f4d049999ecb68756e66db
2024-06-08 07:15:45 +08:00
hiyouga
9953ae3d03 set dev version
Former-commit-id: 08b7fe1c452cc99264ff0312e310b579590c6a45
2024-06-08 06:46:09 +08:00
17 changed files with 304 additions and 104 deletions

17
.github/workflows/label_issue.yml vendored Normal file
View File

@@ -0,0 +1,17 @@
name: label_issue
on:
issues:
types:
- opened
jobs:
label_issue:
runs-on: ubuntu-latest
steps:
- env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
ISSUE_URL: ${{ github.event.issue.html_url }}
run: |
gh issue edit $ISSUE_URL --add-label "pending"

View File

@@ -9,6 +9,8 @@ on:
- "requirements.txt"
- ".github/workflows/*.yml"
pull_request:
types:
- review_requested
branches:
- main
paths:
@@ -19,21 +21,27 @@ on:
jobs:
tests:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Checkout
uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.8"
cache: "pip"
cache-dependency-path: "setup.py"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install .[torch,dev]
- name: Check quality
run: |
make style && make quality
- name: Test with pytest
run: |
make test

View File

@@ -1,14 +1,44 @@
FROM nvcr.io/nvidia/pytorch:24.01-py3
# Use the NVIDIA official image with PyTorch 2.3.0
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-24-02.html
FROM nvcr.io/nvidia/pytorch:24.02-py3
# Define installation arguments
ARG INSTALL_BNB=false
ARG INSTALL_VLLM=false
ARG INSTALL_DEEPSPEED=false
ARG PIP_INDEX=https://pypi.org/simple
# Set the working directory
WORKDIR /app
# Install the requirements
COPY requirements.txt /app/
RUN pip install -r requirements.txt
RUN pip config set global.index-url $PIP_INDEX
RUN python -m pip install --upgrade pip
RUN python -m pip install -r requirements.txt
# Copy the rest of the application into the image
COPY . /app/
RUN pip install -e .[metrics,bitsandbytes,qwen]
# Install the LLaMA Factory
RUN EXTRA_PACKAGES="metrics"; \
if [ "$INSTALL_BNB" = "true" ]; then \
EXTRA_PACKAGES="${EXTRA_PACKAGES},bitsandbytes"; \
fi; \
if [ "$INSTALL_VLLM" = "true" ]; then \
EXTRA_PACKAGES="${EXTRA_PACKAGES},vllm"; \
fi; \
if [ "$INSTALL_DEEPSPEED" = "true" ]; then \
EXTRA_PACKAGES="${EXTRA_PACKAGES},deepspeed"; \
fi; \
pip install -e .[$EXTRA_PACKAGES] && \
pip uninstall -y transformer-engine
# Set up volumes
VOLUME [ "/root/.cache/huggingface/", "/app/data", "/app/output" ]
# Expose port 7860 for the LLaMA Board
EXPOSE 7860
CMD [ "llamafactory-cli", "webui" ]
# Expose port 8000 for the API service
EXPOSE 8000

View File

@@ -335,7 +335,7 @@ huggingface-cli login
```bash
git clone --depth 1 https://github.com/hiyouga/LLaMA-Factory.git
cd LLaMA-Factory
pip install -e '.[torch,metrics]'
pip install -e ".[torch,metrics]"
```
Extra dependencies available: torch, torch_npu, metrics, deepspeed, bitsandbytes, vllm, galore, badam, gptq, awq, aqlm, qwen, modelscope, quality
@@ -405,9 +405,9 @@ Please refer to [data/README.md](data/README.md) for checking the details about
Use the following 3 commands to run LoRA **fine-tuning**, **inference** and **merging** of the Llama3-8B-Instruct model, respectively.
```bash
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml
CUDA_VISIBLE_DEVICES=0 llamafactory-cli chat examples/inference/llama3_lora_sft.yaml
CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml
llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml
llamafactory-cli chat examples/inference/llama3_lora_sft.yaml
llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml
```
See [examples/README.md](examples/README.md) for advanced usage (including distributed training).
@@ -417,33 +417,33 @@ See [examples/README.md](examples/README.md) for advanced usage (including distr
### Fine-Tuning with LLaMA Board GUI (powered by [Gradio](https://github.com/gradio-app/gradio))
#### Use local environment
```bash
CUDA_VISIBLE_DEVICES=0 GRADIO_SHARE=1 llamafactory-cli webui
llamafactory-cli webui
```
</details>
#### Use Docker
### Build Docker
```bash
docker build -f ./Dockerfile -t llama-factory:latest .
docker run --gpus=all \
docker build -f ./Dockerfile \
--build-arg INSTALL_BNB=false \
--build-arg INSTALL_VLLM=false \
--build-arg INSTALL_DEEPSPEED=false \
--build-arg PIP_INDEX=https://pypi.org/simple \
-t llamafactory:latest .
docker run -it --gpus=all \
-v ./hf_cache:/root/.cache/huggingface/ \
-v ./data:/app/data \
-v ./output:/app/output \
-p 7860:7860 \
-p 8000:8000 \
--shm-size 16G \
--name llama_factory \
-d llama-factory:latest
--name llamafactory \
llamafactory:latest
```
#### Use Docker Compose
```bash
docker compose -f ./docker-compose.yml up -d
```
> [!TIP]
> Use Docker Compose to build image via `docker compose up -d`.
<details><summary>Details about volume</summary>

View File

@@ -335,7 +335,7 @@ huggingface-cli login
```bash
git clone --depth 1 https://github.com/hiyouga/LLaMA-Factory.git
cd LLaMA-Factory
pip install -e '.[torch,metrics]'
pip install -e ".[torch,metrics]"
```
可选的额外依赖项torch、torch_npu、metrics、deepspeed、bitsandbytes、vllm、galore、badam、gptq、awq、aqlm、qwen、modelscope、quality
@@ -405,9 +405,9 @@ Docker 镜像:
下面三行命令分别对 Llama3-8B-Instruct 模型进行 LoRA **微调**、**推理**和**合并**。
```bash
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml
CUDA_VISIBLE_DEVICES=0 llamafactory-cli chat examples/inference/llama3_lora_sft.yaml
CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml
llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml
llamafactory-cli chat examples/inference/llama3_lora_sft.yaml
llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml
```
高级用法请参考 [examples/README_zh.md](examples/README_zh.md)(包括多 GPU 微调)。
@@ -417,31 +417,33 @@ CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_lora_s
### LLaMA Board 可视化微调(由 [Gradio](https://github.com/gradio-app/gradio) 驱动)
#### 使用本地环境
```bash
CUDA_VISIBLE_DEVICES=0 GRADIO_SHARE=1 llamafactory-cli webui
llamafactory-cli webui
```
#### 使用 Docker
### 构建 Docker
```bash
docker build -f ./Dockerfile -t llama-factory:latest .
docker run --gpus=all \
docker build -f ./Dockerfile \
--build-arg INSTALL_BNB=false \
--build-arg INSTALL_VLLM=false \
--build-arg INSTALL_DEEPSPEED=false \
--build-arg PIP_INDEX=https://pypi.org/simple \
-t llamafactory:latest .
docker run -it --gpus=all \
-v ./hf_cache:/root/.cache/huggingface/ \
-v ./data:/app/data \
-v ./output:/app/output \
-p 7860:7860 \
-p 8000:8000 \
--shm-size 16G \
--name llama_factory \
-d llama-factory:latest
--name llamafactory \
llamafactory:latest
```
#### 使用 Docker Compose
```bash
docker compose -f ./docker-compose.yml up -d
```
> [!TIP]
> 通过 `docker compose up -d` 使用 Docker Compose 构建镜像。
<details><summary>数据卷详情</summary>

View File

@@ -1,17 +1,23 @@
version: '3.8'
services:
llama-factory:
llamafactory:
build:
dockerfile: Dockerfile
context: .
container_name: llama_factory
args:
INSTALL_BNB: false
INSTALL_VLLM: false
INSTALL_DEEPSPEED: false
PIP_INDEX: https://pypi.org/simple
container_name: llamafactory
volumes:
- ./hf_cache:/root/.cache/huggingface/
- ./data:/app/data
- ./output:/app/output
ports:
- "7860:7860"
- "8000:8000"
ipc: host
deploy:
resources:

View File

@@ -26,9 +26,7 @@ class Evaluator:
self.template = get_template_and_fix_tokenizer(self.tokenizer, self.data_args.template)
self.model = load_model(self.tokenizer, self.model_args, finetuning_args)
self.eval_template = get_eval_template(self.eval_args.lang)
self.choice_inputs = [
self.tokenizer.encode(self.eval_template.prefix + ch, add_special_tokens=False)[-1] for ch in CHOICES
]
self.choice_inputs = [self.tokenizer.encode(ch, add_special_tokens=False)[-1] for ch in CHOICES]
@torch.inference_mode()
def batch_inference(self, batch_input: Dict[str, torch.Tensor]) -> List[str]:

View File

@@ -10,7 +10,6 @@ class EvalTemplate:
system: str
choice: str
answer: str
prefix: str
def _parse_example(self, example: Dict[str, str]) -> Tuple[str, str]:
r"""
@@ -42,8 +41,8 @@ class EvalTemplate:
eval_templates: Dict[str, "EvalTemplate"] = {}
def _register_eval_template(name: str, system: str, choice: str, answer: str, prefix: str) -> None:
eval_templates[name] = EvalTemplate(system=system, choice=choice, answer=answer, prefix=prefix)
def _register_eval_template(name: str, system: str, choice: str, answer: str) -> None:
eval_templates[name] = EvalTemplate(system=system, choice=choice, answer=answer)
def get_eval_template(name: str) -> "EvalTemplate":
@@ -56,8 +55,7 @@ _register_eval_template(
name="en",
system="The following are multiple choice questions (with answers) about {subject}.\n\n",
choice="\n{choice}. {content}",
answer="\nAnswer: ",
prefix=" ",
answer="\nAnswer:",
)
@@ -66,5 +64,4 @@ _register_eval_template(
system="以下是中国关于{subject}考试的单项选择题,请选出其中的正确答案。\n\n",
choice="\n{choice}. {content}",
answer="\n答案:",
prefix=" ",
)

View File

@@ -6,13 +6,10 @@ import peft
import torch
import transformers
import trl
from transformers.integrations import is_deepspeed_available
from transformers.utils import is_bitsandbytes_available, is_torch_cuda_available, is_torch_npu_available
from .packages import is_vllm_available
from transformers.utils import is_torch_cuda_available, is_torch_npu_available
VERSION = "0.8.0"
VERSION = "0.8.1"
def print_env() -> None:
@@ -37,19 +34,25 @@ def print_env() -> None:
info["NPU type"] = torch.npu.get_device_name()
info["CANN version"] = torch.version.cann
if is_deepspeed_available():
try:
import deepspeed # type: ignore
info["DeepSpeed version"] = deepspeed.__version__
except Exception:
pass
if is_bitsandbytes_available():
try:
import bitsandbytes
info["Bitsandbytes version"] = bitsandbytes.__version__
except Exception:
pass
if is_vllm_available():
try:
import vllm
info["vLLM version"] = vllm.__version__
except Exception:
pass
print("\n" + "\n".join(["- {}: {}".format(key, value) for key, value in info.items()]) + "\n")

View File

@@ -209,6 +209,7 @@ def _setup_lora_tuning(
"lora_alpha": finetuning_args.lora_alpha,
"lora_dropout": finetuning_args.lora_dropout,
"use_rslora": finetuning_args.use_rslora,
"use_dora": finetuning_args.use_dora,
"modules_to_save": finetuning_args.additional_target,
}
@@ -218,7 +219,6 @@ def _setup_lora_tuning(
lora_config = LoraConfig(
task_type=TaskType.CAUSAL_LM,
inference_mode=False,
use_dora=finetuning_args.use_dora,
**peft_kwargs,
)
model = get_peft_model(model, lora_config)

View File

@@ -96,7 +96,8 @@ def llama_attention_forward(
(
attn_output[:, :, : self.num_heads // 2],
attn_output[:, :, self.num_heads // 2 :].roll(groupsz // 2, dims=1),
)
),
dim=2,
)
attn_output = attn_output.reshape(bsz, q_len, self.hidden_size)
@@ -194,7 +195,8 @@ def llama_flash_attention_2_forward(
(
attn_output[:, :, : self.num_heads // 2],
attn_output[:, :, self.num_heads // 2 :].roll(groupsz // 2, dims=1),
)
),
dim=2,
)
attn_output = attn_output.reshape(bsz, q_len, self.hidden_size).contiguous()
@@ -293,7 +295,8 @@ def llama_sdpa_attention_forward(
(
attn_output[:, :, : self.num_heads // 2],
attn_output[:, :, self.num_heads // 2 :].roll(groupsz // 2, dims=1),
)
),
dim=2,
)
attn_output = attn_output.reshape(bsz, q_len, self.hidden_size)
@@ -303,7 +306,7 @@ def llama_sdpa_attention_forward(
def _apply_llama_patch() -> None:
require_version("transformers==4.40.2", "To fix: pip install transformers==4.40.2")
require_version("transformers==4.41.2", "To fix: pip install transformers==4.41.2")
LlamaAttention.forward = llama_attention_forward
LlamaFlashAttention2.forward = llama_flash_attention_2_forward
LlamaSdpaAttention.forward = llama_sdpa_attention_forward

View File

@@ -1,4 +1,5 @@
import os
import random
import pytest
from datasets import load_dataset
@@ -8,17 +9,17 @@ from llamafactory.hparams import get_train_args
from llamafactory.model import load_tokenizer
TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-LlamaForCausalLM")
TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
TRAINING_ARGS = {
TRAIN_ARGS = {
"model_name_or_path": TINY_LLAMA,
"stage": "sft",
"do_train": True,
"finetuning_type": "full",
"dataset": "llamafactory/tiny_dataset",
"dataset": "llamafactory/tiny-supervised-dataset",
"dataset_dir": "ONLINE",
"template": "llama3",
"cutoff_len": 1024,
"cutoff_len": 8192,
"overwrite_cache": True,
"output_dir": "dummy_dir",
"overwrite_output_dir": True,
@@ -26,19 +27,24 @@ TRAINING_ARGS = {
}
@pytest.mark.parametrize("test_num", [5])
def test_supervised(test_num: int):
model_args, data_args, training_args, _, _ = get_train_args(TRAINING_ARGS)
@pytest.mark.parametrize("num_samples", [10])
def test_supervised(num_samples: int):
model_args, data_args, training_args, _, _ = get_train_args(TRAIN_ARGS)
tokenizer_module = load_tokenizer(model_args)
tokenizer = tokenizer_module["tokenizer"]
tokenized_data = get_dataset(model_args, data_args, training_args, stage="sft", **tokenizer_module)
original_data = load_dataset(TRAINING_ARGS["dataset"], split="train")
for test_idx in range(test_num):
decode_result = tokenizer.decode(tokenized_data["input_ids"][test_idx])
original_data = load_dataset(TRAIN_ARGS["dataset"], split="train")
indexes = random.choices(range(len(original_data)), k=num_samples)
for index in indexes:
decoded_result = tokenizer.decode(tokenized_data["input_ids"][index])
prompt = original_data[index]["instruction"]
if original_data[index]["input"]:
prompt += "\n" + original_data[index]["input"]
messages = [
{"role": "user", "content": original_data[test_idx]["instruction"]},
{"role": "assistant", "content": original_data[test_idx]["output"]},
{"role": "user", "content": prompt},
{"role": "assistant", "content": original_data[index]["output"]},
]
templated_result = tokenizer.apply_chat_template(messages, tokenize=False)
assert decode_result == templated_result
assert decoded_result == templated_result

View File

@@ -0,0 +1,77 @@
from llamafactory.eval.template import get_eval_template
def test_eval_template_en():
support_set = [
{
"question": "Fewshot question",
"A": "Fewshot1",
"B": "Fewshot2",
"C": "Fewshot3",
"D": "Fewshot4",
"answer": "B",
}
]
example = {
"question": "Target question",
"A": "Target1",
"B": "Target2",
"C": "Target3",
"D": "Target4",
"answer": "C",
}
template = get_eval_template(name="en")
messages = template.format_example(example, support_set=support_set, subject_name="SubName")
assert messages == [
{
"role": "user",
"content": (
"The following are multiple choice questions (with answers) about SubName.\n\n"
"Fewshot question\nA. Fewshot1\nB. Fewshot2\nC. Fewshot3\nD. Fewshot4\nAnswer:"
),
},
{"role": "assistant", "content": "B"},
{
"role": "user",
"content": "Target question\nA. Target1\nB. Target2\nC. Target3\nD. Target4\nAnswer:",
},
{"role": "assistant", "content": "C"},
]
def test_eval_template_zh():
support_set = [
{
"question": "示例问题",
"A": "示例答案1",
"B": "示例答案2",
"C": "示例答案3",
"D": "示例答案4",
"answer": "B",
}
]
example = {
"question": "目标问题",
"A": "目标答案1",
"B": "目标答案2",
"C": "目标答案3",
"D": "目标答案4",
"answer": "C",
}
template = get_eval_template(name="zh")
messages = template.format_example(example, support_set=support_set, subject_name="主题")
assert messages == [
{
"role": "user",
"content": (
"以下是中国关于主题考试的单项选择题,请选出其中的正确答案。\n\n"
"示例问题\nA. 示例答案1\nB. 示例答案2\nC. 示例答案3\nD. 示例答案4\n答案:"
),
},
{"role": "assistant", "content": "B"},
{
"role": "user",
"content": "目标问题\nA. 目标答案1\nB. 目标答案2\nC. 目标答案3\nD. 目标答案4\n答案:",
},
{"role": "assistant", "content": "C"},
]

View File

@@ -6,7 +6,12 @@ from llamafactory.hparams import get_infer_args
from llamafactory.model import load_model, load_tokenizer
TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-LlamaForCausalLM")
TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
INFER_ARGS = {
"model_name_or_path": TINY_LLAMA,
"template": "llama3",
}
def test_attention():
@@ -23,13 +28,7 @@ def test_attention():
"fa2": "LlamaFlashAttention2",
}
for requested_attention in attention_available:
model_args, _, finetuning_args, _ = get_infer_args(
{
"model_name_or_path": TINY_LLAMA,
"template": "llama2",
"flash_attn": requested_attention,
}
)
model_args, _, finetuning_args, _ = get_infer_args({"flash_attn": requested_attention, **INFER_ARGS})
tokenizer_module = load_tokenizer(model_args)
model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args)
for module in model.modules():

View File

@@ -6,14 +6,14 @@ from llamafactory.hparams import get_train_args
from llamafactory.model import load_model, load_tokenizer
TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-LlamaForCausalLM")
TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
TRAINING_ARGS = {
TRAIN_ARGS = {
"model_name_or_path": TINY_LLAMA,
"stage": "sft",
"do_train": True,
"finetuning_type": "freeze",
"dataset": "llamafactory/tiny_dataset",
"dataset": "llamafactory/tiny-supervised-dataset",
"dataset_dir": "ONLINE",
"template": "llama3",
"cutoff_len": 1024,
@@ -25,12 +25,7 @@ TRAINING_ARGS = {
def test_freeze_all_modules():
model_args, _, _, finetuning_args, _ = get_train_args(
{
"freeze_trainable_layers": 1,
**TRAINING_ARGS,
}
)
model_args, _, _, finetuning_args, _ = get_train_args({"freeze_trainable_layers": 1, **TRAIN_ARGS})
tokenizer_module = load_tokenizer(model_args)
model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True)
for name, param in model.named_parameters():
@@ -44,11 +39,7 @@ def test_freeze_all_modules():
def test_freeze_extra_modules():
model_args, _, _, finetuning_args, _ = get_train_args(
{
"freeze_trainable_layers": 1,
"freeze_extra_modules": "embed_tokens,lm_head",
**TRAINING_ARGS,
}
{"freeze_trainable_layers": 1, "freeze_extra_modules": "embed_tokens,lm_head", **TRAIN_ARGS}
)
tokenizer_module = load_tokenizer(model_args)
model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True)

View File

@@ -6,14 +6,14 @@ from llamafactory.hparams import get_train_args
from llamafactory.model import load_model, load_tokenizer
TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-LlamaForCausalLM")
TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
TRAINING_ARGS = {
TRAIN_ARGS = {
"model_name_or_path": TINY_LLAMA,
"stage": "sft",
"do_train": True,
"finetuning_type": "full",
"dataset": "llamafactory/tiny_dataset",
"dataset": "llamafactory/tiny-supervised-dataset",
"dataset_dir": "ONLINE",
"template": "llama3",
"cutoff_len": 1024,
@@ -25,7 +25,7 @@ TRAINING_ARGS = {
def test_full():
model_args, _, _, finetuning_args, _ = get_train_args(TRAINING_ARGS)
model_args, _, _, finetuning_args, _ = get_train_args(TRAIN_ARGS)
tokenizer_module = load_tokenizer(model_args)
model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True)
for param in model.parameters():

63
tests/model/test_lora.py Normal file
View File

@@ -0,0 +1,63 @@
import os
import torch
from llamafactory.hparams import get_train_args
from llamafactory.model import load_model, load_tokenizer
TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")
TRAIN_ARGS = {
"model_name_or_path": TINY_LLAMA,
"stage": "sft",
"do_train": True,
"finetuning_type": "lora",
"dataset": "llamafactory/tiny-supervised-dataset",
"dataset_dir": "ONLINE",
"template": "llama3",
"cutoff_len": 1024,
"overwrite_cache": True,
"output_dir": "dummy_dir",
"overwrite_output_dir": True,
"fp16": True,
}
def test_lora_all_modules():
model_args, _, _, finetuning_args, _ = get_train_args({"lora_target": "all", **TRAIN_ARGS})
tokenizer_module = load_tokenizer(model_args)
model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True)
linear_modules = set()
for name, param in model.named_parameters():
if any(module in name for module in ["lora_A", "lora_B"]):
linear_modules.add(name.split(".lora_", maxsplit=1)[0].split(".")[-1])
assert param.requires_grad is True
assert param.dtype == torch.float32
else:
assert param.requires_grad is False
assert param.dtype == torch.float16
assert linear_modules == {"q_proj", "k_proj", "v_proj", "o_proj", "up_proj", "gate_proj", "down_proj"}
def test_lora_extra_modules():
model_args, _, _, finetuning_args, _ = get_train_args(
{"lora_target": "all", "additional_target": "embed_tokens,lm_head", **TRAIN_ARGS}
)
tokenizer_module = load_tokenizer(model_args)
model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True)
extra_modules = set()
for name, param in model.named_parameters():
if any(module in name for module in ["lora_A", "lora_B"]):
assert param.requires_grad is True
assert param.dtype == torch.float32
elif "modules_to_save" in name:
extra_modules.add(name.split(".modules_to_save", maxsplit=1)[0].split(".")[-1])
assert param.requires_grad is True
assert param.dtype == torch.float32
else:
assert param.requires_grad is False
assert param.dtype == torch.float16
assert extra_modules == {"embed_tokens", "lm_head"}