Compare commits

12 commits

| Author | SHA1 | Date |
|---|---|---|
| | 0f1e592326 | |
| | 4d7dd0330d | |
| | ea2ca2777f | |
| | 4b2b92fd9a | |
| | 784088db3f | |
| | 0ecf0d51e3 | |
| | 041ecd0de1 | |
| | d812249db7 | |
| | 88528f1a87 | |
| | 82533114a7 | |
| | 6d9fbb3fa9 | |
| | 9953ae3d03 | |
.github/workflows/label_issue.yml (vendored, new file, 17 changes)

@@ -0,0 +1,17 @@
name: label_issue

on:
  issues:
    types:
      - opened

jobs:
  label_issue:
    runs-on: ubuntu-latest

    steps:
      - env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          ISSUE_URL: ${{ github.event.issue.html_url }}
        run: |
          gh issue edit $ISSUE_URL --add-label "pending"
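The single step above just calls the GitHub CLI against the issue that triggered the event. Run by hand it would look roughly like this (a sketch; the repository URL, issue number, and token are illustrative placeholders):

```bash
# Manual equivalent of the workflow step (issue URL and token are placeholders)
export GH_TOKEN="<token with write access to issues>"
gh issue edit https://github.com/OWNER/REPO/issues/123 --add-label "pending"
```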
.github/workflows/tests.yml (vendored, 10 changes)

@@ -9,6 +9,8 @@ on:
      - "requirements.txt"
      - ".github/workflows/*.yml"
  pull_request:
    types:
      - review_requested
    branches:
      - main
    paths:
@@ -19,21 +21,27 @@ on:
jobs:
  tests:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4
      - name: Checkout
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.8"
          cache: "pip"
          cache-dependency-path: "setup.py"

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          python -m pip install .[torch,dev]

      - name: Check quality
        run: |
          make style && make quality

      - name: Test with pytest
        run: |
          make test
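The CI steps above can be reproduced locally before opening a pull request; a minimal sketch, assuming the repository's Makefile exposes the same `style`, `quality`, and `test` targets used in the workflow:

```bash
# Local equivalent of the workflow's install, lint, and test steps
python -m pip install --upgrade pip
python -m pip install .[torch,dev]
make style && make quality
make test
```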
Dockerfile (38 changes)

@@ -1,14 +1,44 @@
FROM nvcr.io/nvidia/pytorch:24.01-py3
# Use the NVIDIA official image with PyTorch 2.3.0
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-24-02.html
FROM nvcr.io/nvidia/pytorch:24.02-py3

# Define installation arguments
ARG INSTALL_BNB=false
ARG INSTALL_VLLM=false
ARG INSTALL_DEEPSPEED=false
ARG PIP_INDEX=https://pypi.org/simple

# Set the working directory
WORKDIR /app

# Install the requirements
COPY requirements.txt /app/
RUN pip install -r requirements.txt
RUN pip config set global.index-url $PIP_INDEX
RUN python -m pip install --upgrade pip
RUN python -m pip install -r requirements.txt

# Copy the rest of the application into the image
COPY . /app/
RUN pip install -e .[metrics,bitsandbytes,qwen]

# Install the LLaMA Factory
RUN EXTRA_PACKAGES="metrics"; \
    if [ "$INSTALL_BNB" = "true" ]; then \
        EXTRA_PACKAGES="${EXTRA_PACKAGES},bitsandbytes"; \
    fi; \
    if [ "$INSTALL_VLLM" = "true" ]; then \
        EXTRA_PACKAGES="${EXTRA_PACKAGES},vllm"; \
    fi; \
    if [ "$INSTALL_DEEPSPEED" = "true" ]; then \
        EXTRA_PACKAGES="${EXTRA_PACKAGES},deepspeed"; \
    fi; \
    pip install -e .[$EXTRA_PACKAGES] && \
    pip uninstall -y transformer-engine

# Set up volumes
VOLUME [ "/root/.cache/huggingface/", "/app/data", "/app/output" ]

# Expose port 7860 for the LLaMA Board
EXPOSE 7860

CMD [ "llamafactory-cli", "webui" ]
# Expose port 8000 for the API service
EXPOSE 8000
README.md (38 changes)

@@ -335,7 +335,7 @@ huggingface-cli login
```bash
git clone --depth 1 https://github.com/hiyouga/LLaMA-Factory.git
cd LLaMA-Factory
pip install -e '.[torch,metrics]'
pip install -e ".[torch,metrics]"
```

Extra dependencies available: torch, torch_npu, metrics, deepspeed, bitsandbytes, vllm, galore, badam, gptq, awq, aqlm, qwen, modelscope, quality
@@ -405,9 +405,9 @@ Please refer to [data/README.md](data/README.md) for checking the details about
Use the following 3 commands to run LoRA **fine-tuning**, **inference** and **merging** of the Llama3-8B-Instruct model, respectively.

```bash
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml
CUDA_VISIBLE_DEVICES=0 llamafactory-cli chat examples/inference/llama3_lora_sft.yaml
CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml
llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml
llamafactory-cli chat examples/inference/llama3_lora_sft.yaml
llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml
```

See [examples/README.md](examples/README.md) for advanced usage (including distributed training).
@@ -417,33 +417,33 @@ See [examples/README.md](examples/README.md) for advanced usage (including distr

### Fine-Tuning with LLaMA Board GUI (powered by [Gradio](https://github.com/gradio-app/gradio))

#### Use local environment

```bash
CUDA_VISIBLE_DEVICES=0 GRADIO_SHARE=1 llamafactory-cli webui
llamafactory-cli webui
```

</details>

#### Use Docker
### Build Docker

```bash
docker build -f ./Dockerfile -t llama-factory:latest .
docker run --gpus=all \
docker build -f ./Dockerfile \
    --build-arg INSTALL_BNB=false \
    --build-arg INSTALL_VLLM=false \
    --build-arg INSTALL_DEEPSPEED=false \
    --build-arg PIP_INDEX=https://pypi.org/simple \
    -t llamafactory:latest .

docker run -it --gpus=all \
    -v ./hf_cache:/root/.cache/huggingface/ \
    -v ./data:/app/data \
    -v ./output:/app/output \
    -p 7860:7860 \
    -p 8000:8000 \
    --shm-size 16G \
    --name llama_factory \
    -d llama-factory:latest
    --name llamafactory \
    llamafactory:latest
```

#### Use Docker Compose

```bash
docker compose -f ./docker-compose.yml up -d
```
> [!TIP]
> Use Docker Compose to build image via `docker compose up -d`.

<details><summary>Details about volume</summary>
README_zh.md (36 changes)

@@ -335,7 +335,7 @@ huggingface-cli login
```bash
git clone --depth 1 https://github.com/hiyouga/LLaMA-Factory.git
cd LLaMA-Factory
pip install -e '.[torch,metrics]'
pip install -e ".[torch,metrics]"
```

Optional extra dependencies: torch, torch_npu, metrics, deepspeed, bitsandbytes, vllm, galore, badam, gptq, awq, aqlm, qwen, modelscope, quality
@@ -405,9 +405,9 @@ Docker image:
The following three commands run LoRA **fine-tuning**, **inference**, and **merging** of the Llama3-8B-Instruct model, respectively.

```bash
CUDA_VISIBLE_DEVICES=0 llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml
CUDA_VISIBLE_DEVICES=0 llamafactory-cli chat examples/inference/llama3_lora_sft.yaml
CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml
llamafactory-cli train examples/lora_single_gpu/llama3_lora_sft.yaml
llamafactory-cli chat examples/inference/llama3_lora_sft.yaml
llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml
```

See [examples/README_zh.md](examples/README_zh.md) for advanced usage (including multi-GPU fine-tuning).
@@ -417,31 +417,33 @@ CUDA_VISIBLE_DEVICES=0 llamafactory-cli export examples/merge_lora/llama3_lora_s

### Fine-Tuning with LLaMA Board GUI (powered by [Gradio](https://github.com/gradio-app/gradio))

#### Use local environment

```bash
CUDA_VISIBLE_DEVICES=0 GRADIO_SHARE=1 llamafactory-cli webui
llamafactory-cli webui
```

#### Use Docker
### Build Docker

```bash
docker build -f ./Dockerfile -t llama-factory:latest .
docker run --gpus=all \
docker build -f ./Dockerfile \
    --build-arg INSTALL_BNB=false \
    --build-arg INSTALL_VLLM=false \
    --build-arg INSTALL_DEEPSPEED=false \
    --build-arg PIP_INDEX=https://pypi.org/simple \
    -t llamafactory:latest .

docker run -it --gpus=all \
    -v ./hf_cache:/root/.cache/huggingface/ \
    -v ./data:/app/data \
    -v ./output:/app/output \
    -p 7860:7860 \
    -p 8000:8000 \
    --shm-size 16G \
    --name llama_factory \
    -d llama-factory:latest
    --name llamafactory \
    llamafactory:latest
```

#### Use Docker Compose

```bash
docker compose -f ./docker-compose.yml up -d
```
> [!TIP]
> Build the image with Docker Compose via `docker compose up -d`.

<details><summary>Details about volumes</summary>

docker-compose.yml
@@ -1,17 +1,23 @@
version: '3.8'

services:
  llama-factory:
  llamafactory:
    build:
      dockerfile: Dockerfile
      context: .
    container_name: llama_factory
      args:
        INSTALL_BNB: false
        INSTALL_VLLM: false
        INSTALL_DEEPSPEED: false
        PIP_INDEX: https://pypi.org/simple
    container_name: llamafactory
    volumes:
      - ./hf_cache:/root/.cache/huggingface/
      - ./data:/app/data
      - ./output:/app/output
    ports:
      - "7860:7860"
      - "8000:8000"
    ipc: host
    deploy:
      resources:
@@ -26,9 +26,7 @@ class Evaluator:
        self.template = get_template_and_fix_tokenizer(self.tokenizer, self.data_args.template)
        self.model = load_model(self.tokenizer, self.model_args, finetuning_args)
        self.eval_template = get_eval_template(self.eval_args.lang)
        self.choice_inputs = [
            self.tokenizer.encode(self.eval_template.prefix + ch, add_special_tokens=False)[-1] for ch in CHOICES
        ]
        self.choice_inputs = [self.tokenizer.encode(ch, add_special_tokens=False)[-1] for ch in CHOICES]

    @torch.inference_mode()
    def batch_inference(self, batch_input: Dict[str, torch.Tensor]) -> List[str]:
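The change above stops prepending the few-shot `prefix` (a leading space) when looking up the candidate-answer token ids, matching the template change below where `"\nAnswer: "` becomes `"\nAnswer:"`. A minimal sketch of why the two lookups give different ids; gpt2 is used only as an illustrative tokenizer, any BPE tokenizer that merges a leading space shows the same effect:

```python
# Sketch: the token id used to score a choice depends on whether the leading
# space is part of the encoded string.
from transformers import AutoTokenizer

CHOICES = ["A", "B", "C", "D"]
tokenizer = AutoTokenizer.from_pretrained("gpt2")  # illustrative tokenizer

# Old lookup: encode prefix (" ") + choice and keep the last sub-token.
old_ids = [tokenizer.encode(" " + ch, add_special_tokens=False)[-1] for ch in CHOICES]

# New lookup: encode the bare letter and keep its last sub-token.
new_ids = [tokenizer.encode(ch, add_special_tokens=False)[-1] for ch in CHOICES]

print(old_ids)  # ids for " A", " B", ...
print(new_ids)  # ids for "A", "B", ...
```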
@@ -10,7 +10,6 @@ class EvalTemplate:
    system: str
    choice: str
    answer: str
    prefix: str

    def _parse_example(self, example: Dict[str, str]) -> Tuple[str, str]:
        r"""
@@ -42,8 +41,8 @@ class EvalTemplate:
eval_templates: Dict[str, "EvalTemplate"] = {}


def _register_eval_template(name: str, system: str, choice: str, answer: str, prefix: str) -> None:
    eval_templates[name] = EvalTemplate(system=system, choice=choice, answer=answer, prefix=prefix)
def _register_eval_template(name: str, system: str, choice: str, answer: str) -> None:
    eval_templates[name] = EvalTemplate(system=system, choice=choice, answer=answer)


def get_eval_template(name: str) -> "EvalTemplate":
@@ -56,8 +55,7 @@ _register_eval_template(
    name="en",
    system="The following are multiple choice questions (with answers) about {subject}.\n\n",
    choice="\n{choice}. {content}",
    answer="\nAnswer: ",
    prefix=" ",
    answer="\nAnswer:",
)
@@ -66,5 +64,4 @@ _register_eval_template(
    system="以下是中国关于{subject}考试的单项选择题,请选出其中的正确答案。\n\n",
    choice="\n{choice}. {content}",
    answer="\n答案:",
    prefix=" ",
)
@@ -6,13 +6,10 @@ import peft
import torch
import transformers
import trl
from transformers.integrations import is_deepspeed_available
from transformers.utils import is_bitsandbytes_available, is_torch_cuda_available, is_torch_npu_available

from .packages import is_vllm_available
from transformers.utils import is_torch_cuda_available, is_torch_npu_available


VERSION = "0.8.0"
VERSION = "0.8.1"


def print_env() -> None:
@@ -37,19 +34,25 @@ def print_env() -> None:
        info["NPU type"] = torch.npu.get_device_name()
        info["CANN version"] = torch.version.cann

    if is_deepspeed_available():
    try:
        import deepspeed  # type: ignore

        info["DeepSpeed version"] = deepspeed.__version__
    except Exception:
        pass

    if is_bitsandbytes_available():
    try:
        import bitsandbytes

        info["Bitsandbytes version"] = bitsandbytes.__version__
    except Exception:
        pass

    if is_vllm_available():
    try:
        import vllm

        info["vLLM version"] = vllm.__version__
    except Exception:
        pass

    print("\n" + "\n".join(["- {}: {}".format(key, value) for key, value in info.items()]) + "\n")
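Each optional backend (DeepSpeed, bitsandbytes, vLLM) is now probed with a local try/except import instead of the transformers availability helpers. A standalone sketch of the same pattern; the helper name below is illustrative, not from the repository:

```python
# Sketch of the optional-dependency reporting pattern used above (helper name is illustrative).
from typing import Dict


def collect_optional_versions(packages: Dict[str, str]) -> Dict[str, str]:
    """Try to import each package and record its version, silently skipping missing ones."""
    info = {}
    for label, module_name in packages.items():
        try:
            module = __import__(module_name)
            info[label] = module.__version__
        except Exception:
            pass
    return info


print(collect_optional_versions({"DeepSpeed version": "deepspeed", "vLLM version": "vllm"}))
```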
@@ -209,6 +209,7 @@ def _setup_lora_tuning(
            "lora_alpha": finetuning_args.lora_alpha,
            "lora_dropout": finetuning_args.lora_dropout,
            "use_rslora": finetuning_args.use_rslora,
            "use_dora": finetuning_args.use_dora,
            "modules_to_save": finetuning_args.additional_target,
        }
@@ -218,7 +219,6 @@ def _setup_lora_tuning(
            lora_config = LoraConfig(
                task_type=TaskType.CAUSAL_LM,
                inference_mode=False,
                use_dora=finetuning_args.use_dora,
                **peft_kwargs,
            )
            model = get_peft_model(model, lora_config)
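`use_dora` now travels through `peft_kwargs` together with the new `use_rslora` and `modules_to_save` entries instead of being passed to `LoraConfig` separately. A minimal sketch of the resulting call, assuming a peft release with rsLoRA and DoRA support; the rank, targets, and dropout values are illustrative:

```python
# Sketch: building a LoraConfig from kwargs like those collected above (values are illustrative).
from peft import LoraConfig, TaskType

peft_kwargs = {
    "r": 8,
    "target_modules": ["q_proj", "v_proj"],
    "lora_alpha": 16,
    "lora_dropout": 0.0,
    "use_rslora": False,
    "use_dora": True,  # now part of peft_kwargs rather than a separate argument
    "modules_to_save": None,
}

lora_config = LoraConfig(task_type=TaskType.CAUSAL_LM, inference_mode=False, **peft_kwargs)
# get_peft_model(model, lora_config) is then applied to the loaded base model.
```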
@@ -96,7 +96,8 @@ def llama_attention_forward(
                (
                    attn_output[:, :, : self.num_heads // 2],
                    attn_output[:, :, self.num_heads // 2 :].roll(groupsz // 2, dims=1),
                )
                ),
                dim=2,
            )

        attn_output = attn_output.reshape(bsz, q_len, self.hidden_size)
@@ -194,7 +195,8 @@ def llama_flash_attention_2_forward(
                (
                    attn_output[:, :, : self.num_heads // 2],
                    attn_output[:, :, self.num_heads // 2 :].roll(groupsz // 2, dims=1),
                )
                ),
                dim=2,
            )

        attn_output = attn_output.reshape(bsz, q_len, self.hidden_size).contiguous()
@@ -293,7 +295,8 @@ def llama_sdpa_attention_forward(
                (
                    attn_output[:, :, : self.num_heads // 2],
                    attn_output[:, :, self.num_heads // 2 :].roll(groupsz // 2, dims=1),
                )
                ),
                dim=2,
            )

        attn_output = attn_output.reshape(bsz, q_len, self.hidden_size)
@@ -303,7 +306,7 @@ def llama_sdpa_attention_forward(


def _apply_llama_patch() -> None:
    require_version("transformers==4.40.2", "To fix: pip install transformers==4.40.2")
    require_version("transformers==4.41.2", "To fix: pip install transformers==4.41.2")
    LlamaAttention.forward = llama_attention_forward
    LlamaFlashAttention2.forward = llama_flash_attention_2_forward
    LlamaSdpaAttention.forward = llama_sdpa_attention_forward
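The three patched forwards all perform the same un-shift for shift-short attention: the second half of the heads is rolled back along the sequence dimension before the heads are concatenated again. A self-contained sketch on a dummy tensor (not from the repository; shapes are illustrative):

```python
# Sketch of the un-shift performed above: roll the second half of the heads
# back along the sequence axis, then concatenate the two halves on the head axis.
import torch

bsz, q_len, num_heads, head_dim, groupsz = 1, 8, 4, 2, 4
attn_output = torch.randn(bsz, q_len, num_heads, head_dim)

attn_output = torch.cat(
    (
        attn_output[:, :, : num_heads // 2],                              # untouched half of the heads
        attn_output[:, :, num_heads // 2 :].roll(groupsz // 2, dims=1),  # shifted half rolled back
    ),
    dim=2,
)
print(attn_output.shape)  # torch.Size([1, 8, 4, 2])
```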
@@ -1,4 +1,5 @@
import os
import random

import pytest
from datasets import load_dataset
@@ -8,17 +9,17 @@ from llamafactory.hparams import get_train_args
from llamafactory.model import load_tokenizer


TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-LlamaForCausalLM")
TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")

TRAINING_ARGS = {
TRAIN_ARGS = {
    "model_name_or_path": TINY_LLAMA,
    "stage": "sft",
    "do_train": True,
    "finetuning_type": "full",
    "dataset": "llamafactory/tiny_dataset",
    "dataset": "llamafactory/tiny-supervised-dataset",
    "dataset_dir": "ONLINE",
    "template": "llama3",
    "cutoff_len": 1024,
    "cutoff_len": 8192,
    "overwrite_cache": True,
    "output_dir": "dummy_dir",
    "overwrite_output_dir": True,
@@ -26,19 +27,24 @@ TRAINING_ARGS = {
}


@pytest.mark.parametrize("test_num", [5])
def test_supervised(test_num: int):
    model_args, data_args, training_args, _, _ = get_train_args(TRAINING_ARGS)
@pytest.mark.parametrize("num_samples", [10])
def test_supervised(num_samples: int):
    model_args, data_args, training_args, _, _ = get_train_args(TRAIN_ARGS)
    tokenizer_module = load_tokenizer(model_args)
    tokenizer = tokenizer_module["tokenizer"]
    tokenized_data = get_dataset(model_args, data_args, training_args, stage="sft", **tokenizer_module)

    original_data = load_dataset(TRAINING_ARGS["dataset"], split="train")
    for test_idx in range(test_num):
        decode_result = tokenizer.decode(tokenized_data["input_ids"][test_idx])
    original_data = load_dataset(TRAIN_ARGS["dataset"], split="train")
    indexes = random.choices(range(len(original_data)), k=num_samples)
    for index in indexes:
        decoded_result = tokenizer.decode(tokenized_data["input_ids"][index])
        prompt = original_data[index]["instruction"]
        if original_data[index]["input"]:
            prompt += "\n" + original_data[index]["input"]

        messages = [
            {"role": "user", "content": original_data[test_idx]["instruction"]},
            {"role": "assistant", "content": original_data[test_idx]["output"]},
            {"role": "user", "content": prompt},
            {"role": "assistant", "content": original_data[index]["output"]},
        ]
        templated_result = tokenizer.apply_chat_template(messages, tokenize=False)
        assert decode_result == templated_result
        assert decoded_result == templated_result
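`apply_chat_template` renders the message list with the tokenizer's built-in chat template, which is what the decoded training sample is compared against. A standalone sketch, assuming access to the Hugging Face Hub for the tiny test model named in the diff; the message contents are illustrative:

```python
# Sketch: rendering a user/assistant pair with a tokenizer's chat template.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("llamafactory/tiny-random-Llama-3")
messages = [
    {"role": "user", "content": "instruction text"},
    {"role": "assistant", "content": "reference output"},
]
print(tokenizer.apply_chat_template(messages, tokenize=False))
```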
tests/eval/test_eval_template.py (new file, 77 changes)

@@ -0,0 +1,77 @@
from llamafactory.eval.template import get_eval_template


def test_eval_template_en():
    support_set = [
        {
            "question": "Fewshot question",
            "A": "Fewshot1",
            "B": "Fewshot2",
            "C": "Fewshot3",
            "D": "Fewshot4",
            "answer": "B",
        }
    ]
    example = {
        "question": "Target question",
        "A": "Target1",
        "B": "Target2",
        "C": "Target3",
        "D": "Target4",
        "answer": "C",
    }
    template = get_eval_template(name="en")
    messages = template.format_example(example, support_set=support_set, subject_name="SubName")
    assert messages == [
        {
            "role": "user",
            "content": (
                "The following are multiple choice questions (with answers) about SubName.\n\n"
                "Fewshot question\nA. Fewshot1\nB. Fewshot2\nC. Fewshot3\nD. Fewshot4\nAnswer:"
            ),
        },
        {"role": "assistant", "content": "B"},
        {
            "role": "user",
            "content": "Target question\nA. Target1\nB. Target2\nC. Target3\nD. Target4\nAnswer:",
        },
        {"role": "assistant", "content": "C"},
    ]


def test_eval_template_zh():
    support_set = [
        {
            "question": "示例问题",
            "A": "示例答案1",
            "B": "示例答案2",
            "C": "示例答案3",
            "D": "示例答案4",
            "answer": "B",
        }
    ]
    example = {
        "question": "目标问题",
        "A": "目标答案1",
        "B": "目标答案2",
        "C": "目标答案3",
        "D": "目标答案4",
        "answer": "C",
    }
    template = get_eval_template(name="zh")
    messages = template.format_example(example, support_set=support_set, subject_name="主题")
    assert messages == [
        {
            "role": "user",
            "content": (
                "以下是中国关于主题考试的单项选择题,请选出其中的正确答案。\n\n"
                "示例问题\nA. 示例答案1\nB. 示例答案2\nC. 示例答案3\nD. 示例答案4\n答案:"
            ),
        },
        {"role": "assistant", "content": "B"},
        {
            "role": "user",
            "content": "目标问题\nA. 目标答案1\nB. 目标答案2\nC. 目标答案3\nD. 目标答案4\n答案:",
        },
        {"role": "assistant", "content": "C"},
    ]
@@ -6,7 +6,12 @@ from llamafactory.hparams import get_infer_args
from llamafactory.model import load_model, load_tokenizer


TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-LlamaForCausalLM")
TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")

INFER_ARGS = {
    "model_name_or_path": TINY_LLAMA,
    "template": "llama3",
}


def test_attention():
@@ -23,13 +28,7 @@ def test_attention():
        "fa2": "LlamaFlashAttention2",
    }
    for requested_attention in attention_available:
        model_args, _, finetuning_args, _ = get_infer_args(
            {
                "model_name_or_path": TINY_LLAMA,
                "template": "llama2",
                "flash_attn": requested_attention,
            }
        )
        model_args, _, finetuning_args, _ = get_infer_args({"flash_attn": requested_attention, **INFER_ARGS})
        tokenizer_module = load_tokenizer(model_args)
        model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args)
        for module in model.modules():
@@ -6,14 +6,14 @@ from llamafactory.hparams import get_train_args
from llamafactory.model import load_model, load_tokenizer


TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-LlamaForCausalLM")
TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")

TRAINING_ARGS = {
TRAIN_ARGS = {
    "model_name_or_path": TINY_LLAMA,
    "stage": "sft",
    "do_train": True,
    "finetuning_type": "freeze",
    "dataset": "llamafactory/tiny_dataset",
    "dataset": "llamafactory/tiny-supervised-dataset",
    "dataset_dir": "ONLINE",
    "template": "llama3",
    "cutoff_len": 1024,
@@ -25,12 +25,7 @@ TRAINING_ARGS = {


def test_freeze_all_modules():
    model_args, _, _, finetuning_args, _ = get_train_args(
        {
            "freeze_trainable_layers": 1,
            **TRAINING_ARGS,
        }
    )
    model_args, _, _, finetuning_args, _ = get_train_args({"freeze_trainable_layers": 1, **TRAIN_ARGS})
    tokenizer_module = load_tokenizer(model_args)
    model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True)
    for name, param in model.named_parameters():
@@ -44,11 +39,7 @@ def test_freeze_all_modules():

def test_freeze_extra_modules():
    model_args, _, _, finetuning_args, _ = get_train_args(
        {
            "freeze_trainable_layers": 1,
            "freeze_extra_modules": "embed_tokens,lm_head",
            **TRAINING_ARGS,
        }
        {"freeze_trainable_layers": 1, "freeze_extra_modules": "embed_tokens,lm_head", **TRAIN_ARGS}
    )
    tokenizer_module = load_tokenizer(model_args)
    model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True)

@@ -6,14 +6,14 @@ from llamafactory.hparams import get_train_args
from llamafactory.model import load_model, load_tokenizer


TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-LlamaForCausalLM")
TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")

TRAINING_ARGS = {
TRAIN_ARGS = {
    "model_name_or_path": TINY_LLAMA,
    "stage": "sft",
    "do_train": True,
    "finetuning_type": "full",
    "dataset": "llamafactory/tiny_dataset",
    "dataset": "llamafactory/tiny-supervised-dataset",
    "dataset_dir": "ONLINE",
    "template": "llama3",
    "cutoff_len": 1024,
@@ -25,7 +25,7 @@ TRAINING_ARGS = {


def test_full():
    model_args, _, _, finetuning_args, _ = get_train_args(TRAINING_ARGS)
    model_args, _, _, finetuning_args, _ = get_train_args(TRAIN_ARGS)
    tokenizer_module = load_tokenizer(model_args)
    model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True)
    for param in model.parameters():
tests/model/test_lora.py (new file, 63 changes)

@@ -0,0 +1,63 @@
import os

import torch

from llamafactory.hparams import get_train_args
from llamafactory.model import load_model, load_tokenizer


TINY_LLAMA = os.environ.get("TINY_LLAMA", "llamafactory/tiny-random-Llama-3")

TRAIN_ARGS = {
    "model_name_or_path": TINY_LLAMA,
    "stage": "sft",
    "do_train": True,
    "finetuning_type": "lora",
    "dataset": "llamafactory/tiny-supervised-dataset",
    "dataset_dir": "ONLINE",
    "template": "llama3",
    "cutoff_len": 1024,
    "overwrite_cache": True,
    "output_dir": "dummy_dir",
    "overwrite_output_dir": True,
    "fp16": True,
}


def test_lora_all_modules():
    model_args, _, _, finetuning_args, _ = get_train_args({"lora_target": "all", **TRAIN_ARGS})
    tokenizer_module = load_tokenizer(model_args)
    model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True)
    linear_modules = set()
    for name, param in model.named_parameters():
        if any(module in name for module in ["lora_A", "lora_B"]):
            linear_modules.add(name.split(".lora_", maxsplit=1)[0].split(".")[-1])
            assert param.requires_grad is True
            assert param.dtype == torch.float32
        else:
            assert param.requires_grad is False
            assert param.dtype == torch.float16

    assert linear_modules == {"q_proj", "k_proj", "v_proj", "o_proj", "up_proj", "gate_proj", "down_proj"}


def test_lora_extra_modules():
    model_args, _, _, finetuning_args, _ = get_train_args(
        {"lora_target": "all", "additional_target": "embed_tokens,lm_head", **TRAIN_ARGS}
    )
    tokenizer_module = load_tokenizer(model_args)
    model = load_model(tokenizer_module["tokenizer"], model_args, finetuning_args, is_trainable=True)
    extra_modules = set()
    for name, param in model.named_parameters():
        if any(module in name for module in ["lora_A", "lora_B"]):
            assert param.requires_grad is True
            assert param.dtype == torch.float32
        elif "modules_to_save" in name:
            extra_modules.add(name.split(".modules_to_save", maxsplit=1)[0].split(".")[-1])
            assert param.requires_grad is True
            assert param.dtype == torch.float32
        else:
            assert param.requires_grad is False
            assert param.dtype == torch.float16

    assert extra_modules == {"embed_tokens", "lm_head"}
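The new test modules can be run locally; a minimal sketch using the file paths shown in this diff, with the `TINY_LLAMA` override being optional and shown only as an example:

```bash
# Run the new eval-template and LoRA tests locally (requires Hub access for the tiny model/dataset)
pip install -e .[torch,dev]
TINY_LLAMA=llamafactory/tiny-random-Llama-3 pytest tests/eval/test_eval_template.py tests/model/test_lora.py
```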