fix PPO trainer #551 , update readme

Former-commit-id: faead74849470cebae9e37cde5fab2a71b32aa43
2023-08-18 11:43:10 +08:00
parent 736ddd0319
commit be4d2822ea
6 changed files with 42 additions and 53 deletions
--- a/src/llmtuner/hparams/finetuning_args.py
+++ b/src/llmtuner/hparams/finetuning_args.py
@@ -8,7 +8,7 @@ class FinetuningArguments:
    r"""
    Arguments pertaining to which techniques we are going to fine-tuning with.
    """
-    finetuning_type: Optional[Literal["none", "freeze", "lora", "full"]] = field(
+    finetuning_type: Optional[Literal["lora", "freeze", "full", "none"]] = field(
        default="lora",
        metadata={"help": "Which fine-tuning method to use."}
    )
@@ -49,7 +49,7 @@ class FinetuningArguments:
        metadata={"help": "Dropout rate for the LoRA fine-tuning."}
    )
    lora_target: Optional[str] = field(
-        default="q_proj,v_proj",
+        default=None,
        metadata={"help": "Name(s) of target modules to apply LoRA. Use commas to separate multiple modules. \
                  LLaMA choices: [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\", \"gate_proj\", \"up_proj\", \"down_proj\"], \
                  BLOOM & Falcon choices: [\"query_key_value\", \"self_attention.dense\", \"mlp.dense\"], \
@@ -77,7 +77,7 @@ class FinetuningArguments:

        self.trainable_layers = ["{:d}.{}".format(idx, self.name_module_trainable) for idx in trainable_layer_ids]

-        assert self.finetuning_type in ["none", "freeze", "lora", "full"], "Invalid fine-tuning method."
+        assert self.finetuning_type in ["lora", "freeze", "full", "none"], "Invalid fine-tuning method."

    def save_to_json(self, json_path: str):
        r"""Saves the content of this instance in JSON format inside `json_path`."""