fix PPO trainer #551, update readme

Former-commit-id: faead74849470cebae9e37cde5fab2a71b32aa43
This commit is contained in:
hiyouga
2023-08-18 11:43:10 +08:00
parent 736ddd0319
commit be4d2822ea
6 changed files with 42 additions and 53 deletions

View File

@@ -8,7 +8,7 @@ class FinetuningArguments:
r"""
Arguments pertaining to which techniques we are going to fine-tune with.
"""
finetuning_type: Optional[Literal["none", "freeze", "lora", "full"]] = field(
finetuning_type: Optional[Literal["lora", "freeze", "full", "none"]] = field(
default="lora",
metadata={"help": "Which fine-tuning method to use."}
)
@@ -49,7 +49,7 @@ class FinetuningArguments:
metadata={"help": "Dropout rate for the LoRA fine-tuning."}
)
lora_target: Optional[str] = field(
default="q_proj,v_proj",
default=None,
metadata={"help": "Name(s) of target modules to apply LoRA. Use commas to separate multiple modules. \
LLaMA choices: [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\", \"gate_proj\", \"up_proj\", \"down_proj\"], \
BLOOM & Falcon choices: [\"query_key_value\", \"self_attention.dense\", \"mlp.dense\"], \
@@ -77,7 +77,7 @@ class FinetuningArguments:
self.trainable_layers = ["{:d}.{}".format(idx, self.name_module_trainable) for idx in trainable_layer_ids]
assert self.finetuning_type in ["none", "freeze", "lora", "full"], "Invalid fine-tuning method."
assert self.finetuning_type in ["lora", "freeze", "full", "none"], "Invalid fine-tuning method."
def save_to_json(self, json_path: str):
r"""Saves the content of this instance in JSON format inside `json_path`."""