support ppo score norm (trl 0.5.1.dev required)
Former-commit-id: 2b25db6d260ec1532281a592e873579346c7d21c
This commit is contained in:
@@ -61,6 +61,10 @@ class FinetuningArguments:
         default=True,
         metadata={"help": "Whether to resume training from the last LoRA weights or create new weights after merging them."}
     )
+    ppo_score_norm: Optional[bool] = field(
+        default=False,
+        metadata={"help": "Use score normalization in PPO Training."}
+    )
     dpo_beta: Optional[float] = field(
         default=0.1,
         metadata={"help": "The beta parameter for the DPO loss."}
Reference in New Issue
Block a user