fix ppo args

Former-commit-id: 0f12899951808f53a482082eb116bda309775930
hiyouga
2023-10-11 23:40:50 +08:00
parent 3198a7e5f4
commit 97b74d328b
4 changed files with 18 additions and 9 deletions


@@ -57,7 +57,15 @@ class FinetuningArguments:
     )
     ppo_score_norm: Optional[bool] = field(
         default=False,
-        metadata={"help": "Use score normalization in PPO Training."}
+        metadata={"help": "Use score normalization in PPO training."}
     )
+    ppo_logger: Optional[str] = field(
+        default=None,
+        metadata={"help": "Log with either 'wandb' or 'tensorboard' in PPO training."}
+    )
+    ppo_target: Optional[float] = field(
+        default=6.0,
+        metadata={"help": "Target KL value for adaptive KL control in PPO training."}
+    )
     dpo_beta: Optional[float] = field(
         default=0.1,
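
For reference, a minimal sketch of how these dataclass fields could be forwarded to trl's PPOConfig; the build_ppo_config helper and the exact field mapping are illustrative assumptions, not this repository's code (trl's PPOConfig does expose log_with, target, adap_kl_ctrl, use_score_scaling, and use_score_norm):

from trl import PPOConfig

def build_ppo_config(finetuning_args, learning_rate: float = 1e-5) -> PPOConfig:
    # Assumed wiring: map the FinetuningArguments fields above onto trl's PPO config.
    return PPOConfig(
        learning_rate=learning_rate,
        log_with=finetuning_args.ppo_logger,              # 'wandb', 'tensorboard', or None
        use_score_scaling=finetuning_args.ppo_score_norm,
        use_score_norm=finetuning_args.ppo_score_norm,    # score norm requires score scaling in trl
        adap_kl_ctrl=True,                                # enable adaptive KL control
        target=finetuning_args.ppo_target,                # target KL, default 6.0 above
    )

With adaptive KL control enabled, trl adjusts the KL penalty coefficient during training so the policy's KL divergence from the reference model stays near ppo_target.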