support ppo score norm (trl 0.5.1.dev required)
Former-commit-id: 2b25db6d260ec1532281a592e873579346c7d21c
This commit is contained in:
@@ -61,6 +61,10 @@ class FinetuningArguments:
         default=True,
         metadata={"help": "Whether to resume training from the last LoRA weights or create new weights after merging them."}
     )
+    ppo_score_norm: Optional[bool] = field(
+        default=False,
+        metadata={"help": "Use score normalization in PPO Training."}
+    )
     dpo_beta: Optional[float] = field(
         default=0.1,
         metadata={"help": "The beta parameter for the DPO loss."}
Reference in New Issue
Block a user