[trainer] Add LD-DPO objective (#8362)

This commit is contained in:
Aman Gupta
2025-06-12 01:10:38 -07:00
committed by GitHub
parent 44f1b9b5ad
commit 8e4ac78607
3 changed files with 35 additions and 5 deletions

View File

@@ -202,6 +202,10 @@ class RLHFArguments:
default="lora",
metadata={"help": "The type of the reward model in PPO training. Lora model only supports lora training."},
)
# α coefficient for the LD-DPO objective (see the help text below). Unset (None) by default.
# NOTE(review): presumably None leaves the LD-DPO weighting disabled — confirm in the trainer.
ld_alpha: Optional[float] = field(
    default=None,
    metadata={
        "help": (
            "α parameter from the LD-DPO paper, which controls the weighting of "
            "the verbose token log-probabilities in responses"
        )
    },
)
@dataclass