[feature] Support MPO (#8930)
This commit is contained in:
@@ -134,6 +134,10 @@ class RLHFArguments:
|
||||
default=0.0,
|
||||
metadata={"help": "The supervised fine-tuning loss coefficient in DPO training."},
|
||||
)
|
||||
pref_bco_weight: float = field(
|
||||
default=0.0,
|
||||
metadata={"help": "The Binary Classifier Optimization coefficient in DPO training."},
|
||||
)
|
||||
pref_loss: Literal["sigmoid", "hinge", "ipo", "kto_pair", "orpo", "simpo"] = field(
|
||||
default="sigmoid",
|
||||
metadata={"help": "The type of DPO loss to use."},
|
||||
|
||||
Reference in New Issue
Block a user