support SimPO #3900

Former-commit-id: 6b954ce60155cf8334150b795cfc4bb63ca74c8b
This commit is contained in:
hiyouga
2024-05-26 23:46:33 +08:00
parent 26f293d587
commit b0d9966663
19 changed files with 145 additions and 339 deletions

View File

@@ -50,10 +50,10 @@ class CustomKTOTrainer(KTOTrainer):
self._stored_metrics = defaultdict(lambda: defaultdict(list))
# kto hyperparams
- self.beta = finetuning_args.kto_beta
+ self.beta = finetuning_args.pref_beta
self.desirable_weight = finetuning_args.kto_chosen_weight
self.undesirable_weight = finetuning_args.kto_rejected_weight
- self.ftx_gamma = finetuning_args.kto_ftx
+ self.ftx_gamma = finetuning_args.pref_ftx
Trainer.__init__(self, model=model, **kwargs)
if not hasattr(self, "accelerator"):