Improve KTO implementation, replace datasets

Former-commit-id: e56a57ddcf061de6e4acc8679f7dbf0b68364986
2024-05-18 03:44:56 +08:00
parent e4570e28a8
commit 2bff90719b
53 changed files with 448 additions and 330 deletions
--- a/src/llamafactory/train/dpo/trainer.py
+++ b/src/llamafactory/train/dpo/trainer.py
@@ -47,11 +47,13 @@ class CustomDPOTrainer(DPOTrainer):
        self._peft_has_been_casted_to_bf16 = False

        self.ref_model = ref_model
+        self._stored_metrics = defaultdict(lambda: defaultdict(list))
+
+        # dpo hyperparams
        self.beta = finetuning_args.dpo_beta
        self.label_smoothing = finetuning_args.dpo_label_smoothing
        self.loss_type = finetuning_args.dpo_loss
        self.ftx_gamma = finetuning_args.dpo_ftx
-        self._stored_metrics = defaultdict(lambda: defaultdict(list))

        Trainer.__init__(self, model=model, **kwargs)
        if not hasattr(self, "accelerator"):
@@ -143,6 +145,7 @@ class CustomDPOTrainer(DPOTrainer):
            policy_chosen_logits,
            policy_rejected_logits,
        ) = self.concatenated_forward(model, batch)
+
        with torch.no_grad():
            if self.ref_model is None:
                ref_model = self.model