Refactor data preprocessing; fix RLHF training for multimodal LLMs (MLLM)

Former-commit-id: 53ff2dd24f9121ea30c95063bb72e49a9b31e980
2026-03-21 12:03:08 +00:00 · 2024-05-24 04:08:25 +08:00
parent 1078611259
commit bf59383783
15 changed files with 572 additions and 464 deletions
--- a/src/llamafactory/train/orpo/trainer.py
+++ b/src/llamafactory/train/orpo/trainer.py
@@ -85,9 +85,7 @@ class CustomORPOTrainer(DPOTrainer):
        r"""
        Computes the average log probabilities of the labels under the given logits.
        """
-        all_logits: "torch.Tensor" = model(
-            input_ids=batch["input_ids"], attention_mask=batch["attention_mask"], return_dict=True, use_cache=False
-        ).logits.to(torch.float32)
+        all_logits: "torch.Tensor" = model(**batch, return_dict=True, use_cache=False).logits.to(torch.float32)

        all_logps = self.get_batch_logps(
            logits=all_logits,