Add RLHF-V dataset

Former-commit-id: 3fd18fc34a0c994a738504746abfd5548e002437
This commit is contained in:
hiyouga
2024-09-01 22:57:41 +08:00
parent 7621526d22
commit 60cf12727b
12 changed files with 107 additions and 33 deletions

View File

@@ -176,7 +176,6 @@ class CustomDPOTrainer(DPOTrainer):
batch = {k: v.detach().clone() for k, v in batch.items()} # avoid error
all_logits: "torch.Tensor" = model(**batch, return_dict=True, use_cache=False).logits.to(torch.float32)
all_logps, valid_length = get_batch_logps(logits=all_logits, labels=batch["labels"])
if self.loss_type in ["ipo", "orpo", "simpo"]:
all_logps = all_logps / valid_length