support ORPO

Former-commit-id: f44a4c27e2461cdaa1b16865f597a31033c0e6d9
This commit is contained in:
hiyouga
2024-03-31 18:29:50 +08:00
parent 526111a303
commit d764cd8736
22 changed files with 395 additions and 47 deletions

View File

@@ -134,6 +134,7 @@ class LogCallback(TrainerCallback):
eval_loss=state.log_history[-1].get("eval_loss", None),
predict_loss=state.log_history[-1].get("predict_loss", None),
reward=state.log_history[-1].get("reward", None),
accuracy=state.log_history[-1].get("rewards/accuracies", None),
learning_rate=state.log_history[-1].get("learning_rate", None),
epoch=state.log_history[-1].get("epoch", None),
percentage=round(self.cur_steps / self.max_steps * 100, 2) if self.max_steps != 0 else 100,