Support ORPO in the web UI

Former-commit-id: dd5cc78d4fb18dd0a2e9d57f0f046cfe9f0dc2c9
This commit is contained in:
hiyouga
2024-03-31 18:34:59 +08:00
parent d764cd8736
commit 1aba442bcd
3 changed files with 22 additions and 4 deletions

View File

@@ -174,10 +174,11 @@ class Runner:
 ]
 )
 args["reward_model_type"] = "lora" if args["finetuning_type"] == "lora" else "full"
-if args["stage"] == "dpo":
+elif args["stage"] == "dpo":
 args["dpo_beta"] = get("train.dpo_beta")
 args["dpo_ftx"] = get("train.dpo_ftx")
+elif args["stage"] == "orpo":
+args["orpo_beta"] = get("train.orpo_beta")
 if get("train.val_size") > 1e-6 and args["stage"] != "ppo":
 args["val_size"] = get("train.val_size")