change to right-padding, update reward score #803

Former-commit-id: baa90415bc8f5ebd423d001378b51c3a3a6c2ec7
This commit is contained in:
hiyouga
2023-09-08 20:04:31 +08:00
parent bb1b67c076
commit 612d97db6f
15 changed files with 97 additions and 59 deletions

View File

@@ -87,7 +87,6 @@ class Runner:
save_steps: int,
warmup_steps: int,
compute_type: str,
padding_side: str,
lora_rank: int,
lora_dropout: float,
lora_target: str,
@@ -129,7 +128,6 @@ class Runner:
logging_steps=logging_steps,
save_steps=save_steps,
warmup_steps=warmup_steps,
padding_side=padding_side,
lora_rank=lora_rank,
lora_dropout=lora_dropout,
lora_target=lora_target or DEFAULT_MODULE.get(model_name.split("-")[0], "q_proj,v_proj"),
@@ -142,7 +140,6 @@ class Runner:
if args["stage"] == "ppo":
args["reward_model"] = reward_model
args["padding_side"] = "left"
val_size = 0
if args["stage"] == "dpo":