implement rm server #1543

Former-commit-id: 2e5bb6888c86079493456c2ddd525f8c52b9963e
This commit is contained in:
hiyouga
2023-12-03 20:52:54 +08:00
parent 4a14099cfd
commit 29545d0e5e
11 changed files with 104 additions and 24 deletions

View File

@@ -118,9 +118,9 @@ class RLHFArguments:
default=None,
metadata={"help": "The number of bits to quantize the reward model."}
)
- reward_model_type: Optional[Literal["lora", "full"]] = field(
+ reward_model_type: Optional[Literal["lora", "full", "api"]] = field(
default="lora",
- metadata={"help": "The checkpoint type of the reward model. The lora type only supports lora training."}
+ metadata={"help": "The type of the reward model in PPO training. Lora model only supports lora training."}
)