implement rm server #1543
Former-commit-id: 2e5bb6888c86079493456c2ddd525f8c52b9963e
This commit is contained in:
@@ -118,9 +118,9 @@ class RLHFArguments:
|
||||
default=None,
|
||||
metadata={"help": "The number of bits to quantize the reward model."}
|
||||
)
|
||||
reward_model_type: Optional[Literal["lora", "full"]] = field(
|
||||
reward_model_type: Optional[Literal["lora", "full", "api"]] = field(
|
||||
default="lora",
|
||||
metadata={"help": "The checkpoint type of the reward model. The lora type only supports lora training."}
|
||||
metadata={"help": "The type of the reward model in PPO training. Lora model only supports lora training."}
|
||||
)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user