Implement reward model (RM) server #1543

Former-commit-id: 2e5bb6888c86079493456c2ddd525f8c52b9963e
2023-12-03 20:52:54 +08:00
parent 4a14099cfd
commit 29545d0e5e
11 changed files with 104 additions and 24 deletions
--- a/src/llmtuner/hparams/finetuning_args.py
+++ b/src/llmtuner/hparams/finetuning_args.py
@@ -118,9 +118,9 @@ class RLHFArguments:
        default=None,
        metadata={"help": "The number of bits to quantize the reward model."}
    )
-    reward_model_type: Optional[Literal["lora", "full"]] = field(
+    reward_model_type: Optional[Literal["lora", "full", "api"]] = field(
        default="lora",
-        metadata={"help": "The checkpoint type of the reward model. The lora type only supports lora training."}
+        metadata={"help": "The type of the reward model in PPO training. Lora model only supports lora training."}
    )