fix #2782 #2798

Former-commit-id: eb3ab610610a0964bc8a1c9fa015805353f04c31
2024-03-12 15:53:29 +08:00
parent 9ee416a8fc
commit c9ed3fc3a4
3 changed files with 19 additions and 2 deletions
--- a/src/llmtuner/hparams/model_args.py
+++ b/src/llmtuner/hparams/model_args.py
@@ -89,6 +89,14 @@ class ModelArguments:
        default=2048,
        metadata={"help": "Maximum input length of the vLLM engine."},
    )
+    vllm_gpu_util: float = field(
+        default=0.9,
+        metadata={"help": "The fraction of GPU memory in (0,1) to be used for the vLLM engine."},
+    )
+    vllm_enforce_eager: bool = field(
+        default=False,
+        metadata={"help": "Whether or not to disable CUDA graph in the vLLM engine."},
+    )
    hf_hub_token: Optional[str] = field(
        default=None,
        metadata={"help": "Auth token to log in with Hugging Face Hub."},