feat: pass the max_lora_rank parameter to vLLM backend

Former-commit-id: a8756d839405ecb5deabe885cf11d1a61564deee
Author: juejuezi
Date: 2024-05-17 16:07:39 +08:00
parent a32c3a50fc
commit 6373d307ec
2 changed files with 2 additions and 0 deletions

@@ -117,6 +117,7 @@ class ModelArguments:
         default=False,
         metadata={"help": "Whether or not to disable CUDA graph in the vLLM engine."},
     )
+    vllm_max_lora_rank: int = field(default=8, metadata={"help": "The maximum supported rank of all LoRAs."})
     offload_folder: str = field(
         default="offload",
         metadata={"help": "Path to offload model weights."},