add test cases
Former-commit-id: 731176ff34cdf0cbf6b41c40c69f4ceb54c2daf6
This commit is contained in:
@@ -136,10 +136,6 @@ class ModelArguments:
|
||||
default=8,
|
||||
metadata={"help": "Maximum rank of all LoRAs in the vLLM engine."},
|
||||
)
|
||||
vllm_dtype: Literal["auto", "float16", "bfloat16", "float32"] = field(
|
||||
default="auto",
|
||||
metadata={"help": "Data type for model weights and activations in the vLLM engine."},
|
||||
)
|
||||
offload_folder: str = field(
|
||||
default="offload",
|
||||
metadata={"help": "Path to offload model weights."},
|
||||
@@ -148,6 +144,10 @@ class ModelArguments:
|
||||
default=True,
|
||||
metadata={"help": "Whether or not to use KV cache in generation."},
|
||||
)
|
||||
infer_dtype: Literal["auto", "float16", "bfloat16", "float32"] = field(
|
||||
default="auto",
|
||||
metadata={"help": "Data type for model weights and activations at inference."}
|
||||
)
|
||||
hf_hub_token: Optional[str] = field(
|
||||
default=None,
|
||||
metadata={"help": "Auth token to log in with Hugging Face Hub."},
|
||||
|
||||
Reference in New Issue
Block a user