Support inference with 4-bit quantized models on GPUs #3023
Former-commit-id: 950a9dab9055839990656b2b40956792b253573d
```diff
@@ -53,6 +53,10 @@ class ModelArguments:
         default=True,
         metadata={"help": "Whether or not to use double quantization in int4 training."},
     )
+    quantization_device_map: Optional[Literal["auto"]] = field(
+        default=None,
+        metadata={"help": "Device map used for loading the 4-bit quantized model, needs bitsandbytes>=0.43.0."},
+    )
     rope_scaling: Optional[Literal["linear", "dynamic"]] = field(
         default=None,
         metadata={"help": "Which scaling strategy should be adopted for the RoPE embeddings."},
```
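The new `quantization_device_map` field is what allows the 4-bit model to be sharded across multiple GPUs at load time. Below is a minimal sketch of how such a value could be threaded through to transformers and bitsandbytes; the `load_4bit_model` helper and its defaults are illustrative assumptions, not the code this commit adds.

```python
# A minimal sketch, assuming transformers and bitsandbytes>=0.43.0, of how a
# quantization_device_map value could feed into a 4-bit model load.
# load_4bit_model is a hypothetical helper, not LLaMA-Factory's actual code.
from typing import Literal, Optional

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig, PreTrainedModel


def load_4bit_model(
    model_name: str,
    quantization_device_map: Optional[Literal["auto"]] = None,
) -> PreTrainedModel:
    """Load a model quantized to 4-bit NF4, optionally sharded across GPUs."""
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_use_double_quant=True,  # mirrors the default=True field above
        bnb_4bit_quant_type="nf4",
    )
    # With quantization_device_map="auto", accelerate spreads the quantized
    # weights across all visible GPUs; otherwise pin the model to GPU 0.
    device_map = quantization_device_map if quantization_device_map else {"": 0}
    return AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        device_map=device_map,
    )


# Usage: shard a 4-bit model across every available GPU for inference.
model = load_4bit_model("meta-llama/Llama-2-7b-hf", quantization_device_map="auto")
```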