support inference of 4-bit quantized models on GPUs #3023

Former-commit-id: 950a9dab9055839990656b2b40956792b253573d
This commit is contained in:
hiyouga
2024-04-01 17:34:04 +08:00
parent 61eb3a3d46
commit e7f13098c6
2 changed files with 14 additions and 6 deletions

View File

@@ -53,6 +53,10 @@ class ModelArguments:
default=True,
metadata={"help": "Whether or not to use double quantization in int4 training."},
)
quantization_device_map: Optional[Literal["auto"]] = field(
default=None,
metadata={"help": "Device map used for loading the 4-bit quantized model, needs bitsandbytes>=0.43.0."},
)
rope_scaling: Optional[Literal["linear", "dynamic"]] = field(
default=None,
metadata={"help": "Which scaling strategy should be adopted for the RoPE embeddings."},