support loftq
Former-commit-id: e7ac2eb7f7daae17525a278ffbe2f82c0fbd8093
@@ -55,6 +55,10 @@ class LoraArguments:
                  Phi-1.5 choices: [\"Wqkv\", \"out_proj\", \"fc1\", \"fc2\"], \
                  Others choices: the same as LLaMA."}
     )
+    loftq_init: Optional[bool] = field(
+        default=False,
+        metadata={"help": "Use LoftQ initialization for quantized LoRA fine-tuning."}
+    )
     resume_lora_training: Optional[bool] = field(
         default=True,
         metadata={"help": "Whether to resume training from the last LoRA weights or create new weights after merging them."}
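For context, a minimal, self-contained sketch (not part of this commit) of how a boolean dataclass field like loftq_init typically reaches the command line via transformers' HfArgumentParser, which this project uses for its argument dataclasses; the class name ExampleLoraArguments and the script usage are illustrative assumptions.

# Hedged sketch: parsing a loftq_init-style flag with HfArgumentParser.
# "ExampleLoraArguments" is an illustrative name, not the project's class.
from dataclasses import dataclass, field
from typing import Optional

from transformers import HfArgumentParser


@dataclass
class ExampleLoraArguments:
    loftq_init: Optional[bool] = field(
        default=False,
        metadata={"help": "Use LoftQ initialization for quantized LoRA fine-tuning."}
    )


if __name__ == "__main__":
    parser = HfArgumentParser(ExampleLoraArguments)
    (lora_args,) = parser.parse_args_into_dataclasses()  # e.g. --loftq_init True
    print(lora_args.loftq_init)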
@@ -91,6 +91,16 @@ def init_adapter(
         else:
             target_modules = finetuning_args.lora_target

+        config_kwargs = {}
+        if model_args.quantization_bit is not None and finetuning_args.loftq_init:
+            if model_args.quantization_bit != 4:
+                raise ValueError("LoftQ initialization only supports 4-bit quantized training.")
+
+            from peft import LoftQConfig  # type: ignore
+            loftq_config = LoftQConfig(loftq_bits=4)
+            config_kwargs["init_lora_weights"] = "loftq"
+            config_kwargs["loftq_config"] = loftq_config
+
         lora_config = LoraConfig(
             task_type=TaskType.CAUSAL_LM,
             inference_mode=False,
@@ -98,7 +108,8 @@ def init_adapter(
             lora_alpha=finetuning_args.lora_alpha,
             lora_dropout=finetuning_args.lora_dropout,
             target_modules=target_modules,
-            modules_to_save=finetuning_args.additional_target
+            modules_to_save=finetuning_args.additional_target,
+            **config_kwargs
         )
         model = get_peft_model(model, lora_config)

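For context, a hedged, standalone sketch of the same LoftQ path using the public peft API (peft >= 0.7): LoftQConfig together with init_lora_weights="loftq" makes get_peft_model quantize the base weights and initialize the adapters so that they compensate the quantization error. The base model name and LoRA hyperparameters below are placeholders, not values taken from this commit.

# Hedged sketch of LoftQ-initialized QLoRA via the public peft API.
from peft import LoftQConfig, LoraConfig, TaskType, get_peft_model
from transformers import AutoModelForCausalLM

# Placeholder base model; any causal LM with LoRA-compatible linear layers works.
model = AutoModelForCausalLM.from_pretrained("huggyllama/llama-7b")

# LoftQ quantizes the base weights and picks A, B so that A @ B approximates
# the quantization error, instead of the default zero initialization.
loftq_config = LoftQConfig(loftq_bits=4)
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=8,                                  # placeholder rank
    lora_alpha=16,                        # placeholder scaling
    lora_dropout=0.05,
    target_modules=["q_proj", "v_proj"],  # placeholder targets
    init_lora_weights="loftq",
    loftq_config=loftq_config,
)
model = get_peft_model(model, lora_config)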
@@ -144,28 +144,32 @@ def load_model_and_tokenizer(
         model_args.quantization_bit = None
         config_kwargs["device_map"] = {"": get_current_device()}
         quantization_config = getattr(config, "quantization_config", None)
-        logger.info("Loading {}-bit quantized model.".format(quantization_config.get("bits", -1)))
+        logger.info("Loading {}-bit pre-quantized model.".format(quantization_config.get("bits", -1)))

-    # Quantization configurations (using bitsandbytes library)
+    # Quantization configurations (using bitsandbytes)
     if model_args.quantization_bit is not None:
         if is_deepspeed_zero3_enabled():
             raise ValueError("DeepSpeed ZeRO-3 is incompatible with quantization.")

-        if model_args.quantization_bit == 8:
-            require_version("bitsandbytes>=0.37.0", "To fix: pip install bitsandbytes>=0.37.0")
-            config_kwargs["quantization_config"] = BitsAndBytesConfig(load_in_8bit=True)
+        if finetuning_args.loftq_init:
+            require_version("peft>=0.7.1.dev0", "To fix: pip install git+https://github.com/hiyouga/peft.git")
+            logger.info("Skipping bnb quantization because LoftQ is used.")
+        else:
+            if model_args.quantization_bit == 8:
+                require_version("bitsandbytes>=0.37.0", "To fix: pip install bitsandbytes>=0.37.0")
+                config_kwargs["quantization_config"] = BitsAndBytesConfig(load_in_8bit=True)

-        if model_args.quantization_bit == 4:
-            require_version("bitsandbytes>=0.39.0", "To fix: pip install bitsandbytes>=0.39.0")
-            config_kwargs["quantization_config"] = BitsAndBytesConfig(
-                load_in_4bit=True,
-                bnb_4bit_compute_dtype=model_args.compute_dtype,
-                bnb_4bit_use_double_quant=model_args.double_quantization,
-                bnb_4bit_quant_type=model_args.quantization_type
-            )
+            if model_args.quantization_bit == 4:
+                require_version("bitsandbytes>=0.39.0", "To fix: pip install bitsandbytes>=0.39.0")
+                config_kwargs["quantization_config"] = BitsAndBytesConfig(
+                    load_in_4bit=True,
+                    bnb_4bit_compute_dtype=model_args.compute_dtype,
+                    bnb_4bit_use_double_quant=model_args.double_quantization,
+                    bnb_4bit_quant_type=model_args.quantization_type
+                )

-        config_kwargs["device_map"] = {"": get_current_device()}
-        logger.info("Quantizing model to {} bit.".format(model_args.quantization_bit))
+            config_kwargs["device_map"] = {"": get_current_device()}
+            logger.info("Quantizing model to {} bit.".format(model_args.quantization_bit))

     # Load pre-trained models (without valuehead)
     model = AutoModelForCausalLM.from_pretrained(
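To summarize the branching introduced above, the following is an illustrative helper (an assumption, not code from this repository): when loftq_init is set, the on-the-fly bitsandbytes quantization config is omitted so the model loads in full precision and peft quantizes it during LoRA initialization; otherwise the usual 8-bit or 4-bit BitsAndBytesConfig is built. The function name and defaults are hypothetical.

# Hypothetical helper summarizing the quantization decision above.
from typing import Any, Dict, Optional

import torch
from transformers import BitsAndBytesConfig


def build_quantization_kwargs(
    quantization_bit: Optional[int],
    loftq_init: bool,
    compute_dtype: torch.dtype = torch.bfloat16,
) -> Dict[str, Any]:
    config_kwargs: Dict[str, Any] = {}
    if quantization_bit is None or loftq_init:
        # Full-precision load; with LoftQ, peft quantizes during adapter init.
        return config_kwargs

    if quantization_bit == 8:
        config_kwargs["quantization_config"] = BitsAndBytesConfig(load_in_8bit=True)
    elif quantization_bit == 4:
        config_kwargs["quantization_config"] = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=compute_dtype,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
        )
    return config_kwargs

Keeping the two paths mutually exclusive avoids quantizing the base weights twice (once by bitsandbytes at load time and again by LoftQ at adapter initialization).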