[deps] adapt to transformers v5 (#10147)

Co-authored-by: frozenleaves <frozen@Mac.local>
Co-authored-by: hiyouga <hiyouga@buaa.edu.cn>
Authored by 浮梦 on 2026-02-02 12:07:19 +08:00, committed by GitHub
parent 762b480131
commit bf04ca6af8
23 changed files with 149 additions and 120 deletions

View File

@@ -94,10 +94,10 @@ def check_version(requirement: str, mandatory: bool = False) -> None:
 def check_dependencies() -> None:
     r"""Check the version of the required packages."""
-    check_version("transformers>=4.51.0,<=4.57.1")
+    check_version("transformers>=4.51.0,<=5.0.0")
     check_version("datasets>=2.16.0,<=4.0.0")
     check_version("accelerate>=1.3.0,<=1.11.0")
-    check_version("peft>=0.14.0,<=0.17.1")
+    check_version("peft>=0.18.0,<=0.18.1")
     check_version("trl>=0.18.0,<=0.24.0")
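The `check_version` helper used above is defined elsewhere in the repository and is not part of this diff. As a rough sketch of how such a requirement-string check can be implemented with `packaging` (illustrative only; the function name and error text are assumptions, not the project's code):

from importlib.metadata import version

from packaging.requirements import Requirement


def check_version_sketch(requirement: str) -> None:
    """Raise if the installed package does not satisfy the requirement string."""
    req = Requirement(requirement)  # e.g. "transformers>=4.51.0,<=5.0.0"
    installed = version(req.name)   # raises PackageNotFoundError if absent
    if not req.specifier.contains(installed, prereleases=True):
        raise RuntimeError(f"{req.name}=={installed} does not satisfy `{requirement}`.")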

View File

@@ -65,7 +65,9 @@ class DataArguments:
     )
     mix_strategy: Literal["concat", "interleave_under", "interleave_over", "interleave_once"] = field(
         default="concat",
-        metadata={"help": "Strategy to use in dataset mixing (concat/interleave) (undersampling/oversampling/sampling w.o. replacement)."},
+        metadata={
+            "help": "Strategy to use in dataset mixing (concat/interleave) (undersampling/oversampling/sampling w.o. replacement)."
+        },
     )
     interleave_probs: str | None = field(
         default=None,
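For context on the `mix_strategy` choices, the concat/interleave options typically map onto the `datasets` library roughly as below. This is a hedged illustration rather than the project's actual mixing code; `all_datasets` and `probs` are made-up names, and the `interleave_once` strategy (sampling without replacement) is not covered by the sketch.

from datasets import concatenate_datasets, interleave_datasets


def mix_datasets_sketch(all_datasets, mix_strategy="concat", probs=None, seed=42):
    # "concat" simply appends the datasets; the interleave_* strategies sample
    # from them, stopping when the smallest ("under") or largest ("over") is exhausted.
    if mix_strategy == "concat":
        return concatenate_datasets(all_datasets)
    stopping = "first_exhausted" if mix_strategy == "interleave_under" else "all_exhausted"
    return interleave_datasets(all_datasets, probabilities=probs, seed=seed, stopping_strategy=stopping)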

View File

@@ -206,9 +206,6 @@ class BaseModelArguments:
         if self.model_name_or_path is None:
             raise ValueError("Please provide `model_name_or_path`.")
 
-        if self.split_special_tokens and self.use_fast_tokenizer:
-            raise ValueError("`split_special_tokens` is only supported for slow tokenizers.")
-
         if self.adapter_name_or_path is not None:  # support merging multiple lora weights
             self.adapter_name_or_path = [path.strip() for path in self.adapter_name_or_path.split(",")]
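The removed guard dates from when only slow tokenizers honored `split_special_tokens`; recent transformers releases accept the flag on fast tokenizers as well, which presumably makes the check obsolete. A small, hedged illustration of what the flag does (the checkpoint name is only an example and exact behavior may vary by version):

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")  # example checkpoint, fast tokenizer
text = "<|im_start|>user"
print(tok(text, add_special_tokens=False).input_ids)                             # special token kept as one id
print(tok(text, add_special_tokens=False, split_special_tokens=True).input_ids)  # split into ordinary pieces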

View File

@@ -139,10 +139,6 @@ def _verify_model_args(
         if model_args.adapter_name_or_path is not None and len(model_args.adapter_name_or_path) != 1:
             raise ValueError("Quantized model only accepts a single adapter. Merge them first.")
 
-    if data_args.template == "yi" and model_args.use_fast_tokenizer:
-        logger.warning_rank0("We should use slow tokenizer for the Yi models. Change `use_fast_tokenizer` to False.")
-        model_args.use_fast_tokenizer = False
-
 
 def _check_extra_dependencies(
     model_args: "ModelArguments",
@@ -188,9 +184,7 @@ def _check_extra_dependencies(
     if training_args is not None:
         if training_args.deepspeed:
-            # pin deepspeed version < 0.17 because of https://github.com/deepspeedai/DeepSpeed/issues/7347
-            check_version("deepspeed>=0.10.0,<=0.16.9")
+            check_version("deepspeed", mandatory=True)
 
         if training_args.predict_with_generate:
             check_version("jieba", mandatory=True)
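With the upper bound on deepspeed gone, `check_version("deepspeed", mandatory=True)` presumably only asserts that the package is importable rather than enforcing a version range. A minimal sketch of such a presence-only check (not the repository's real helper; the function name and message are assumptions):

from importlib.metadata import PackageNotFoundError, version


def require_package_sketch(name: str) -> None:
    """Fail fast if an optional dependency is missing, without constraining its version."""
    try:
        print(f"{name}=={version(name)} found.")
    except PackageNotFoundError as err:
        raise RuntimeError(f"`{name}` is required for this feature, please install it.") from err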

View File

@@ -22,7 +22,6 @@ from transformers import (
     AutoModelForImageTextToText,
     AutoModelForSeq2SeqLM,
     AutoModelForTextToWaveform,
-    AutoModelForVision2Seq,
     AutoProcessor,
     AutoTokenizer,
 )
@@ -166,11 +165,9 @@ def load_model(
         else:
             if type(config) in AutoModelForImageTextToText._model_mapping.keys():  # image-text
                 load_class = AutoModelForImageTextToText
-            elif type(config) in AutoModelForVision2Seq._model_mapping.keys():  # image-text
-                load_class = AutoModelForVision2Seq
             elif type(config) in AutoModelForSeq2SeqLM._model_mapping.keys():  # audio-text
                 load_class = AutoModelForSeq2SeqLM
-            elif type(config) in AutoModelForTextToWaveform._model_mapping.keys():  # audio hack for qwen omni
+            elif type(config) in AutoModelForTextToWaveform._model_mapping.keys():  # audio-text for qwen omni
                 load_class = AutoModelForTextToWaveform
             else:
                 load_class = AutoModelForCausalLM
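Transformers v5 drops `AutoModelForVision2Seq`, so image-text configs are now resolved through `AutoModelForImageTextToText` alone. The `_model_mapping` probing pattern used above can be tried in isolation like this (the checkpoint name is only an example):

from transformers import AutoConfig, AutoModelForCausalLM, AutoModelForImageTextToText

config = AutoConfig.from_pretrained("Qwen/Qwen2.5-VL-7B-Instruct")  # example multimodal checkpoint
if type(config) in AutoModelForImageTextToText._model_mapping.keys():
    load_class = AutoModelForImageTextToText  # config type is registered as an image-text model
else:
    load_class = AutoModelForCausalLM  # fall back to a plain causal LM
print(load_class.__name__)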

View File

@@ -374,7 +374,13 @@ _register_composite_model(
 _register_composite_model(
     model_type="qwen3_omni_moe_thinker",
     projector_key="visual.merger",
-    vision_model_keys=["visual.pos_embed", "visual.patch_embed", "visual.blocks", "visual.deepstack_merger_list", "audio_tower"],
+    vision_model_keys=[
+        "visual.pos_embed",
+        "visual.patch_embed",
+        "visual.blocks",
+        "visual.deepstack_merger_list",
+        "audio_tower",
+    ],
     language_model_keys=["model", "lm_head"],
     lora_conflict_keys=["patch_embed"],
 )
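The registered key lists are dotted-name prefixes into the thinker's module tree. As a hedged sketch of how such prefixes could be used, for instance to freeze the vision and audio towers while training only the language model (this illustrates the idea, not the project's actual freezing logic; the function name is made up):

import torch


def freeze_by_prefix_sketch(model: "torch.nn.Module", prefixes: list[str]) -> None:
    # any parameter whose dotted name starts with one of the prefixes is frozen
    for name, param in model.named_parameters():
        if any(name.startswith(prefix) for prefix in prefixes):
            param.requires_grad_(False)


# e.g. freeze_by_prefix_sketch(model, ["visual.patch_embed", "visual.blocks", "audio_tower"])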

View File

@@ -103,7 +103,9 @@ class FixValueHeadModelCallback(TrainerCallback):
         if args.should_save:
             output_dir = os.path.join(args.output_dir, f"{PREFIX_CHECKPOINT_DIR}-{state.global_step}")
             fix_valuehead_checkpoint(
-                model=kwargs.pop("model"), output_dir=output_dir, safe_serialization=args.save_safetensors
+                model=kwargs.pop("model"),
+                output_dir=output_dir,
+                safe_serialization=getattr(args, "save_safetensors", True),
             )
@@ -137,7 +139,7 @@ class PissaConvertCallback(TrainerCallback):
             if isinstance(model, PeftModel):
                 init_lora_weights = getattr(model.peft_config["default"], "init_lora_weights")
                 setattr(model.peft_config["default"], "init_lora_weights", True)
-                model.save_pretrained(pissa_init_dir, safe_serialization=args.save_safetensors)
+                model.save_pretrained(pissa_init_dir, safe_serialization=getattr(args, "save_safetensors", True))
                 setattr(model.peft_config["default"], "init_lora_weights", init_lora_weights)
 
     @override
@@ -155,11 +157,11 @@ class PissaConvertCallback(TrainerCallback):
             if isinstance(model, PeftModel):
                 init_lora_weights = getattr(model.peft_config["default"], "init_lora_weights")
                 setattr(model.peft_config["default"], "init_lora_weights", True)
-                model.save_pretrained(pissa_backup_dir, safe_serialization=args.save_safetensors)
+                model.save_pretrained(pissa_backup_dir, safe_serialization=getattr(args, "save_safetensors", True))
                 setattr(model.peft_config["default"], "init_lora_weights", init_lora_weights)
                 model.save_pretrained(
                     pissa_convert_dir,
-                    safe_serialization=args.save_safetensors,
+                    safe_serialization=getattr(args, "save_safetensors", True),
                     path_initial_model_for_weight_conversion=pissa_init_dir,
                 )
                 model.load_adapter(pissa_backup_dir, "default", is_trainable=True)
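The repeated switch from `args.save_safetensors` to `getattr(args, "save_safetensors", True)` guards against `TrainingArguments` objects that no longer expose that attribute under transformers v5, while keeping the old behavior when it is present. The pattern in isolation (a trivial sketch, the helper name is an assumption):

def resolve_safe_serialization(args) -> bool:
    # Older TrainingArguments expose `save_safetensors`; newer ones may not.
    # Fall back to True, which matches the long-standing transformers default.
    return getattr(args, "save_safetensors", True)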

View File

@@ -72,7 +72,7 @@ def run_ppo(
         ppo_trainer.ppo_train(resume_from_checkpoint=training_args.resume_from_checkpoint)
         ppo_trainer.save_model()
 
         if training_args.should_save:
-            fix_valuehead_checkpoint(model, training_args.output_dir, training_args.save_safetensors)
+            fix_valuehead_checkpoint(model, training_args.output_dir, getattr(training_args, "save_safetensors", True))
 
         ppo_trainer.save_state()  # must be called after save_model to have a folder
         if ppo_trainer.is_world_process_zero() and finetuning_args.plot_loss:
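`fix_valuehead_checkpoint` exists because `AutoModelForCausalLMWithValueHead` checkpoints mix `v_head.*` weights with backbone weights stored under a `pretrained_model.` prefix. A rough sketch of that split on an in-memory state dict, purely for orientation (the project's real helper also handles sharded and safetensors files):

import torch


def split_valuehead_state_dict_sketch(state_dict: dict[str, torch.Tensor]):
    # separate the value-head weights from the wrapped backbone weights
    v_head, backbone = {}, {}
    for name, tensor in state_dict.items():
        if name.startswith("v_head."):
            v_head[name] = tensor
        else:
            backbone[name.removeprefix("pretrained_model.")] = tensor
    return backbone, v_head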

View File

@@ -114,7 +114,7 @@ class PairwiseTrainer(Trainer):
         if state_dict is None:
             state_dict = self.model.state_dict()
 
-        if self.args.save_safetensors:
+        if getattr(self.args, "save_safetensors", True):
             from collections import defaultdict
 
             ptrs = defaultdict(list)
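The surrounding code (note the `ptrs = defaultdict(list)`) appears to group tensors by storage pointer before a safetensors save, since safetensors refuses to serialize aliased tensors. A hedged sketch of that detection step, with an assumed function name:

from collections import defaultdict

import torch


def find_shared_tensors_sketch(state_dict: dict[str, "torch.Tensor"]) -> list[list[str]]:
    # safetensors cannot store two entries that alias the same memory,
    # so collect the parameter names that share a storage pointer
    ptrs = defaultdict(list)
    for name, tensor in state_dict.items():
        if isinstance(tensor, torch.Tensor):
            ptrs[tensor.data_ptr()].append(name)
    return [names for names in ptrs.values() if len(names) > 1]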

View File

@@ -65,7 +65,7 @@ def run_rm(
         train_result = trainer.train(resume_from_checkpoint=training_args.resume_from_checkpoint)
         trainer.save_model()
         if training_args.should_save:
-            fix_valuehead_checkpoint(model, training_args.output_dir, training_args.save_safetensors)
+            fix_valuehead_checkpoint(model, training_args.output_dir, getattr(training_args, "save_safetensors", True))
 
         trainer.log_metrics("train", train_result.metrics)
         trainer.save_metrics("train", train_result.metrics)