[model] add Qwen2.5-Omni model (#7537)

* preserve image_sizes

* preserve image_sizes

* init plugin

* support audio-text2text lora

* nit

* support image/video-text2text, audio-text2text

* remove args

* remove lines

* add docs && nit

* remove some comments

* fix && add merge part script

* add license
This commit is contained in:
Kingsley
2025-03-31 20:39:35 +08:00
committed by GitHub
parent 0f8296626a
commit 7eed496336
10 changed files with 348 additions and 2 deletions

View File

@@ -257,6 +257,17 @@ _register_composite_model(
)
# Composite-model registration for the "qwen2_5_omni_thinker" model type.
# NOTE(review): "audio_tower" is listed under vision_model_keys next to the
# visual sub-modules -- presumably so the audio encoder is handled the same
# way as the vision tower; confirm against _register_composite_model.
_register_composite_model(
    model_type="qwen2_5_omni_thinker",
    projector_key="visual.merger",
    vision_model_keys=[
        "visual.patch_embed",
        "visual.blocks",
        "audio_tower",
    ],
    language_model_keys=["model", "lm_head"],
    # NOTE(review): presumably module-name fragments to keep out of LoRA
    # targeting -- verify how lora_conflict_keys is consumed.
    lora_conflict_keys=["patch_embed"],
)
_register_composite_model(
model_type="qwen2_vl",
projector_key="visual.merger",