[model] Support Kimi_VL thinking/instruct (#7719)

* add kimi_vl

* patch config

* check version

* Update mm_plugin.py

* Update mm_plugin.py

---------

Co-authored-by: hoshi-hiyouga <hiyouga@buaa.edu.cn>
This commit is contained in:
Kingsley
2025-04-15 00:21:58 +08:00
committed by GitHub
parent 3f91a95250
commit 2101399c94
5 changed files with 90 additions and 1 deletion

View File

@@ -54,6 +54,12 @@ def add_z3_leaf_module(model: "PreTrainedModel") -> None:
_set_z3_leaf_modules(model, [JetMoeMoA, JetMoeMoE])
if model_type in ["kimi_vl", "deepseek_v3"]:
check_version("transformers>=4.51.1")
from transformers.models.deepseek_v3.modeling_deepseek_v3 import DeepseekV3MoE
_set_z3_leaf_modules(model, [DeepseekV3MoE])
if model_type == "mixtral":
from transformers.models.mixtral.modeling_mixtral import MixtralSparseMoeBlock

View File

@@ -117,6 +117,10 @@ def patch_config(
setattr(config, "init_audio", True)
setattr(config, "init_tts", False)
# replace the top-k gating method
if getattr(config, "model_type", None) == "kimi_vl" and is_trainable:
setattr(config.text_config, "topk_method", "greedy")
if "LlavaLlamaForCausalLM" in getattr(config, "architectures", []):
raise ValueError("Please download llava models with hf-compatible format: https://huggingface.co/llava-hf")