[model] Support Kimi_VL thinking/instruct (#7719)
* add kimi_vl
* patch config
* check version
* Update mm_plugin.py
* Update mm_plugin.py

Co-authored-by: hoshi-hiyouga <hiyouga@buaa.edu.cn>
@@ -54,6 +54,12 @@ def add_z3_leaf_module(model: "PreTrainedModel") -> None:
         _set_z3_leaf_modules(model, [JetMoeMoA, JetMoeMoE])
 
+    if model_type in ["kimi_vl", "deepseek_v3"]:
+        check_version("transformers>=4.51.1")
+        from transformers.models.deepseek_v3.modeling_deepseek_v3 import DeepseekV3MoE
+
+        _set_z3_leaf_modules(model, [DeepseekV3MoE])
+
     if model_type == "mixtral":
         from transformers.models.mixtral.modeling_mixtral import MixtralSparseMoeBlock
 
         _set_z3_leaf_modules(model, [MixtralSparseMoeBlock])
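Context on the hunk above: under DeepSpeed ZeRO-3, parameters are normally gathered via per-submodule hooks, but a sparse MoE router only activates a subset of experts per step, so DeepSpeed lets you flag the whole MoE block as a "leaf" that is gathered as one unit. The `check_version("transformers>=4.51.1")` call guards the import, since the `transformers.models.deepseek_v3` module only exists in recent transformers releases. A minimal sketch of the underlying DeepSpeed API (the ToyMoE class and layer sizes are illustrative, not from the commit):

import torch.nn as nn
from deepspeed.utils import set_z3_leaf_modules, z3_leaf_module

class ToyMoE(nn.Module):  # illustrative stand-in for DeepseekV3MoE
    def __init__(self):
        super().__init__()
        self.experts = nn.ModuleList(nn.Linear(8, 8) for _ in range(4))

model = nn.Sequential(ToyMoE(), nn.Linear(8, 8))

# Flag every ToyMoE instance as a ZeRO-3 leaf, so DeepSpeed gathers the block's
# parameters as one unit instead of hooking each expert individually, which
# misbehaves when the router skips some experts in a given step.
set_z3_leaf_modules(model, [ToyMoE])
assert z3_leaf_module(model[0])  # the MoE block is now marked as a leaf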
@@ -117,6 +117,10 @@ def patch_config(
         setattr(config, "init_audio", True)
         setattr(config, "init_tts", False)
 
+    # replace the top-k gating method
+    if getattr(config, "model_type", None) == "kimi_vl" and is_trainable:
+        setattr(config.text_config, "topk_method", "greedy")
+
     if "LlavaLlamaForCausalLM" in getattr(config, "architectures", []):
         raise ValueError("Please download llava models with hf-compatible format: https://huggingface.co/llava-hf")
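The second hunk swaps Kimi-VL's MoE router to greedy top-k expert selection whenever the model is loaded for training. A hedged sketch of the same patch in isolation; the checkpoint name is illustrative and passing trust_remote_code is an assumption about how the Kimi-VL config loads:

from transformers import AutoConfig

# Illustrative checkpoint id; loading may require trust_remote_code=True.
config = AutoConfig.from_pretrained(
    "moonshotai/Kimi-VL-A3B-Instruct", trust_remote_code=True
)
if getattr(config, "model_type", None) == "kimi_vl":
    # The language tower is a DeepSeek-V3-style MoE; "greedy" selects the
    # top-k experts directly by router score, the method used here for training.
    config.text_config.topk_method = "greedy"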