[model] Support Kimi_VL thinking/instruct (#7719)
* add kimi_vl
* patch config
* check version
* Update mm_plugin.py
* Update mm_plugin.py

Co-authored-by: hoshi-hiyouga <hiyouga@buaa.edu.cn>
@@ -54,6 +54,12 @@ def add_z3_leaf_module(model: "PreTrainedModel") -> None:
         _set_z3_leaf_modules(model, [JetMoeMoA, JetMoeMoE])
 
+    if model_type in ["kimi_vl", "deepseek_v3"]:
+        check_version("transformers>=4.51.1")
+        from transformers.models.deepseek_v3.modeling_deepseek_v3 import DeepseekV3MoE
+
+        _set_z3_leaf_modules(model, [DeepseekV3MoE])
+
     if model_type == "mixtral":
         from transformers.models.mixtral.modeling_mixtral import MixtralSparseMoeBlock
 
         _set_z3_leaf_modules(model, [MixtralSparseMoeBlock])
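Context on the hunk above: under DeepSpeed ZeRO-3, parameters are normally gathered via per-submodule hooks, but a sparse MoE router only activates a subset of experts per step, so DeepSpeed lets you flag the whole MoE block as a "leaf" that is gathered as one unit. The `check_version("transformers>=4.51.1")` call guards the import, since the `transformers.models.deepseek_v3` module only exists in recent transformers releases. A minimal sketch of the underlying DeepSpeed API (the ToyMoE class and layer sizes are illustrative, not from the commit):

import torch.nn as nn
from deepspeed.utils import set_z3_leaf_modules, z3_leaf_module

class ToyMoE(nn.Module):  # illustrative stand-in for DeepseekV3MoE
    def __init__(self):
        super().__init__()
        self.experts = nn.ModuleList(nn.Linear(8, 8) for _ in range(4))

model = nn.Sequential(ToyMoE(), nn.Linear(8, 8))

# Flag every ToyMoE instance as a ZeRO-3 leaf, so DeepSpeed gathers the block's
# parameters as one unit instead of hooking each expert individually, which
# misbehaves when the router skips some experts in a given step.
set_z3_leaf_modules(model, [ToyMoE])
assert z3_leaf_module(model[0])  # the MoE block is now marked as a leaf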
@@ -117,6 +117,10 @@ def patch_config(
         setattr(config, "init_audio", True)
         setattr(config, "init_tts", False)
 
+    # replace the top-k gating method
+    if getattr(config, "model_type", None) == "kimi_vl" and is_trainable:
+        setattr(config.text_config, "topk_method", "greedy")
+
     if "LlavaLlamaForCausalLM" in getattr(config, "architectures", []):
         raise ValueError("Please download llava models with hf-compatible format: https://huggingface.co/llava-hf")
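The second hunk swaps Kimi-VL's MoE router to greedy top-k expert selection whenever the model is loaded for training. A hedged sketch of the same patch in isolation; the checkpoint name is illustrative and passing trust_remote_code is an assumption about how the Kimi-VL config loads:

from transformers import AutoConfig

# Illustrative checkpoint id; loading may require trust_remote_code=True.
config = AutoConfig.from_pretrained(
    "moonshotai/Kimi-VL-A3B-Instruct", trust_remote_code=True
)
if getattr(config, "model_type", None) == "kimi_vl":
    # The language tower is a DeepSeek-V3-style MoE; "greedy" selects the
    # top-k experts directly by router score, the method used here for training.
    config.text_config.topk_method = "greedy"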