[model] support keye-vl-8b (#8776)

Kingsley
2025-07-29 21:24:08 +08:00
committed by GitHub
parent 4e0bf35eb4
commit 52882d01c3
4 changed files with 41 additions and 2 deletions


@@ -211,10 +211,10 @@ class MultiModalDataCollatorForSeq2Seq(DataCollatorForSeq2Seq):
         if (
             self.model is not None
             and getattr(self.model.config, "model_type", None)
-            in ["glm4v", "qwen2_vl", "qwen2_5_vl", "qwen2_5_omni_thinker"]
+            in ["glm4v", "Keye", "qwen2_vl", "qwen2_5_vl", "qwen2_5_omni_thinker"]
             and ("position_ids" not in features or features["position_ids"].dim() != 3)
         ):
-            raise ValueError("Qwen2-VL/Qwen2.5-Omni model requires 3D position ids for mrope.")
+            raise ValueError(f"{self.model.config.model_type} requires 3D position ids for mrope.")
         if "cross_attention_mask" in mm_inputs:  # for mllama inputs when pad_to_multiple_of is enabled
             cross_attention_mask = mm_inputs.pop("cross_attention_mask")
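
For context on the guard above: models using mrope (multimodal rotary position embeddings) take 3D position ids of shape (3, batch_size, seq_len), one row each for the temporal, height, and width axes, instead of the usual (batch_size, seq_len). A minimal standalone sketch of the same check, assuming torch; validate_position_ids and MROPE_MODEL_TYPES are hypothetical names for illustration, not part of the collator:

import torch

# Model types whose position embeddings are multimodal (mrope);
# mirrors the list in the diff above.
MROPE_MODEL_TYPES = {"glm4v", "Keye", "qwen2_vl", "qwen2_5_vl", "qwen2_5_omni_thinker"}

def validate_position_ids(model_type: str, features: dict) -> None:
    # mrope needs one position index per axis (temporal, height, width),
    # hence a rank-3 tensor of shape (3, batch_size, seq_len).
    if model_type in MROPE_MODEL_TYPES and (
        "position_ids" not in features or features["position_ids"].dim() != 3
    ):
        raise ValueError(f"{model_type} requires 3D position ids for mrope.")

# A (3, batch, seq) tensor passes; omitting position_ids or passing a
# flat (batch, seq) tensor would raise the error above.
validate_position_ids("Keye", {"position_ids": torch.zeros(3, 1, 8, dtype=torch.long)})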


@@ -1171,6 +1171,24 @@ register_template(
 )
+# copied from qwen template
+register_template(
+    name="keye_vl",
+    format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]),
+    format_assistant=StringFormatter(slots=["{{content}}<|im_end|>\n"]),
+    format_system=StringFormatter(slots=["<|im_start|>system\n{{content}}<|im_end|>\n"]),
+    format_function=FunctionFormatter(slots=["{{content}}<|im_end|>\n"], tool_format="qwen"),
+    format_observation=StringFormatter(
+        slots=["<|im_start|>user\n<tool_response>\n{{content}}\n</tool_response><|im_end|>\n<|im_start|>assistant\n"]
+    ),
+    format_tools=ToolFormatter(tool_format="qwen"),
+    stop_words=["<|im_end|>"],
+    replace_eos=True,
+    mm_plugin=get_mm_plugin(name="qwen2_vl", image_token="<|image_pad|>", video_token="<|video_pad|>"),
+    template_class=ReasoningTemplate,
+)
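
The template is copied from the qwen template, with the qwen2_vl multimodal plugin supplying the <|image_pad|>/<|video_pad|> handling and template_class=ReasoningTemplate, the class LLaMA-Factory uses for models that emit thinking blocks. A rough sketch of the prompt a single turn renders to under these slots; render_keye_vl_turn is a hypothetical helper, and the real encoding goes through LLaMA-Factory's template machinery:

def render_keye_vl_turn(system: str, user: str) -> str:
    # Mirrors format_system and format_user above: ChatML-style <|im_start|>
    # markers, with the assistant header left open as the generation prompt.
    prompt = f"<|im_start|>system\n{system}<|im_end|>\n" if system else ""
    prompt += f"<|im_start|>user\n{user}<|im_end|>\n<|im_start|>assistant\n"
    return prompt

print(render_keye_vl_turn("You are a helpful assistant.", "Describe this image."))

With stop_words=["<|im_end|>"] and replace_eos=True, the tokenizer's EOS is swapped for <|im_end|>, so generation stops at the turn boundary that format_assistant appends after the model's reply.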
 register_template(
     name="kimi_vl",
     format_user=StringFormatter(