[model] support youtu-vl model (#10152)

This commit is contained in:
Hertz
2026-02-02 21:42:43 +08:00
committed by GitHub
parent bf04ca6af8
commit b53d7037c2
5 changed files with 95 additions and 0 deletions

View File

@@ -2159,6 +2159,40 @@ class LFMVLPlugin(BasePlugin):
return messages
@dataclass
class YoutuVLPlugin(BasePlugin):
r"""Plugin for Youtu-VL vision-language models."""
vision_bos_token: str = "<|vision_start|>"
vision_eos_token: str = "<|vision_end|>"
@override
def process_messages(
self,
messages: list[dict[str, str]],
images: list["ImageInput"],
videos: list["VideoInput"],
audios: list["AudioInput"],
processor: Optional["MMProcessor"],
) -> list[dict[str, str]]:
self._validate_input(processor, images, videos, audios)
self._validate_messages(messages, images, videos, audios)
messages = deepcopy(messages)
for message in messages:
content = message["content"]
content = content.replace(
IMAGE_PLACEHOLDER, f"{self.vision_bos_token}{self.image_token}{self.vision_eos_token}"
)
content = content.replace(
VIDEO_PLACEHOLDER, f"{self.vision_bos_token}{self.video_token}{self.vision_eos_token}"
)
message["content"] = content
return messages
PLUGINS = {
"base": BasePlugin,
"ernie_vl": ErnieVLPlugin,
@@ -2181,6 +2215,7 @@ PLUGINS = {
"qwen2_vl": Qwen2VLPlugin,
"qwen3_vl": Qwen3VLPlugin,
"video_llava": VideoLlavaPlugin,
"youtu_vl": YoutuVLPlugin,
}