mirror of
https://github.com/hiyouga/LlamaFactory.git
synced 2026-02-04 21:23:09 +00:00
[model] support youtu-vl model (#10152)
This commit is contained in:
@@ -2159,6 +2159,40 @@ class LFMVLPlugin(BasePlugin):
|
||||
return messages
|
||||
|
||||
|
||||
@dataclass
class YoutuVLPlugin(BasePlugin):
    r"""Multimodal plugin for Youtu-VL vision-language models.

    Each image or video placeholder found in a message is rewritten as the
    corresponding media token enclosed by the vision start/end markers.
    """

    # Marker placed immediately before a media token.
    vision_bos_token: str = "<|vision_start|>"
    # Marker placed immediately after a media token.
    vision_eos_token: str = "<|vision_end|>"

    @override
    def process_messages(
        self,
        messages: list[dict[str, str]],
        images: list["ImageInput"],
        videos: list["VideoInput"],
        audios: list["AudioInput"],
        processor: Optional["MMProcessor"],
    ) -> list[dict[str, str]]:
        r"""Return a copy of ``messages`` with media placeholders wrapped in vision markers.

        Args:
            messages: Chat messages; the ``"content"`` entry of each one is rewritten.
            images: Image inputs, forwarded to the validation helpers.
            videos: Video inputs, forwarded to the validation helpers.
            audios: Audio inputs, forwarded to the validation helpers.
            processor: Multimodal processor, forwarded to ``_validate_input``.

        Returns:
            A deep copy of ``messages``; the list passed by the caller is left untouched.
        """
        self._validate_input(processor, images, videos, audios)
        self._validate_messages(messages, images, videos, audios)

        rewritten = deepcopy(messages)

        def _wrap(media_token):
            # Surround a media token with the vision begin/end markers.
            return f"{self.vision_bos_token}{media_token}{self.vision_eos_token}"

        # Applied in order: image placeholders first, then video placeholders.
        substitutions = (
            (IMAGE_PLACEHOLDER, _wrap(self.image_token)),
            (VIDEO_PLACEHOLDER, _wrap(self.video_token)),
        )

        for entry in rewritten:
            text = entry["content"]
            for placeholder, wrapped in substitutions:
                text = text.replace(placeholder, wrapped)

            entry["content"] = text

        return rewritten
|
||||
|
||||
|
||||
PLUGINS = {
|
||||
"base": BasePlugin,
|
||||
"ernie_vl": ErnieVLPlugin,
|
||||
@@ -2181,6 +2215,7 @@ PLUGINS = {
|
||||
"qwen2_vl": Qwen2VLPlugin,
|
||||
"qwen3_vl": Qwen3VLPlugin,
|
||||
"video_llava": VideoLlavaPlugin,
|
||||
"youtu_vl": YoutuVLPlugin,
|
||||
}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user