[model] add Qwen2.5-Omni model (#7537)

* preserve image_sizes
* preserve image_sizes
* init plugin
* support audio-text2text lora
* nit
* support image/video-text2text, audio-text2text
* remove args
* remove lines
* add docs && nit
* remove some comments
* fix && add merge part script
* add license
2025-03-31 20:39:35 +08:00
parent 0f8296626a
commit 7eed496336
10 changed files with 348 additions and 2 deletions
--- a/src/llamafactory/hparams/model_args.py
+++ b/src/llamafactory/hparams/model_args.py
@@ -222,6 +222,10 @@ class ProcessorArguments:
        default=False,
        metadata={"help": "Use pan and scan to process image for gemma3."},
    )
+    use_audio_in_video: bool = field(
+        default=False,
+        metadata={"help": "Whether or not to use audio in video inputs."},
+    )
    video_max_pixels: int = field(
        default=256 * 256,
        metadata={"help": "The maximum number of pixels of video inputs."},