[infer] vllm video/audio inference (#7566)

2025-04-02 02:27:04 +08:00
parent 2bfcad2394
commit 5e22597ff1
10 changed files with 329 additions and 285 deletions
--- a/src/llamafactory/hparams/model_args.py
+++ b/src/llamafactory/hparams/model_args.py
@@ -242,6 +242,10 @@ class ProcessorArguments:
        default=128,
        metadata={"help": "The maximum number of sampled frames for video inputs."},
    )
+    audio_sampling_rate: int = field(
+        default=16000,  # NOTE(review): presumably in Hz (i.e. 16 kHz) — confirm against the audio loader
+        metadata={"help": "The sampling rate of audio inputs."},
+    )

    def __post_init__(self):
        if self.image_max_pixels < self.image_min_pixels: