[infer] vllm video/audio inference (#7566)

This commit is contained in:
hoshi-hiyouga
2025-04-02 02:27:04 +08:00
committed by GitHub
parent 2bfcad2394
commit 5e22597ff1
10 changed files with 329 additions and 285 deletions

View File

@@ -242,6 +242,10 @@ class ProcessorArguments:
default=128,
metadata={"help": "The maximum number of sampled frames for video inputs."},
)
# Sampling rate for audio inputs; defaults to 16000.
# NOTE(review): the unit is presumably Hz and the consumer of this value is not visible here — confirm.
audio_sampling_rate: int = field(
default=16000,
metadata={"help": "The sampling rate of audio inputs."},
)
def __post_init__(self):
if self.image_max_pixels < self.image_min_pixels: