[data] update vlm args (#6976)
Former-commit-id: c28e710636a0286d4b8a1d494529b25168a8f3ab
This commit is contained in:
@@ -105,18 +105,18 @@ class MMPluginMixin:
|
||||
)
|
||||
|
||||
def _preprocess_image(
|
||||
self, image: "ImageObject", image_max_resolution: int, image_min_resolution: int, **kwargs
|
||||
self, image: "ImageObject", image_max_pixels: int, image_min_pixels: int, **kwargs
|
||||
) -> "ImageObject":
|
||||
r"""
|
||||
Pre-processes a single image.
|
||||
"""
|
||||
if (image.width * image.height) > image_max_resolution:
|
||||
resize_factor = math.sqrt(image_max_resolution / (image.width * image.height))
|
||||
if (image.width * image.height) > image_max_pixels:
|
||||
resize_factor = math.sqrt(image_max_pixels / (image.width * image.height))
|
||||
width, height = int(image.width * resize_factor), int(image.height * resize_factor)
|
||||
image = image.resize((width, height), resample=Image.Resampling.NEAREST)
|
||||
|
||||
if (image.width * image.height) < image_min_resolution:
|
||||
resize_factor = math.sqrt(image_min_resolution / (image.width * image.height))
|
||||
if (image.width * image.height) < image_min_pixels:
|
||||
resize_factor = math.sqrt(image_min_pixels / (image.width * image.height))
|
||||
width, height = int(image.width * resize_factor), int(image.height * resize_factor)
|
||||
image = image.resize((width, height), resample=Image.Resampling.NEAREST)
|
||||
|
||||
@@ -225,16 +225,16 @@ class MMPluginMixin:
|
||||
if len(images) != 0:
|
||||
images = self._regularize_images(
|
||||
images,
|
||||
image_max_resolution=getattr(processor, "image_max_resolution", 768 * 768),
|
||||
image_min_resolution=getattr(processor, "image_min_resolution", 32 * 32),
|
||||
image_max_pixels=getattr(processor, "image_max_pixels", 768 * 768),
|
||||
image_min_pixels=getattr(processor, "image_min_pixels", 32 * 32),
|
||||
)
|
||||
mm_inputs.update(image_processor(images, return_tensors="pt"))
|
||||
|
||||
if len(videos) != 0:
|
||||
videos = self._regularize_videos(
|
||||
videos,
|
||||
image_max_resolution=getattr(processor, "video_max_resolution", 256 * 256),
|
||||
image_min_resolution=getattr(processor, "video_min_resolution", 16 * 16),
|
||||
image_max_pixels=getattr(processor, "video_max_pixels", 256 * 256),
|
||||
image_min_pixels=getattr(processor, "video_min_pixels", 16 * 16),
|
||||
video_fps=getattr(processor, "video_fps", 2.0),
|
||||
video_maxlen=getattr(processor, "video_maxlen", 128),
|
||||
)
|
||||
@@ -616,8 +616,8 @@ class MiniCPMVPlugin(BasePlugin):
|
||||
if len(images) != 0:
|
||||
images = self._regularize_images(
|
||||
images,
|
||||
image_max_resolution=getattr(processor, "image_max_resolution", 768 * 768),
|
||||
image_min_resolution=getattr(processor, "image_min_resolution", 32 * 32),
|
||||
image_max_pixels=getattr(processor, "image_max_pixels", 768 * 768),
|
||||
image_min_pixels=getattr(processor, "image_min_pixels", 32 * 32),
|
||||
)
|
||||
if "valid_image_nums_ls" in kwargs:
|
||||
valid_image_nums_ls = kwargs["valid_image_nums_ls"]
|
||||
@@ -637,8 +637,8 @@ class MiniCPMVPlugin(BasePlugin):
|
||||
if len(videos) != 0:
|
||||
videos = self._regularize_videos(
|
||||
videos,
|
||||
image_max_resolution=getattr(processor, "video_max_resolution", 256 * 256),
|
||||
image_min_resolution=getattr(processor, "video_min_resolution", 16 * 16),
|
||||
image_max_pixels=getattr(processor, "video_max_pixels", 256 * 256),
|
||||
image_min_pixels=getattr(processor, "video_min_pixels", 16 * 16),
|
||||
video_fps=getattr(processor, "video_fps", 2.0),
|
||||
video_maxlen=getattr(processor, "video_maxlen", 128),
|
||||
)
|
||||
@@ -788,8 +788,8 @@ class MllamaPlugin(BasePlugin):
|
||||
image_processor: "BaseImageProcessor" = getattr(processor, "image_processor")
|
||||
images = self._regularize_images(
|
||||
images,
|
||||
image_max_resolution=getattr(processor, "image_max_resolution", 768 * 768),
|
||||
image_min_resolution=getattr(processor, "image_min_resolution", 32 * 32),
|
||||
image_max_pixels=getattr(processor, "image_max_pixels", 768 * 768),
|
||||
image_min_pixels=getattr(processor, "image_min_pixels", 32 * 32),
|
||||
)
|
||||
batch_images = []
|
||||
for image_length in imglens:
|
||||
@@ -1082,16 +1082,16 @@ class Qwen2vlPlugin(BasePlugin):
|
||||
if len(images) != 0:
|
||||
images = self._regularize_images(
|
||||
images,
|
||||
image_max_resolution=getattr(processor, "image_max_resolution", 768 * 768),
|
||||
image_min_resolution=getattr(processor, "image_min_resolution", 32 * 32),
|
||||
image_max_pixels=getattr(processor, "image_max_pixels", 768 * 768),
|
||||
image_min_pixels=getattr(processor, "image_min_pixels", 32 * 32),
|
||||
)
|
||||
mm_inputs.update(image_processor(images, return_tensors="pt"))
|
||||
|
||||
if len(videos) != 0:
|
||||
videos, fps_per_video = self._regularize_videos(
|
||||
videos,
|
||||
image_max_resolution=getattr(processor, "video_max_resolution", 256 * 256),
|
||||
image_min_resolution=getattr(processor, "video_min_resolution", 16 * 16),
|
||||
image_max_pixels=getattr(processor, "video_max_pixels", 256 * 256),
|
||||
image_min_pixels=getattr(processor, "video_min_pixels", 16 * 16),
|
||||
video_fps=getattr(processor, "video_fps", 2.0),
|
||||
video_maxlen=getattr(processor, "video_maxlen", 128),
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user