[data] support nested images input for videos (#8264)

This commit is contained in:
Kingsley
2025-06-03 20:26:29 +08:00
committed by GitHub
parent c4e51d40e0
commit c224d17cb2
2 changed files with 63 additions and 27 deletions

View File

@@ -51,12 +51,27 @@ class DatasetConverter:
else:
medias = medias[:]
if self.dataset_attr.load_from in ["script", "file"] and isinstance(medias[0], str):
for i in range(len(medias)):
if os.path.isfile(os.path.join(self.data_args.media_dir, medias[i])):
medias[i] = os.path.join(self.data_args.media_dir, medias[i])
else:
logger.warning_rank0_once(f"Media {medias[i]} does not exist in `media_dir`. Use original path.")
if self.dataset_attr.load_from in ["script", "file"]:
if isinstance(medias[0], str):
for i in range(len(medias)):
media_path = os.path.join(self.data_args.media_dir, medias[i])
if os.path.isfile(media_path):
medias[i] = media_path
else:
logger.warning_rank0_once(
f"Media {medias[i]} does not exist in `media_dir`. Use original path."
)
elif isinstance(medias[0], list): # for processed video frames
# medias is a list of lists, e.g., [[frame1.jpg, frame2.jpg], [frame3.jpg, frame4.jpg]]
for i in range(len(medias)):
for j in range(len(medias[i])):
media_path = os.path.join(self.data_args.media_dir, medias[i][j])
if os.path.isfile(media_path):
medias[i][j] = media_path
else:
logger.warning_rank0_once(
f"Media {medias[i][j]} does not exist in `media_dir`. Use original path."
)
return medias