Add LLaVA and InstructBLIP

Former-commit-id: 142fb6f4541a1acfefe66ff2574dabde53b00c06
This commit is contained in:
BUAADreamer
2024-04-25 00:22:43 +08:00
parent 1451297c78
commit 12c51655ce
16 changed files with 273 additions and 214 deletions

View File

@@ -199,8 +199,7 @@ def get_mm_dataset(
with training_args.main_process_first(desc="load dataset"):
all_datasets = []
for dataset_attr in get_dataset_list(data_args):
local_path = os.path.join(data_args.dataset_dir, dataset_attr.dataset_name)
all_datasets.append(load_dataset("json", data_files=local_path)['train'])
all_datasets.append(load_dataset(dataset_attr.dataset_name)['train'])
dataset = merge_dataset(all_datasets, data_args, training_args)
return dataset

View File

@@ -275,4 +275,4 @@ def get_preprocess_and_print_func(
)
print_function = partial(print_unsupervised_dataset_example, tokenizer=tokenizer)
return preprocess_func, print_function
return preprocess_func, print_function