@@ -2,7 +2,7 @@ import os
|
||||
import hashlib
|
||||
from typing import TYPE_CHECKING, List, Optional
|
||||
|
||||
from datasets import concatenate_datasets, interleave_datasets, load_dataset
|
||||
from datasets import Value, concatenate_datasets, interleave_datasets, load_dataset
|
||||
|
||||
from llmtuner.extras.logging import get_logger
|
||||
|
||||
@@ -93,7 +93,11 @@ def get_dataset(
|
||||
dataset = dataset.rename_column(getattr(dataset_attr, column_name), column_name)
|
||||
|
||||
if dataset_attr.source_prefix: # add prefix
|
||||
dataset = dataset.map(lambda _: {"prefix": dataset_attr.source_prefix})
|
||||
features = None
|
||||
if data_args.streaming:
|
||||
features = dataset.features
|
||||
features["prefix"] = Value(dtype="string", id=None)
|
||||
dataset = dataset.map(lambda _: {"prefix": dataset_attr.source_prefix}, features=features)
|
||||
|
||||
all_datasets.append(dataset)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user