Former-commit-id: 69e9ed9b96a7cfb3d3b43ec5ddd01aa0bfd9b784
This commit is contained in:
hiyouga
2023-08-01 18:43:53 +08:00
parent 250fecfcd4
commit 19256086c7
2 changed files with 12 additions and 12 deletions

View File

@@ -2,7 +2,7 @@ import os
import hashlib
from typing import TYPE_CHECKING, List, Optional
-from datasets import concatenate_datasets, interleave_datasets, load_dataset
+from datasets import Value, concatenate_datasets, interleave_datasets, load_dataset
from llmtuner.extras.logging import get_logger
@@ -93,7 +93,11 @@ def get_dataset(
dataset = dataset.rename_column(getattr(dataset_attr, column_name), column_name)
if dataset_attr.source_prefix: # add prefix
-dataset = dataset.map(lambda _: {"prefix": dataset_attr.source_prefix})
+features = None
+if data_args.streaming:
+    features = dataset.features
+    features["prefix"] = Value(dtype="string", id=None)
+dataset = dataset.map(lambda _: {"prefix": dataset_attr.source_prefix}, features=features)
all_datasets.append(dataset)