fix system prompt

Former-commit-id: 411e775aa939bdd154a3f1e92921ede90d989f18
This commit is contained in:
hiyouga
2023-08-16 01:35:52 +08:00
parent ca9a494d0c
commit baa709674f
15 changed files with 170 additions and 152 deletions

View File

@@ -92,14 +92,13 @@ def get_dataset(
 if getattr(dataset_attr, column_name) and getattr(dataset_attr, column_name) != column_name:
 dataset = dataset.rename_column(getattr(dataset_attr, column_name), column_name)
-if dataset_attr.source_prefix: # add prefix
+if dataset_attr.system_prompt: # add system prompt
 if data_args.streaming:
 features = dataset.features
-features["prefix"] = Value(dtype="string", id=None)
-dataset = dataset.map(lambda _: {"prefix": dataset_attr.source_prefix}, features=features)
+features["system"] = Value(dtype="string", id=None)
+dataset = dataset.map(lambda _: {"system": dataset_attr.system_prompt}, features=features)
 else:
-prefix_data = [dataset_attr.source_prefix] * len(dataset)
-dataset = dataset.add_column("prefix", prefix_data)
+dataset = dataset.add_column("system", [dataset_attr.system_prompt] * len(dataset))
 all_datasets.append(dataset)