Former-commit-id: ec51986cf70b0bdd79b8141e45916670fb97a08e
This commit is contained in:
enji.zhou
2024-05-17 13:09:17 +08:00
parent 92b3697e2c
commit 66b5634ebf
12 changed files with 452 additions and 8 deletions

View File

@@ -28,6 +28,7 @@ class DatasetAttr:
""" columns """
system: Optional[str] = None
images: Optional[str] = None
+tag: Optional[bool] = None
""" columns for the alpaca format """
prompt: Optional[str] = "instruction"
query: Optional[str] = "input"
@@ -106,7 +107,7 @@ def get_dataset_list(data_args: "DataArguments") -> List["DatasetAttr"]:
dataset_attr.set_attr("formatting", dataset_info[name], default="alpaca")
if "columns" in dataset_info[name]:
-column_names = ["system", "images"]
+column_names = ["system", "images", "tag"]
if dataset_attr.formatting == "alpaca":
column_names.extend(["prompt", "query", "response", "history"])
else: