fix pretrain data loader

Former-commit-id: 2a812b706ecc527013e79edc504ec18a4193123d
This commit is contained in:
hiyouga
2024-01-18 14:42:52 +08:00
parent 546d6bd0b2
commit ffc874ec6f
2 changed files with 9 additions and 6 deletions

View File

@@ -25,11 +25,14 @@ def convert_alpaca(examples: Dict[str, List[Any]], dataset_attr: "DatasetAttr")
             instruction += "\n" + examples[dataset_attr.query][i]
         prompt.append({"role": Role.USER, "content": instruction})
-        if isinstance(examples[dataset_attr.response][i], list):
-            response = [{"role": Role.ASSISTANT, "content": content} for content in examples[dataset_attr.response][i]]
+        if dataset_attr.response:
+            if isinstance(examples[dataset_attr.response][i], list):
+                response = [{"role": Role.ASSISTANT, "content": content} for content in examples[dataset_attr.response][i]]
+            else:
+                response = [{"role": Role.ASSISTANT, "content": examples[dataset_attr.response][i]}]
         else:
-            response = [{"role": Role.ASSISTANT, "content": examples[dataset_attr.response][i]}]
+            response = []
         outputs["prompt"].append(prompt)
         outputs["response"].append(response)
         outputs["system"].append(examples[dataset_attr.system][i] if dataset_attr.system else "")