Former-commit-id: 830511a6d0216da99520aee8b3a753d347a71fa9
This commit is contained in:
hiyouga
2024-08-30 03:21:50 +08:00
parent 30d97ca879
commit 665db18661
8 changed files with 24 additions and 13 deletions

View File

@@ -62,7 +62,7 @@ def preprocess_unsupervised_dataset(
tokenizer: "PreTrainedTokenizer",
processor: Optional["ProcessorMixin"],
data_args: "DataArguments",
) -> Dict[str, List[List[int]]]:
) -> Dict[str, List[Any]]:
# build inputs with format `<bos> X` and labels with format `Y <eos>`
model_inputs = defaultdict(list)
for i in range(len(examples["prompt"])):