Former-commit-id: 830511a6d0216da99520aee8b3a753d347a71fa9
This commit is contained in:
hiyouga
2024-08-30 03:21:50 +08:00
parent 30d97ca879
commit 665db18661
8 changed files with 24 additions and 13 deletions

View File

@@ -70,7 +70,7 @@ def preprocess_pairwise_dataset(
tokenizer: "PreTrainedTokenizer",
processor: Optional["ProcessorMixin"],
data_args: "DataArguments",
) -> Dict[str, List[List[int]]]:
) -> Dict[str, List[Any]]:
# build input pairs with format `<bos> X`, `Y1 <eos>` and `Y2 <eos>`
model_inputs = defaultdict(list)
for i in range(len(examples["prompt"])):