Fix reward modeling (rm) #420, fix template #426, fix #423

Former-commit-id: 70ea3caaa7a7695c77179cd1bb18707a80a373d7
This commit is contained in:
hiyouga
2023-08-09 16:23:31 +08:00
parent 031a819257
commit a3a7465f00
3 changed files with 7 additions and 10 deletions

View File

@@ -103,13 +103,13 @@ def preprocess_dataset(
if len(source_ids) > data_args.max_source_length:
source_ids = source_ids[:data_args.max_source_length]
if len(accept_ids) > data_args.max_target_length - 1: # eos token
if len(accept_ids) > data_args.max_target_length:
accept_ids = accept_ids[:data_args.max_target_length - 1]
if len(reject_ids) > data_args.max_target_length - 1: # eos token
if len(reject_ids) > data_args.max_target_length:
reject_ids = reject_ids[:data_args.max_target_length - 1]
accept_ids = source_ids + accept_ids + [tokenizer.eos_token_id]
reject_ids = source_ids + reject_ids + [tokenizer.eos_token_id]
accept_ids = source_ids + accept_ids
reject_ids = source_ids + reject_ids
model_inputs["accept_ids"].append(accept_ids)
model_inputs["reject_ids"].append(reject_ids)