Fix ChatGLM2 tokenizer

Former-commit-id: 1ab60b4a93fa1be5dfe6ffbd4deb64c0f9d9b431
This commit is contained in:
hiyouga
2023-09-09 13:50:29 +08:00
parent 9f83e93839
commit 50e93392dd
4 changed files with 17 additions and 16 deletions

View File

@@ -72,6 +72,10 @@ def load_model_and_tokenizer(
**config_kwargs
)
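# Fix tokenizer (for ChatGLM2): if the tokenizer's `_pad` is not the base-class implementation, rebind `PreTrainedTokenizerBase._pad` onto it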
if "PreTrainedTokenizerBase" not in str(tokenizer._pad.__func__):
tokenizer._pad = MethodType(PreTrainedTokenizerBase._pad, tokenizer)
if finetuning_args.finetuning_type == "full" and model_args.checkpoint_dir is not None:
model_to_load = model_args.checkpoint_dir[0]
else: