Improve the tokenizer fix

Former-commit-id: 57b138abad6397596bc47be94e092e8fabedc06f
This commit is contained in:
hiyouga
2024-02-09 14:53:14 +08:00
parent 2c3ef480a6
commit b98a64448a
6 changed files with 105 additions and 71 deletions

View File

@@ -24,7 +24,7 @@ class Evaluator:
self.model, self.tokenizer = load_model_and_tokenizer(self.model_args, finetuning_args)
self.tokenizer.padding_side = "right" # avoid overflow issue in batched inference for llama2
self.model = dispatch_model(self.model)
- self.template = get_template_and_fix_tokenizer(self.data_args.template, self.tokenizer)
+ self.template = get_template_and_fix_tokenizer(self.tokenizer, self.data_args.template)
self.eval_template = get_eval_template(self.eval_args.lang)
self.choice_inputs = [
self.tokenizer.encode(self.eval_template.prefix + ch, add_special_tokens=False)[-1] for ch in CHOICES