[data] refactor template (#6896)

Former-commit-id: f78d5a3eca947ed965ca2f6c87d60441b1a59867
This commit is contained in:
hoshi-hiyouga
2025-02-11 17:59:25 +08:00
committed by GitHub
parent b72c4bd118
commit 3f7bd98bfa
3 changed files with 180 additions and 147 deletions

View File

@@ -19,7 +19,6 @@ import pytest
from transformers import AutoTokenizer
from llamafactory.data import get_template_and_fix_tokenizer
-from llamafactory.data.template import _get_jinja_template
from llamafactory.hparams import DataArguments
@@ -115,7 +114,7 @@ def test_jinja_template(use_fast: bool):
tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA, use_fast=use_fast)
ref_tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA, use_fast=use_fast)
template = get_template_and_fix_tokenizer(tokenizer, DataArguments(template="llama3"))
-tokenizer.chat_template = _get_jinja_template(template, tokenizer) # llama3 template no replace
+tokenizer.chat_template = template._get_jinja_template(tokenizer) # llama3 template no replace
assert tokenizer.chat_template != ref_tokenizer.chat_template
assert tokenizer.apply_chat_template(MESSAGES) == ref_tokenizer.apply_chat_template(MESSAGES)