[data] fix gemma2 eos token (#8480)

Co-authored-by: Yaowei Zheng <hiyouga@buaa.edu.cn>
This commit is contained in:
Liu Jiajun
2025-06-27 18:19:15 +08:00
committed by GitHub
parent 2c26ce6ac4
commit 4f0da0aec9
6 changed files with 40 additions and 4 deletions

View File

@@ -951,6 +951,22 @@ register_template(
)
# copied from gemma template
register_template(
name="gemma2",
format_user=StringFormatter(slots=["<start_of_turn>user\n{{content}}<end_of_turn>\n<start_of_turn>model\n"]),
format_assistant=StringFormatter(slots=["{{content}}<end_of_turn>\n"]),
format_system=StringFormatter(slots=["{{content}}\n\n"]),
format_observation=StringFormatter(
slots=["<start_of_turn>tool\n{{content}}<end_of_turn>\n<start_of_turn>model\n"]
),
format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
stop_words=["<eos>", "<end_of_turn>"],
efficient_eos=True,
template_class=Llama2Template,
)
# copied from gemma template
register_template(
name="gemma3",