[inference] fix stop token for object detection (#6624)

* fix stop token * update minicpm data pipeline * fix npu qlora examples Former-commit-id: 844919fadaa8a61dfae47020971ea80730b2346f
2025-01-13 21:34:20 +08:00
parent 11c38b9173
commit 2a05941b14
15 changed files with 101 additions and 45 deletions
--- a/tests/data/test_template.py
+++ b/tests/data/test_template.py
@@ -120,6 +120,12 @@ def test_jinja_template(use_fast: bool):
    assert tokenizer.apply_chat_template(MESSAGES) == ref_tokenizer.apply_chat_template(MESSAGES)


+def test_get_stop_token_ids():
+    tokenizer = AutoTokenizer.from_pretrained(TINY_LLAMA)
+    template = get_template_and_fix_tokenizer(tokenizer, DataArguments(template="llama3"))
+    assert set(template.get_stop_token_ids(tokenizer)) == {128008, 128009}
+
+
@pytest.mark.skipif(not HF_TOKEN, reason="Gated model.")
@pytest.mark.parametrize("use_fast", [True, False])
 def test_gemma_template(use_fast: bool):