add models: DeepSeek LLM, DeepSeek Coder, Qwen 1.8B/72B

Former-commit-id: b9eaadde8b5f4b9f89fa7bb910b325fcf9c84434
2023-11-30 19:16:13 +08:00
parent 08d5340bd8
commit 7ef8f46591
4 changed files with 53 additions and 10 deletions
--- a/src/llmtuner/data/template.py
+++ b/src/llmtuner/data/template.py
@@ -408,18 +408,31 @@ register_template(
        "{{system}}"
    ],
    prompt=[
-        "### Instruction:\n{{query}}\n\n### Response:\n"
+        "User: {{query}}\n\nAssistant:"
+    ],
+    system="",
+    sep=[]
+)
+
+
+register_template(
+    name="deepseekcoder",
+    prefix=[
+        "{{system}}"
+    ],
+    prompt=[
+        "### Instruction:\n{{query}}\n### Response:\n"
    ],
    system=(
        "You are an AI programming assistant, utilizing the Deepseek Coder model, "
        "developed by Deepseek Company, and you only answer questions related to computer science. "
        "For politically sensitive questions, security and privacy issues, "
-        "and other non-computer science questions, you will refuse to answer."
+        "and other non-computer science questions, you will refuse to answer\n"
    ),
    sep=[
        "\n",
        {"token": "<|EOT|>"},
-        "\n\n"
+        "\n"
    ],
    stop_words=[
        "<|EOT|>"
--- a/src/llmtuner/extras/constants.py
+++ b/src/llmtuner/extras/constants.py
@@ -131,6 +131,28 @@ register_model_group(
 )


+register_model_group(
+    models={
+        "DeepseekLLM-7B-Base": "deepseek-ai/deepseek-llm-7b-base",
+        "DeepseekLLM-67B-Base": "deepseek-ai/deepseek-llm-67b-base",
+        "DeepseekLLM-7B-Chat": "deepseek-ai/deepseek-llm-7b-chat",
+        "DeepseekLLM-67B-Chat": "deepseek-ai/deepseek-llm-67b-chat"
+    },
+    template="deepseek"
+)
+
+
+register_model_group(
+    models={
+        "DeepseekCoder-6.7B-Base": "deepseek-ai/deepseek-coder-6.7b-base",
+        "DeepseekCoder-6.7B-Chat": "deepseek-ai/deepseek-coder-6.7b-instruct",
+        "DeepseekCoder-33B-Base": "deepseek-ai/deepseek-coder-33b-base",
+        "DeepseekCoder-33B-Chat": "deepseek-ai/deepseek-coder-33b-instruct"
+    },
+    template="deepseekcoder"
+)
+
+
 register_model_group(
    models={
        "Falcon-7B": "tiiuae/falcon-7b",
@@ -214,14 +236,22 @@ register_model_group(

 register_model_group(
    models={
+        "Qwen-1.8B": "Qwen/Qwen-1_8B",
        "Qwen-7B": "Qwen/Qwen-7B",
        "Qwen-14B": "Qwen/Qwen-14B",
+        "Qwen-72B": "Qwen/Qwen-72B",
+        "Qwen-1.8B-Chat": "Qwen/Qwen-1_8B-Chat",
        "Qwen-7B-Chat": "Qwen/Qwen-7B-Chat",
        "Qwen-14B-Chat": "Qwen/Qwen-14B-Chat",
+        "Qwen-72B-Chat": "Qwen/Qwen-72B-Chat",
+        "Qwen-1.8B-int8-Chat": "Qwen/Qwen-1_8B-Chat-Int8",
+        "Qwen-1.8B-int4-Chat": "Qwen/Qwen-1_8B-Chat-Int4",
        "Qwen-7B-int8-Chat": "Qwen/Qwen-7B-Chat-Int8",
        "Qwen-7B-int4-Chat": "Qwen/Qwen-7B-Chat-Int4",
        "Qwen-14B-int8-Chat": "Qwen/Qwen-14B-Chat-Int8",
-        "Qwen-14B-int4-Chat": "Qwen/Qwen-14B-Chat-Int4"
+        "Qwen-14B-int4-Chat": "Qwen/Qwen-14B-Chat-Int4",
+        "Qwen-72B-int8-Chat": "Qwen/Qwen-72B-Chat-Int8",
+        "Qwen-72B-int4-Chat": "Qwen/Qwen-72B-Chat-Int4"
    },
    module="c_attn",
    template="qwen"