update webui

Former-commit-id: da30d0fb4abdb825f3383ddd106bb06a84695b7a
2023-08-14 22:45:26 +08:00
parent 5fc5e776ff
commit 7f0b908de2
8 changed files with 47 additions and 78 deletions
--- a/src/llmtuner/extras/constants.py
+++ b/src/llmtuner/extras/constants.py
@@ -10,7 +10,13 @@ LAYERNORM_NAMES = ["norm", "ln_f", "ln_attn", "ln_mlp"]

 METHODS = ["full", "freeze", "lora"]

-STAGES = ["Supervised Finetuning", "Reward Modeling", "PPO", "DPO", "Pretraining"]
+STAGES = [
+    "SFT",
+    "Reward Modeling",
+    "PPO",
+    "DPO",
+    "Pre-Training"
+]

 SUPPORTED_MODELS = {
    "LLaMA-7B": "huggyllama/llama-7b",
@@ -23,6 +29,10 @@ SUPPORTED_MODELS = {
    "LLaMA2-7B-Chat": "meta-llama/Llama-2-7b-chat-hf",
    "LLaMA2-13B-Chat": "meta-llama/Llama-2-13b-chat-hf",
    "LLaMA2-70B-Chat": "meta-llama/Llama-2-70b-chat-hf",
+    "ChineseLLaMA2-7B": "ziqingyang/chinese-llama-2-7b",
+    "ChineseLLaMA2-13B": "ziqingyang/chinese-llama-2-13b",
+    "ChineseLLaMA2-7B-Chat": "ziqingyang/chinese-alpaca-2-7b",
+    "ChineseLLaMA2-13B-Chat": "ziqingyang/chinese-alpaca-2-13b",
    "BLOOM-560M": "bigscience/bloom-560m",
    "BLOOM-3B": "bigscience/bloom-3b",
    "BLOOM-7B1": "bigscience/bloom-7b1",
@@ -41,12 +51,13 @@ SUPPORTED_MODELS = {
    "Qwen-7B": "Qwen/Qwen-7B",
    "Qwen-7B-Chat": "Qwen/Qwen-7B-Chat",
    "XVERSE-13B": "xverse/XVERSE-13B",
-    "ChatGLM2-6B": "THUDM/chatglm2-6b"
+    "ChatGLM2-6B-Chat": "THUDM/chatglm2-6b"
 }

 DEFAULT_MODULE = {
    "LLaMA": "q_proj,v_proj",
    "LLaMA2": "q_proj,v_proj",
+    "ChineseLLaMA2": "q_proj,v_proj",
    "BLOOM": "query_key_value",
    "BLOOMZ": "query_key_value",
    "Falcon": "query_key_value",
@@ -59,28 +70,9 @@ DEFAULT_MODULE = {

 DEFAULT_TEMPLATE = {
    "LLaMA2": "llama2",
+    "ChineseLLaMA2": "llama2_zh",
    "Baichuan": "baichuan",
    "InternLM": "intern",
    "Qwen": "chatml",
    "ChatGLM2": "chatglm2"
 }
-
-# huggingface model name prefix 2 template
-DEFAULT_TEMPLATE_WITH_CUSTOM_MODEL = {
-    "Llama-2": "llama2",
-    "chinese-alpaca-2": "llama2_zh",
-    "alpaca-7b-wdiff": "alpaca",
-    "vicuna": "vicuna",
-    "BELLE": "belle",
-    "Chinese-LLaMA-2": "linly",
-    "BiLLa": "billa",
-    "Ziya": "ziya",
-    "aquilachat": "aquila",
-    "internlm": "intern",
-    "aquilachat": "aquila",
-    "internlm": "intern",
-    "Baichuan":"baichuan",
-    "starchat":"starchat",
-    "Qwen":"chatml",
-    "chatglm2":"chatglm2"
-}
--- a/src/llmtuner/extras/misc.py
+++ b/src/llmtuner/extras/misc.py
@@ -95,7 +95,6 @@ def prepare_model_for_training(
    use_gradient_checkpointing: Optional[bool] = True,
    layer_norm_names: Optional[List[str]] = LAYERNORM_NAMES
 ) -> "PreTrainedModel":
-
    for name, param in model.named_parameters():
        if param.ndim == 1 and any(layer_norm_name in name for layer_norm_name in layer_norm_names):
            param.data = param.data.to(torch.float32)
@@ -112,9 +111,6 @@ def prepare_model_for_training(
        model.config.use_cache = False # turn off when gradient checkpointing is enabled

    if finetuning_type != "full" and hasattr(model, output_layer_name):
-        if hasattr(model, "config") and hasattr(model.config, "pretraining_tp"):
-            model.config.pretraining_tp = 1 # disable TP for LoRA (https://github.com/huggingface/peft/pull/728)
-
        output_layer: torch.nn.Linear = getattr(model, output_layer_name)
        input_dtype = output_layer.weight.dtype

--- a/src/llmtuner/extras/template.py
+++ b/src/llmtuner/extras/template.py
@@ -273,8 +273,8 @@ register_template(


 r"""
-Supports: https://huggingface.co/ziqingyang/chinese-alpaca-2-7b
-          https://github.com/ymcui/Chinese-LLaMA-Alpaca-2
+Supports: https://github.com/ymcui/Chinese-LLaMA-Alpaca-2
+          https://huggingface.co/ziqingyang/chinese-alpaca-2-7b
 """
 register_template(
    name="llama2_zh",