Merge branch 'hiyouga:main' into main
Former-commit-id: 1f3163509ecd05902ea216a905b4ca15ddd3696f
This commit is contained in:
@@ -324,6 +324,14 @@ register_model_group(
|
||||
DownloadSource.DEFAULT: "deepseek-ai/deepseek-moe-16b-chat",
|
||||
DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-moe-16b-chat",
|
||||
},
|
||||
"DeepSeek-MoE-236B": {
|
||||
DownloadSource.DEFAULT: "deepseek-ai/DeepSeek-V2",
|
||||
DownloadSource.MODELSCOPE: "deepseek-ai/DeepSeek-V2",
|
||||
},
|
||||
"DeepSeek-MoE-236B-Chat": {
|
||||
DownloadSource.DEFAULT: "deepseek-ai/DeepSeek-V2-Chat",
|
||||
DownloadSource.MODELSCOPE: "deepseek-ai/DeepSeek-V2-Chat",
|
||||
},
|
||||
},
|
||||
template="deepseek",
|
||||
)
|
||||
@@ -569,6 +577,10 @@ register_model_group(
|
||||
DownloadSource.DEFAULT: "meta-llama/Meta-Llama-3-70B-Instruct",
|
||||
DownloadSource.MODELSCOPE: "LLM-Research/Meta-Llama-3-70B-Instruct",
|
||||
},
|
||||
"LLaMA3-8B-Chinese-Chat": {
|
||||
DownloadSource.DEFAULT: "shenzhi-wang/Llama3-8B-Chinese-Chat",
|
||||
DownloadSource.MODELSCOPE: "LLM-Research/Llama3-8B-Chinese-Chat",
|
||||
},
|
||||
},
|
||||
template="llama3",
|
||||
)
|
||||
|
||||
@@ -57,3 +57,7 @@ def prepare_valuehead_model(model: "PreTrainedModel") -> None:
|
||||
if getattr(model.config, "model_type", None) == "chatglm":
|
||||
setattr(model, "lm_head", model.transformer.output_layer)
|
||||
setattr(model, "_keys_to_ignore_on_save", ["lm_head.weight"])
|
||||
|
||||
if getattr(model.config, "model_type", None) == "internlm2":
|
||||
setattr(model, "lm_head", model.output)
|
||||
setattr(model, "_keys_to_ignore_on_save", ["lm_head.weight"])
|
||||
|
||||
Reference in New Issue
Block a user