Support InternLM3 Dense 8B Model (#6640)

* support internlm3

* update

* update

* update

* add hint

Former-commit-id: 24ab7ae0944c5f373e9cac60f0332e704824a057
This commit is contained in:
Haian Huang(深度眸)
2025-01-14 18:07:27 +08:00
committed by GitHub
parent 381f7120e6
commit 1bb06e06df
5 changed files with 26 additions and 0 deletions

View File

@@ -732,6 +732,14 @@ _register_template(
stop_words=["<|im_end|>"],
)
_register_template(
name="intern3",
format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]),
format_assistant=StringFormatter(slots=["{{content}}<|im_end|>\n"]),
format_system=StringFormatter(slots=["<|im_start|>system\n{{content}}<|im_end|>\n"]),
format_prefix=EmptyFormatter(slots=[{"bos_token"}]),
stop_words=["<|im_end|>"]
)
_register_template(
name="llama2",

View File

@@ -817,6 +817,15 @@ register_model_group(
template="intern2",
)
register_model_group(
models={
"InternLM3-8B-Chat": {
DownloadSource.DEFAULT: "internlm/internlm3-8b-instruct",
DownloadSource.MODELSCOPE: "Shanghai_AI_Laboratory/internlm3-8b-instruct",
},
},
template="intern3",
)
register_model_group(
models={

View File

@@ -24,6 +24,7 @@ from transformers.modeling_utils import is_fsdp_enabled
from ..extras import logging
from ..extras.misc import infer_optim_dtype
from ..extras.packages import is_transformers_version_greater_than
from .model_utils.attention import configure_attn_implementation, print_attn_implementation
from .model_utils.checkpointing import prepare_model_for_training
from .model_utils.embedding import resize_embedding_layer
@@ -117,6 +118,9 @@ def patch_config(
if "LlavaLlamaForCausalLM" in getattr(config, "architectures", []):
raise ValueError("Please download llava models with hf-compatible format: https://huggingface.co/llava-hf")
if getattr(config, "model_type", None) == "internlm3" and not is_transformers_version_greater_than("4.47.1"):
raise RuntimeError("InternLM3 model requires transformers >= 4.47.1, please upgrade it.")
# deepspeed zero3 is not compatible with low_cpu_mem_usage
init_kwargs["low_cpu_mem_usage"] = model_args.low_cpu_mem_usage and (not is_deepspeed_zero3_enabled())