[misc] fix import error (#9299)

This commit is contained in:
Yaowei Zheng
2025-10-17 17:46:27 +08:00
committed by GitHub
parent a442fa90ad
commit d9d67ba62d
7 changed files with 34 additions and 16 deletions

View File

@@ -94,7 +94,7 @@ def _description_based_initialization(
if len(valid_token_ids) == 0:
# Fallback: use mean of all existing embeddings
logger.warning_rank0(
- f"Description for token {i+1}/{num_new_tokens} contains no valid tokens. "
+ f"Description for token {i + 1}/{num_new_tokens} contains no valid tokens. "
"Using mean of existing embeddings."
)
base_embedding = embed_weight[:-num_new_tokens].mean(dim=0)

View File

@@ -28,11 +28,11 @@ if TYPE_CHECKING:
def configure_kv_cache(config: "PretrainedConfig", model_args: "ModelArguments", is_trainable: bool) -> None:
if not is_trainable:
- setattr(config, "use_cache", model_args.use_cache)
+ setattr(config, "use_cache", model_args.use_kv_cache)
if hasattr(config, "text_config"):
- setattr(config.text_config, "use_cache", model_args.use_cache)
+ setattr(config.text_config, "use_cache", model_args.use_kv_cache)
- if model_args.use_cache:
+ if model_args.use_kv_cache:
logger.info_rank0("KV cache is enabled for faster generation.")
else:
logger.info_rank0("KV cache is disabled.")

View File

@@ -154,7 +154,17 @@ def configure_moe(config: "PretrainedConfig", model_args: "ModelArguments", is_t
]:
setattr(text_config, "output_router_logits", True)
- if model_type in ["ernie4_5_moe", "granitemoe", "jamba", "llama4", "mixtral", "olmoe", "phimoe", "qwen2_moe", "qwen3_moe"]:
+ if model_type in [
+ "ernie4_5_moe",
+ "granitemoe",
+ "jamba",
+ "llama4",
+ "mixtral",
+ "olmoe",
+ "phimoe",
+ "qwen2_moe",
+ "qwen3_moe",
+ ]:
setattr(config, "router_aux_loss_coef", model_args.moe_aux_loss_coef)
elif text_config and getattr(text_config, "model_type", None) in ["qwen3_moe"]: