mirror of
https://github.com/hiyouga/LlamaFactory.git
synced 2026-02-01 20:23:37 +00:00
[misc] fix import error (#9299)
This commit is contained in:
@@ -94,7 +94,7 @@ def _description_based_initialization(
             if len(valid_token_ids) == 0:
                 # Fallback: use mean of all existing embeddings
                 logger.warning_rank0(
-                    f"Description for token {i+1}/{num_new_tokens} contains no valid tokens. "
+                    f"Description for token {i + 1}/{num_new_tokens} contains no valid tokens. "
                     "Using mean of existing embeddings."
                 )
                 base_embedding = embed_weight[:-num_new_tokens].mean(dim=0)
@@ -28,11 +28,11 @@ if TYPE_CHECKING:
 
 def configure_kv_cache(config: "PretrainedConfig", model_args: "ModelArguments", is_trainable: bool) -> None:
     if not is_trainable:
-        setattr(config, "use_cache", model_args.use_cache)
+        setattr(config, "use_cache", model_args.use_kv_cache)
         if hasattr(config, "text_config"):
-            setattr(config.text_config, "use_cache", model_args.use_cache)
+            setattr(config.text_config, "use_cache", model_args.use_kv_cache)
 
-        if model_args.use_cache:
+        if model_args.use_kv_cache:
             logger.info_rank0("KV cache is enabled for faster generation.")
         else:
             logger.info_rank0("KV cache is disabled.")
@@ -154,7 +154,17 @@ def configure_moe(config: "PretrainedConfig", model_args: "ModelArguments", is_t
     ]:
         setattr(text_config, "output_router_logits", True)
 
-    if model_type in ["ernie4_5_moe", "granitemoe", "jamba", "llama4", "mixtral", "olmoe", "phimoe", "qwen2_moe", "qwen3_moe"]:
+    if model_type in [
+        "ernie4_5_moe",
+        "granitemoe",
+        "jamba",
+        "llama4",
+        "mixtral",
+        "olmoe",
+        "phimoe",
+        "qwen2_moe",
+        "qwen3_moe",
+    ]:
         setattr(config, "router_aux_loss_coef", model_args.moe_aux_loss_coef)
 
     elif text_config and getattr(text_config, "model_type", None) in ["qwen3_moe"]:
Reference in New Issue
Block a user