[fix] qwen3.5 projector path (#10242)

Co-authored-by: Yaowei Zheng <hiyouga@buaa.edu.cn>
[model] support Qwen3.5 all series models (#10237)
2026-03-22 12:23:08 +00:00 · 2026-03-04 01:31:09 +08:00 · 2026-03-03 17:34:59 +08:00 · 2026-03-03 17:19:37 +08:00
7 changed files with 122 additions and 8 deletions
--- a/README.md
+++ b/README.md
@@ -319,7 +319,7 @@ Read technical notes:
 | [Pixtral](https://huggingface.co/mistralai)                       | 12B                              | pixtral              |
 | [Qwen2 (Code/Math/MoE/QwQ)](https://huggingface.co/Qwen)          | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen                 |
 | [Qwen3 (MoE/Instruct/Thinking/Next)](https://huggingface.co/Qwen) | 0.6B/1.7B/4B/8B/14B/32B/80B/235B | qwen3/qwen3_nothink  |
-| [Qwen3.5](https://huggingface.co/Qwen)                            | 27B/35B/122B/397B                | qwen3_5              |
+| [Qwen3.5](https://huggingface.co/Qwen)                            | 0.8B/2B/4B/9B/27B/35B/122B/397B  | qwen3_5              |
 | [Qwen2-Audio](https://huggingface.co/Qwen)                        | 7B                               | qwen2_audio          |
 | [Qwen2.5-Omni](https://huggingface.co/Qwen)                       | 3B/7B                            | qwen2_omni           |
 | [Qwen3-Omni](https://huggingface.co/Qwen)                         | 30B                              | qwen3_omni           |
--- a/README_zh.md
+++ b/README_zh.md
@@ -321,7 +321,7 @@ https://github.com/user-attachments/assets/43b700c6-a178-41db-b1f8-8190a5d3fcfc
 | [Pixtral](https://huggingface.co/mistralai)                       | 12B                              | pixtral              |
 | [Qwen2 (Code/Math/MoE/QwQ)](https://huggingface.co/Qwen)          | 0.5B/1.5B/3B/7B/14B/32B/72B/110B | qwen                 |
 | [Qwen3 (MoE/Instruct/Thinking/Next)](https://huggingface.co/Qwen) | 0.6B/1.7B/4B/8B/14B/32B/80B/235B | qwen3/qwen3_nothink  |
-| [Qwen3.5](https://huggingface.co/Qwen)                            | 27B/35B/122B/397B                | qwen3_5              |
+| [Qwen3.5](https://huggingface.co/Qwen)                            | 0.8B/2B/4B/9B/27B/35B/122B/397B  | qwen3_5              |
 | [Qwen2-Audio](https://huggingface.co/Qwen)                        | 7B                               | qwen2_audio          |
 | [Qwen2.5-Omni](https://huggingface.co/Qwen)                       | 3B/7B                            | qwen2_omni           |
 | [Qwen3-Omni](https://huggingface.co/Qwen)                         | 30B                              | qwen3_omni           |
--- a/src/llamafactory/extras/constants.py
+++ b/src/llamafactory/extras/constants.py
@@ -2812,10 +2812,42 @@ register_model_group(

 register_model_group(
    models={
+        "Qwen3.5-0.8B-Base": {
+            DownloadSource.DEFAULT: "Qwen/Qwen3.5-0.8B-Base",
+            DownloadSource.MODELSCOPE: "Qwen/Qwen3.5-0.8B-Base",
+        },
+        "Qwen3.5-2B-Base": {
+            DownloadSource.DEFAULT: "Qwen/Qwen3.5-2B-Base",
+            DownloadSource.MODELSCOPE: "Qwen/Qwen3.5-2B-Base",
+        },
+        "Qwen3.5-4B-Base": {
+            DownloadSource.DEFAULT: "Qwen/Qwen3.5-4B-Base",
+            DownloadSource.MODELSCOPE: "Qwen/Qwen3.5-4B-Base",
+        },
+        "Qwen3.5-9B-Base": {
+            DownloadSource.DEFAULT: "Qwen/Qwen3.5-9B-Base",
+            DownloadSource.MODELSCOPE: "Qwen/Qwen3.5-9B-Base",
+        },
        "Qwen3.5-35B-A3B-Base": {
            DownloadSource.DEFAULT: "Qwen/Qwen3.5-35B-A3B-Base",
            DownloadSource.MODELSCOPE: "Qwen/Qwen3.5-35B-A3B-Base",
        },
+        "Qwen3.5-0.8B-Thinking": {
+            DownloadSource.DEFAULT: "Qwen/Qwen3.5-0.8B",
+            DownloadSource.MODELSCOPE: "Qwen/Qwen3.5-0.8B",
+        },
+        "Qwen3.5-2B-Thinking": {
+            DownloadSource.DEFAULT: "Qwen/Qwen3.5-2B",
+            DownloadSource.MODELSCOPE: "Qwen/Qwen3.5-2B",
+        },
+        "Qwen3.5-4B-Thinking": {
+            DownloadSource.DEFAULT: "Qwen/Qwen3.5-4B",
+            DownloadSource.MODELSCOPE: "Qwen/Qwen3.5-4B",
+        },
+        "Qwen3.5-9B-Thinking": {
+            DownloadSource.DEFAULT: "Qwen/Qwen3.5-9B",
+            DownloadSource.MODELSCOPE: "Qwen/Qwen3.5-9B",
+        },
        "Qwen3.5-27B-Thinking": {
            DownloadSource.DEFAULT: "Qwen/Qwen3.5-27B",
            DownloadSource.MODELSCOPE: "Qwen/Qwen3.5-27B",
--- a/src/llamafactory/hparams/parser.py
+++ b/src/llamafactory/hparams/parser.py
@@ -100,6 +100,52 @@ def _parse_args(
    return tuple(parsed_args)


+def _verify_trackio_args(training_args: "TrainingArguments") -> None:
+    """Validate Trackio settings; raise ValueError if `project` is empty.
+
+    Args:
+        training_args: TrainingArguments instance (not a dictionary)
+    """
+    report_to = training_args.report_to
+    if not report_to:
+        return  # no reporting backend configured, nothing to check
+
+    if isinstance(report_to, str):
+        report_to = [report_to]  # normalize a single backend name to a list
+
+    if "trackio" not in report_to:
+        return  # Trackio is not among the selected backends
+
+    # --- Enforce a non-empty project name (raises ValueError otherwise) ---
+    if not training_args.project:
+        raise ValueError("`--project` must be specified when using Trackio.")
+
+    # --- Warn (never fail) if trackio_space_id is not "trackio" and has no "/" ---
+    space_id = training_args.trackio_space_id
+    if space_id:
+        if space_id != "trackio" and "/" not in space_id:
+            logger.warning(
+                f"trackio_space_id '{space_id}' should typically be in format "
+                "'org/space' for Hugging Face Spaces deployment."
+            )
+
+    # --- Log a hint when project equals the literal "huggingface" ---
+    if training_args.project == "huggingface":
+        logger.info(
+            "Using default project name 'huggingface'. "
+            "Consider setting a custom project name with --project "
+            "for better organization."
+        )
+
+    # --- Log only: the hub privacy flag is reported, not validated ---
+    if training_args.hub_private_repo:
+        logger.info("Repository will be created as private on Hugging Face Hub.")
+
+    # --- Recommend run_name when it is unset or empty (warning only) ---
+    if not training_args.run_name:
+        logger.warning("Consider setting --run_name for better experiment tracking clarity.")
+
+
 def _set_transformers_logging() -> None:
    if os.getenv("LLAMAFACTORY_VERBOSITY", "INFO") in ["DEBUG", "INFO"]:
        transformers.utils.logging.set_verbosity_info()
@@ -278,8 +324,10 @@ def get_train_args(args: dict[str, Any] | list[str] | None = None) -> _TRAIN_CLS
        if finetuning_args.reward_model_type == "lora" and model_args.use_unsloth:
            raise ValueError("Unsloth does not support lora reward model.")

-        if training_args.report_to and training_args.report_to[0] not in ["wandb", "tensorboard"]:
-            raise ValueError("PPO only accepts wandb or tensorboard logger.")
+        if training_args.report_to and any(
+            logger not in ("wandb", "tensorboard", "trackio", "none") for logger in training_args.report_to
+        ):
+            raise ValueError("PPO only accepts wandb, tensorboard, or trackio logger.")

    if not model_args.use_kt and training_args.parallel_mode == ParallelMode.NOT_DISTRIBUTED:
        raise ValueError("Please launch distributed training with `llamafactory-cli` or `torchrun`.")
@@ -352,6 +400,7 @@ def get_train_args(args: dict[str, Any] | list[str] | None = None) -> _TRAIN_CLS
    _set_env_vars()
    _verify_model_args(model_args, data_args, finetuning_args)
    _check_extra_dependencies(model_args, finetuning_args, training_args)
+    _verify_trackio_args(training_args)

    if not finetuning_args.use_mca and training_args.fp8_enable_fsdp_float8_all_gather and not training_args.fp8:
        logger.warning_rank0("fp8_enable_fsdp_float8_all_gather requires fp8=True. Setting fp8=True.")
--- a/src/llamafactory/model/model_utils/visual.py
+++ b/src/llamafactory/model/model_utils/visual.py
@@ -390,14 +390,14 @@ _register_composite_model(
        "visual.deepstack_merger_list",
        "audio_tower",
    ],
-    language_model_keys=["model", "lm_head"],
+    language_model_keys=["language_model", "lm_head"],
    lora_conflict_keys=["patch_embed"],
 )


 _register_composite_model(
    model_type="qwen3_5",
-    projector_key="visual.merger",
+    projector_key="model.visual.merger",
    vision_model_keys=["visual.pos_embed", "visual.patch_embed", "visual.blocks"],
    language_model_keys=["language_model", "lm_head"],
    lora_conflict_keys=["patch_embed"],
@@ -406,7 +406,7 @@ _register_composite_model(

 _register_composite_model(
    model_type="qwen3_5_moe",
-    projector_key="visual.merger",
+    projector_key="model.visual.merger",
    vision_model_keys=["visual.pos_embed", "visual.patch_embed", "visual.blocks"],
    language_model_keys=["language_model", "lm_head"],
    lora_conflict_keys=["patch_embed"],
--- a/src/llamafactory/train/callbacks.py
+++ b/src/llamafactory/train/callbacks.py
@@ -371,6 +371,18 @@ class ReporterCallback(TrainerCallback):
                }
            )

+        if "trackio" in args.report_to:
+            import trackio
+
+            trackio.config.update(
+                {
+                    "model_args": self.model_args.to_dict(),
+                    "data_args": self.data_args.to_dict(),
+                    "finetuning_args": self.finetuning_args.to_dict(),
+                    "generating_args": self.generating_args.to_dict(),
+                }
+            )
+
        if self.finetuning_args.use_swanlab:
            import swanlab  # type: ignore

--- a/src/llamafactory/webui/components/train.py
+++ b/src/llamafactory/webui/components/train.py
@@ -108,11 +108,26 @@ def create_train_tab(engine: "Engine") -> dict[str, "Component"]:
            with gr.Column():
                enable_thinking = gr.Checkbox(value=True)
                report_to = gr.Dropdown(
-                    choices=["none", "wandb", "mlflow", "neptune", "tensorboard", "all"],
+                    choices=["none", "wandb", "mlflow", "neptune", "tensorboard", "trackio", "all"],
                    value="none",
                    allow_custom_value=True,
                )

+            with gr.Accordion("Trackio Settings", open=False):
+                project = gr.Textbox(
+                    value="huggingface",
+                    label="Project Name",
+                    info="Project name for experiment tracking (used by Trackio, W&B, etc.)",
+                )
+
+                trackio_space_id = gr.Textbox(
+                    value="trackio", label="Trackio Space ID", info="Hugging Face Space ID for Trackio deployment"
+                )
+
+                hub_private_repo = gr.Checkbox(
+                    value=False, label="Private Repository", info="Make the Hugging Face repository private"
+                )
+
    input_elems.update(
        {
            logging_steps,
@@ -128,6 +143,9 @@ def create_train_tab(engine: "Engine") -> dict[str, "Component"]:
            use_llama_pro,
            enable_thinking,
            report_to,
+            project,
+            trackio_space_id,
+            hub_private_repo,
        }
    )
    elem_dict.update(
@@ -146,6 +164,9 @@ def create_train_tab(engine: "Engine") -> dict[str, "Component"]:
            use_llama_pro=use_llama_pro,
            enable_thinking=enable_thinking,
            report_to=report_to,
+            project=project,
+            trackio_space_id=trackio_space_id,
+            hub_private_repo=hub_private_repo,
        )
    )
Author	SHA1	Message	Date
LittleYanlin	2d9bd2aa14	[fix] qwen3.5 projector path (#10242 ) Co-authored-by: Yaowei Zheng <hiyouga@buaa.edu.cn>	2026-03-04 01:31:09 +08:00
Hertz	c0245c43fc	[model] support Qwen3.5 all series models (#10237 ) Co-authored-by: gatilin <gatilin@tencent.com> Co-authored-by: Yaowei Zheng <hiyouga@buaa.edu.cn>	2026-03-03 17:34:59 +08:00
Parag Ekbote	eb976d75a2	[tracker] Add Trackio Integration for LlamaFactory (#10165 ) Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>	2026-03-03 17:19:37 +08:00