7 Commits

Author SHA1 Message Date
LincolnBurrows2017
70653026f5 [fix] make position_id_per_seconds configurable for Qwen2OmniPlugin (#10281)
Co-authored-by: LincolnBurrows2017 <lincoln@example.com>
2026-03-16 19:42:38 +08:00
Ruijie Hou
246192abd2 [data] correct gpt_oss template format_assistant (#10269) 2026-03-10 21:36:38 +08:00
浮梦
0258dc14d0 [docker] update npu docker (#10268)
Co-authored-by: frozenleaves <frozen@Mac.local>
2026-03-10 19:37:27 +08:00
xxddccaa
3045adf0ba [fix] fallback to audio_processor when feature_extractor is missing (#10267)
Co-authored-by: kevin <742971636@qq.com>
2026-03-10 19:36:41 +08:00
Kingsley
a3d44e3152 [mca] support qwen3.5 (#10265)
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
2026-03-10 10:55:16 +08:00
JiangNan
edeb953bc7 [data] convert filter() to list in read_cloud_json to fix broken empty-check (#10260)
Signed-off-by: JiangNan <1394485448@qq.com>
2026-03-09 17:12:53 +08:00
yizhouChen
d045794387 [docs] fix Python version requirement from 3.10 to >=3.11.0 (#10259)
Co-authored-by: chaiyzh <chaiyzh@126.com>
2026-03-09 16:44:07 +08:00
13 changed files with 48 additions and 24 deletions

View File

@@ -473,7 +473,7 @@ huggingface-cli login
| Mandatory | Minimum | Recommend |
| ------------ | ------- | --------- |
| python | 3.9 | 3.10 |
| python | 3.11 | >=3.11 |
| torch | 2.0.0 | 2.6.0 |
| torchvision | 0.15.0 | 0.21.0 |
| transformers | 4.49.0 | 4.50.0 |

View File

@@ -475,7 +475,7 @@ huggingface-cli login
| 必需项 | 至少 | 推荐 |
| ------------ | ------- | --------- |
| python | 3.9 | 3.10 |
| python | 3.11 | >=3.11 |
| torch | 2.0.0 | 2.6.0 |
| torchvision | 0.15.0 | 0.21.0 |
| transformers | 4.49.0 | 4.50.0 |

View File

@@ -1,6 +1,6 @@
# https://hub.docker.com/r/ascendai/cann/tags
ARG BASE_IMAGE=quay.io/ascend/cann:8.3.rc2-910b-ubuntu22.04-py3.11
ARG BASE_IMAGE=quay.io/ascend/cann:8.5.1-910b-ubuntu22.04-py3.11
FROM ${BASE_IMAGE}
# Installation arguments
@@ -33,9 +33,11 @@ RUN pip config set global.index-url "${PIP_INDEX}" && \
COPY . /app
# Install torch-npu
RUN pip uninstall -y torch torchvision torchaudio && \
pip install --no-cache-dir "torch==2.7.1" "torch-npu==2.7.1" "torchvision==0.22.1" "torchaudio==2.7.1" --index-url "${PYTORCH_INDEX}" && \
pip install --no-cache-dir -e . --no-build-isolation && \
RUN source /usr/local/Ascend/ascend-toolkit/set_env.sh
RUN pip uninstall -y torch torchvision torchaudio
RUN pip install --no-cache-dir -r requirements/npu.txt --index-url "${PYTORCH_INDEX}"
RUN pip install --no-cache-dir -r requirements/deepspeed.txt
RUN pip install --no-cache-dir -e . --no-build-isolation && \
pip install --no-cache-dir -r requirements/metrics.txt --no-build-isolation
# Set up volumes

View File

@@ -33,7 +33,7 @@ services:
dockerfile: ./docker/docker-npu/Dockerfile
context: ../..
args:
BASE_IMAGE: quay.io/ascend/cann:8.3.rc2-a3-ubuntu22.04-py3.11
BASE_IMAGE: quay.io/ascend/cann:8.5.1-a3-ubuntu22.04-py3.11
PIP_INDEX: https://pypi.org/simple
container_name: llamafactory-a3
image: llamafactory:npu-a3

View File

@@ -1,4 +1,4 @@
torch==2.7.1
torch-npu==2.7.1
torch-npu==2.7.1.post2
torchvision==0.22.1
torchaudio==2.7.1

View File

@@ -71,6 +71,7 @@ def convert(
pipeline_model_parallel_size: int = 1,
expert_model_parallel_size: int = 1,
virtual_pipeline_model_parallel_size: int | None = None,
moe_grouped_gemm: bool | None = None,
):
"""Convert checkpoint between MCA and HuggingFace formats.
@@ -84,6 +85,10 @@ def convert(
pipeline_model_parallel_size: Pipeline model parallel size
expert_model_parallel_size: Expert model parallel size
virtual_pipeline_model_parallel_size: Virtual pipeline model parallel size
moe_grouped_gemm: Use grouped gemm for MoE experts. When enabled, expert
weights are stored in a flattened format (linear_fc1.weight0, weight1, ...)
rather than per-expert format (local_experts.0.linear_fc1.weight, ...).
Must match the format used when saving the checkpoint.
"""
if bf16 and fp16:
raise ValueError("bf16 and fp16 cannot be both True.")
@@ -97,8 +102,9 @@ def convert(
pipeline_model_parallel_size=pipeline_model_parallel_size,
expert_model_parallel_size=expert_model_parallel_size,
virtual_pipeline_model_parallel_size=virtual_pipeline_model_parallel_size,
moe_grouped_gemm=moe_grouped_gemm,
transformer_impl="transformer_engine", # hard code here since we default using te for training
)
convert_checkpoint_to_mca(
checkpoint_path,
output_path,

View File

@@ -196,7 +196,7 @@ def read_cloud_json(cloud_path: str) -> list[Any]:
# filter out non-JSON files
files = [x["Key"] for x in fs.listdir(cloud_path)] if fs.isdir(cloud_path) else [cloud_path]
files = filter(lambda file: file.endswith(".json") or file.endswith(".jsonl"), files)
files = list(filter(lambda file: file.endswith(".json") or file.endswith(".jsonl"), files))
if not files:
raise ValueError(f"No JSON/JSONL files found in the specified path: {cloud_path}.")

View File

@@ -161,7 +161,9 @@ class MMPluginMixin:
video_processor: BaseImageProcessor = getattr(
processor, "video_processor", getattr(processor, "image_processor", None)
)
feature_extractor: SequenceFeatureExtractor = getattr(processor, "feature_extractor", None)
feature_extractor: SequenceFeatureExtractor = getattr(processor, "feature_extractor", None) or getattr(
processor, "audio_processor", None
)
if len(images) != 0 and self.image_token is None:
raise ValueError(
"This model does not support image input. Please check whether the correct `template` is used."
@@ -390,7 +392,9 @@ class MMPluginMixin:
mm_inputs.update(video_processor(videos, return_tensors="pt"))
if len(audios) != 0:
feature_extractor: SequenceFeatureExtractor = getattr(processor, "feature_extractor", None)
feature_extractor: SequenceFeatureExtractor = getattr(processor, "feature_extractor", None) or getattr(
processor, "audio_processor", None
)
audios = self._regularize_audios(
audios,
sampling_rate=getattr(processor, "audio_sampling_rate", 16000),
@@ -1876,7 +1880,9 @@ class Qwen2OmniPlugin(Qwen2VLPlugin):
) -> dict[str, "torch.Tensor"]:
image_processor: BaseImageProcessor = getattr(processor, "image_processor", None)
video_processor: BaseVideoProcessor = getattr(processor, "video_processor", None)
feature_extractor: SequenceFeatureExtractor = getattr(processor, "feature_extractor", None)
feature_extractor: SequenceFeatureExtractor = getattr(processor, "feature_extractor", None) or getattr(
processor, "audio_processor", None
)
mm_inputs = {}
if len(images) != 0:
images = self._regularize_images(
@@ -1981,6 +1987,7 @@ class Qwen2OmniPlugin(Qwen2VLPlugin):
f"Each {VIDEO_PLACEHOLDER} must be followed by an {AUDIO_PLACEHOLDER} when using audio in video."
)
position_id_per_seconds: int = getattr(processor, "position_id_per_seconds", 25)
audio_t_index = torch.arange(audio_lengths[num_audio_tokens])
video_t_index = (
torch.arange(video_grid_thw[num_video_tokens][0])
@@ -1992,9 +1999,9 @@ class Qwen2OmniPlugin(Qwen2VLPlugin):
)
.flatten()
* mm_inputs["video_second_per_grid"][num_video_tokens]
* 25 # FIXME hardcode of position_id_per_seconds=25
* position_id_per_seconds
).long()
t_ntoken_per_chunk = 50 # FIXME hardcode: [25 * 2]
t_ntoken_per_chunk = position_id_per_seconds * 2
video_chunk_indices = processor.get_chunked_index(video_t_index, t_ntoken_per_chunk)
audio_chunk_indices = processor.get_chunked_index(audio_t_index, t_ntoken_per_chunk)
placeholder_string = ""

View File

@@ -1113,7 +1113,7 @@ register_template(
register_template(
name="gpt_oss",
format_user=StringFormatter(slots=["<|start|>user<|message|>{{content}}<|end|><|start|>assistant"]),
format_assistant=StringFormatter(slots=["{{content}}<|end|>"]),
format_assistant=StringFormatter(slots=["{{content}}"]),
format_system=StringFormatter(slots=["<|start|>system<|message|>{{content}}<|end|>"]),
default_system="You are ChatGPT, a large language model trained by OpenAI.",
thought_words=("<|channel|>analysis<|message|>", "<|end|><|start|>assistant<|channel|>final<|message|>"),

View File

@@ -69,6 +69,8 @@ MCA_SUPPORTED_MODELS = {
"qwen3",
"qwen3_moe",
"qwen3_next",
"qwen3_5",
"qwen3_5_moe",
}
METHODS = ["full", "freeze", "lora", "oft"]

View File

@@ -470,7 +470,7 @@ def get_train_args(args: dict[str, Any] | list[str] | None = None) -> _TRAIN_CLS
training_args.resume_from_checkpoint is None
and training_args.do_train
and os.path.isdir(training_args.output_dir)
and not training_args.overwrite_output_dir
and not getattr(training_args, "overwrite_output_dir", False) # for mca training args and transformers >= 5.0
and can_resume_from_checkpoint
):
last_checkpoint = get_last_checkpoint(training_args.output_dir)

View File

@@ -228,7 +228,7 @@ class LogCallback(TrainerCallback):
if (
args.should_save
and os.path.exists(os.path.join(args.output_dir, TRAINER_LOG))
and args.overwrite_output_dir
and getattr(args, "overwrite_output_dir", False)
):
logger.warning_rank0_once("Previous trainer log in this folder will be deleted.")
os.remove(os.path.join(args.output_dir, TRAINER_LOG))

View File

@@ -13,6 +13,8 @@
# limitations under the License.
import functools
import json
import os
from collections.abc import Sequence
from copy import deepcopy
from typing import TYPE_CHECKING, Any, Optional
@@ -77,20 +79,25 @@ def _data_collator_wrapper(data_collator: Any):
def _check_model_support(model_args: "ModelArguments"):
from transformers import AutoConfig as HfAutoConfig
if os.path.exists(os.path.join(model_args.model_name_or_path, "mca_config.json")): # load from mcore ckpt
mca_config = json.load(open(os.path.join(model_args.model_name_or_path, "mca_config.json")))
model_type = mca_config.get("hf_model_type", None)
else:
config = HfAutoConfig.from_pretrained(
model_args.model_name_or_path, trust_remote_code=model_args.trust_remote_code
)
model_type = config.model_type
config = HfAutoConfig.from_pretrained(
model_args.model_name_or_path, trust_remote_code=model_args.trust_remote_code
)
if config.model_type not in MCA_SUPPORTED_MODELS:
if model_type not in MCA_SUPPORTED_MODELS:
raise ValueError(
f"Model {config.model_type} is not supported by mcore_adapter."
f"Model {model_type} is not supported by mcore_adapter."
"You can try to upgrade mcore_adapter to the latest version for more supported models."
)
def _freeze_model_parameters(model: Any, finetuning_args: "FinetuningArguments"):
"""Freeze model parameters for qwen_vl series models based on finetuning arguments."""
if getattr(model.config, "hf_model_type", None) not in ["qwen2_vl", "qwen2_5_vl", "qwen3_vl", "qwen3_vl_moe"]:
if getattr(model.config, "hf_model_type", None) not in ["qwen2_vl", "qwen2_5_vl", "qwen3_vl", "qwen3_vl_moe", "qwen3_5", "qwen3_5_moe"]:
return
params_to_freeze = []