[v1] add cli sampler (#9721)

This commit is contained in:
Yaowei Zheng
2026-01-06 23:31:27 +08:00
committed by GitHub
parent e944dc442c
commit ea0b4e2466
45 changed files with 1091 additions and 505 deletions

View File

@@ -1,173 +0,0 @@
# Copyright 2025 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Integration tests for DataLoader with different combinations of packing and dynamic batching.
Tests the 4 scenarios (only a and b are implemented in this file so far):
a) non pack + non dynamic.
b) non pack + dynamic.
c) pack + non dynamic.
d) pack + dynamic.
"""
import torch
from torch.utils.data import DataLoader as TorchDataLoader
from torch.utils.data import Dataset
from transformers import AutoTokenizer
from llamafactory.v1.config.data_args import DataArguments
from llamafactory.v1.core.data_engine import DataEngine
from llamafactory.v1.core.trainer_utils.data_collator import (
DefaultCollator,
)
from llamafactory.v1.core.trainer_utils.data_loader import DataLoader
from llamafactory.v1.plugins.data_plugins.template import QwenTemplate
from llamafactory.v1.utils.batching_queue import TextBatchingQueue
class TensorDataset(Dataset):
    """Map-style dataset that turns DataEngine samples into tensors on access.

    Each item is fetched from the wrapped data engine, its message list is
    located, encoded through the template, and returned as ``torch.long`` tensors.
    """

    def __init__(self, data_engine: DataEngine, processor, template, max_samples: int = None):
        """Store the collaborators and resolve the sample cap.

        Args:
            data_engine: Indexable, sized source of raw samples.
            processor: A tokenizer, or an object exposing one as ``.tokenizer``.
            template: Object whose ``encode_messages(tokenizer, messages)`` does the encoding.
            max_samples: Upper bound on the reported length; a falsy value
                (``None`` or ``0``) falls back to ``len(data_engine)``.
        """
        self.data_engine = data_engine
        self.processor = processor
        self.template = template
        self.max_samples = max_samples if max_samples else len(data_engine)
        # Use the nested tokenizer when the processor exposes one, otherwise the processor itself.
        self.tokenizer = getattr(processor, "tokenizer", processor)

    def __len__(self):
        """Return the smaller of the sample cap and the data engine size."""
        available = len(self.data_engine)
        return min(self.max_samples, available)

    def __getitem__(self, idx):
        """Encode the sample at ``idx`` into ``input_ids`` / ``attention_mask`` / ``labels``.

        Raises:
            ValueError: If no message list can be located in the sample.
        """
        sample = self.data_engine[idx]

        # Well-known field names win, in this order; the first one present is used as-is.
        for field in ("messages", "conversations", "conversation"):
            if field in sample:
                messages = sample[field]
                break
        else:
            # Fallback: the first public field holding a non-empty list of dicts with a "role".
            messages = None
            for field, candidate in sample.items():
                if field.startswith("_"):
                    continue  # private metadata such as _dataset_name
                if not (isinstance(candidate, list) and len(candidate) > 0):
                    continue
                head = candidate[0]
                if isinstance(head, dict) and "role" in head:
                    messages = candidate
                    break

        if messages is None:
            raise ValueError(f"Could not find messages in sample: {list(sample.keys())}")

        encoded = self.template.encode_messages(self.tokenizer, messages)
        return {
            name: torch.tensor(encoded[name], dtype=torch.long)
            for name in ("input_ids", "attention_mask", "labels")
        }
def create_real_dataset(max_samples: int = 20, batch_size: int = 4):
    """Build a torch DataLoader over the demo SFT dataset, plus its tokenizer and template.

    Args:
        max_samples: Cap forwarded to ``TensorDataset``.
        batch_size: Batch size of the returned torch DataLoader.

    Returns:
        A ``(torch_dataloader, processor, template)`` tuple.
    """
    engine = DataEngine(DataArguments(dataset="llamafactory/v1-sft-demo"))
    tokenizer = AutoTokenizer.from_pretrained("llamafactory/tiny-random-qwen2.5")
    qwen_template = QwenTemplate()
    tensor_dataset = TensorDataset(engine, tokenizer, qwen_template, max_samples=max_samples)

    # Identity collate: each batch is handed on as the unmodified list of per-sample dicts.
    loader = TorchDataLoader(
        tensor_dataset,
        batch_size=batch_size,
        shuffle=False,
        collate_fn=lambda x: x,
    )
    return loader, tokenizer, qwen_template
class TestDataLoaderNonPackNonDynamic:
    """Scenario a): no packing, no dynamic batching."""

    def test_basic_functionality(self):
        """The first micro batch has the expected keys and a 2-D ``input_ids`` with one row."""
        torch_dataloader, processor, template = create_real_dataset(max_samples=80, batch_size=8)

        # No batching queue is passed, so dynamic batching is off.
        data_loader = DataLoader(
            dataloader=torch_dataloader,
            collate_fn=DefaultCollator(processor=processor, template=template),
            num_micro_batch=1,
            batching_queue=None,
        )

        batches = list(data_loader)
        assert len(batches) > 0

        # First micro batch of the first batch.
        first_micro_batch = batches[0][0]
        for key in ("input_ids", "attention_mask", "labels"):
            assert key in first_micro_batch

        input_ids = first_micro_batch["input_ids"]
        assert input_ids.shape[0] == 1  # batch_size=1
        assert input_ids.ndim == 2  # [batch_size, seq_len]
class TestDataLoaderNonPackDynamic:
    """Test case b) non pack + dynamic."""

    def test_basic_functionality(self):
        """Test DataLoader without packing but with dynamic batching.

        Every micro batch of the first batch must hold at most ``token_budget``
        attended tokens, where ``token_budget`` is the value given to the
        batching queue as ``token_micro_bsz``.
        """
        # Single source of truth for the limit used by both the queue and the assertion.
        token_budget = 120

        torch_dataloader, processor, template = create_real_dataset(max_samples=80, batch_size=8)
        collator = DefaultCollator(processor=processor, template=template)

        # Create batching queue for dynamic batching
        batching_queue = TextBatchingQueue(
            token_micro_bsz=token_budget,
            buffer_size=8,
        )
        data_loader = DataLoader(
            dataloader=torch_dataloader,
            collate_fn=collator,
            num_micro_batch=4,
            batching_queue=batching_queue,
        )

        batches = list(data_loader)
        # Check for emptiness BEFORE indexing, so an empty loader is reported as a
        # failed assertion rather than an IndexError raised by ``batches[0]``.
        assert len(batches) > 0

        micro_batch_tokens_first = [micro_batch["attention_mask"].sum() for micro_batch in batches[0]]
        assert all(num_tokens <= token_budget for num_tokens in micro_batch_tokens_first)

View File

@@ -15,18 +15,18 @@
import torch
from llamafactory.v1.config.model_args import ModelArguments, PluginConfig
from llamafactory.v1.core.model_loader import ModelLoader
from llamafactory.v1.core.model_engine import ModelEngine
def test_tiny_qwen():
# Loads "llamafactory/tiny-random-qwen2.5" and checks the processor, config and model
# classes as well as the bfloat16 dtype.
# NOTE(review): this span comes from a rendered diff whose +/- markers were stripped, so the
# ModelLoader statements and the ModelEngine statements look like the before/after versions
# of the same checks rather than two independent checks — confirm against the real file.
from transformers import Qwen2Config, Qwen2ForCausalLM, Qwen2TokenizerFast
model_args = ModelArguments(model="llamafactory/tiny-random-qwen2.5")
model_loader = ModelLoader(model_args)
assert isinstance(model_loader.processor, Qwen2TokenizerFast)
assert isinstance(model_loader.model.config, Qwen2Config)
assert isinstance(model_loader.model, Qwen2ForCausalLM)
assert model_loader.model.dtype == torch.bfloat16
model_engine = ModelEngine(model_args)
assert isinstance(model_engine.processor, Qwen2TokenizerFast)
assert isinstance(model_engine.model_config, Qwen2Config)
assert isinstance(model_engine.model, Qwen2ForCausalLM)
assert model_engine.model.dtype == torch.bfloat16
def test_tiny_qwen_with_kernel_plugin():
@@ -37,13 +37,14 @@ def test_tiny_qwen_with_kernel_plugin():
model_args = ModelArguments(
model="llamafactory/tiny-random-qwen2.5", kernel_config=PluginConfig(name="auto", include_kernels="auto")
)
model_loader = ModelLoader(model_args)
model_engine = ModelEngine(model_args)
# test enable apply kernel plugin
if hasattr(torch, "npu"):
assert model_loader.model.model.layers[0].input_layernorm.forward.__code__ == npu_rms_norm_forward.__code__
assert model_engine.model.model.layers[0].input_layernorm.forward.__code__ == npu_rms_norm_forward.__code__
else:
assert model_loader.model.model.layers[0].input_layernorm.forward.__code__ != npu_rms_norm_forward.__code__
assert isinstance(model_loader.model, Qwen2ForCausalLM)
assert model_engine.model.model.layers[0].input_layernorm.forward.__code__ != npu_rms_norm_forward.__code__
assert isinstance(model_engine.model, Qwen2ForCausalLM)
if __name__ == "__main__":

View File

@@ -0,0 +1,171 @@
# Copyright 2025 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Integration tests for DataLoader with different combinations of packing and dynamic batching.
Tests the 4 scenarios (the tests below are currently commented out, and only a and b have been written):
a) non pack + non dynamic.
b) non pack + dynamic.
c) pack + non dynamic.
d) pack + dynamic.
"""
# import torch
# from torch.utils.data import DataLoader as TorchDataLoader
# from torch.utils.data import Dataset
# from transformers import AutoTokenizer
# from llamafactory.v1.config.data_args import DataArguments
# from llamafactory.v1.core.data_engine import DataEngine
# from llamafactory.v1.core.utils.data_collator import DefaultCollator
# from llamafactory.v1.core.utils.data_loader import DataLoader
# from llamafactory.v1.plugins.data_plugins.rendering import QwenTemplate
# from llamafactory.v1.utils.batching_queue import TextBatchingQueue
# class TensorDataset(Dataset):
# """Wrapper dataset that converts DataEngine samples to tensor format."""
# def __init__(self, data_engine: DataEngine, processor, template, max_samples: int = None):
# self.data_engine = data_engine
# self.processor = processor
# self.template = template
# self.max_samples = max_samples or len(data_engine)
# self.tokenizer = processor.tokenizer if hasattr(processor, "tokenizer") else processor
# def __len__(self):
# return min(self.max_samples, len(self.data_engine))
# def __getitem__(self, idx):
# # Get sample from DataEngine
# sample = self.data_engine[idx]
# # Extract messages from sample
# # DataEngine returns samples with format like {"messages": [...], ...}
# # For llamafactory/v1-sft-demo, the format should have "messages" field
# messages = None
# if "messages" in sample:
# messages = sample["messages"]
# elif "conversations" in sample:
# messages = sample["conversations"]
# elif "conversation" in sample:
# messages = sample["conversation"]
# else:
# # Try to find message-like fields (skip _dataset_name)
# for key, value in sample.items():
# if key.startswith("_"):
# continue
# if isinstance(value, list) and len(value) > 0:
# # Check if it looks like a message list
# if isinstance(value[0], dict) and "role" in value[0]:
# messages = value
# break
# if messages is None:
# raise ValueError(f"Could not find messages in sample: {list(sample.keys())}")
# # Encode messages using template
# encoded = self.template.encode_messages(self.tokenizer, messages)
# # Convert to tensors
# return {
# "input_ids": torch.tensor(encoded["input_ids"], dtype=torch.long),
# "attention_mask": torch.tensor(encoded["attention_mask"], dtype=torch.long),
# "labels": torch.tensor(encoded["labels"], dtype=torch.long),
# }
# def create_real_dataset(max_samples: int = 20, batch_size: int = 4):
# """Create a real dataset using DataEngine."""
# data_args = DataArguments(dataset="llamafactory/v1-sft-demo")
# data_engine = DataEngine(data_args)
# # Create processor and template
# processor = AutoTokenizer.from_pretrained("llamafactory/tiny-random-qwen2.5")
# template = QwenTemplate()
# # Create tensor dataset
# raw_data_dataset = TensorDataset(data_engine, processor, template, max_samples=max_samples)
# # Create torch DataLoader
# torch_dataloader = TorchDataLoader(
# raw_data_dataset,
# batch_size=batch_size,
# shuffle=False,
# collate_fn=lambda x: x,
# )
# return torch_dataloader, processor, template
# class TestDataLoaderNonPackNonDynamic:
# """Test case a) non pack + non dynamic."""
# def test_basic_functionality(self):
# """Test DataLoader without packing and without dynamic batching."""
# # Create real dataset
# torch_dataloader, processor, template = create_real_dataset(max_samples=80, batch_size=8)
# # Create collator (non-packing)
# collator = DefaultCollator(processor=processor, template=template)
# # Create DataLoader without batching_queue (non-dynamic)
# data_loader = DataLoader(
# dataloader=torch_dataloader,
# collate_fn=collator,
# num_micro_batch=1,
# batching_queue=None,
# )
# # Iterate and check results
# batches = list(iter(data_loader))
# assert len(batches) > 0
# # Check first batch
# one_batch = batches[0]
# micro_batches = one_batch[0]
# assert "input_ids" in micro_batches
# assert "attention_mask" in micro_batches
# assert "labels" in micro_batches
# assert micro_batches["input_ids"].shape[0] == 1 # batch_size=1
# assert micro_batches["input_ids"].ndim == 2 # [batch_size, seq_len]
# class TestDataLoaderNonPackDynamic:
# """Test case b) non pack + dynamic."""
# def test_basic_functionality(self):
# """Test DataLoader without packing but with dynamic batching."""
# # Create real dataset
# torch_dataloader, processor, template = create_real_dataset(max_samples=80, batch_size=8)
# collator = DefaultCollator(processor=processor, template=template)
# # Create batching queue for dynamic batching
# batching_queue = TextBatchingQueue(
# token_micro_bsz=120,
# buffer_size=8,
# )
# data_loader = DataLoader(
# dataloader=torch_dataloader,
# collate_fn=collator,
# num_micro_batch=4,
# batching_queue=batching_queue,
# )
# # Iterate and check
# batches = list(iter(data_loader))
# micro_batch_tokens_first = [micro_batch["attention_mask"].sum() for micro_batch in batches[0]]
# assert all(num_tokens <= 120 for num_tokens in micro_batch_tokens_first)
# assert len(batches) > 0

View File

@@ -0,0 +1,65 @@
# Copyright 2025 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from transformers import AutoTokenizer
from llamafactory.v1.core.utils.rendering import Renderer
from llamafactory.v1.utils.types import Processor
# One (role, text) pair per turn; both message formats below are derived from it
# so that they always describe the same conversation.
_TURNS = (
    ("system", "You are a helpful assistant."),
    ("user", "What is LLM?"),
    ("assistant", "LLM stands for Large Language Model."),
)

# Turns as flat role/content dicts with a plain string content.
HF_MESSAGES = [{"role": role, "content": text} for role, text in _TURNS]

# Same turns with the content wrapped in a list holding a single typed text part.
V1_MESSAGES = [{"role": role, "content": [{"type": "text", "value": text}]} for role, text in _TURNS]
def test_chatml_rendering():
    """Renderer output for the "chatml" template matches the tokenizer's own chat template."""
    tokenizer: Processor = AutoTokenizer.from_pretrained("llamafactory/tiny-random-qwen3")
    renderer = Renderer(template="chatml", processor=tokenizer)

    # Generation mode: the final assistant turn is dropped and nothing carries a label.
    prompt_ids = tokenizer.apply_chat_template(HF_MESSAGES[:-1], add_generation_prompt=True)
    rendered_prompt = renderer.render_messages(V1_MESSAGES[:-1], is_generate=True)
    prompt_len = len(prompt_ids)
    assert rendered_prompt["input_ids"] == prompt_ids
    assert rendered_prompt["attention_mask"] == [1] * prompt_len
    assert rendered_prompt["labels"] == [-100] * prompt_len
    assert rendered_prompt["loss_weights"] == [0.0] * prompt_len

    # Full conversation: only the tokens after the context prefix are expected to be labelled.
    context_ids = tokenizer.apply_chat_template(HF_MESSAGES[:-1], add_generation_prompt=False)
    full_ids = tokenizer.apply_chat_template(HF_MESSAGES, add_generation_prompt=False)
    rendered_full = renderer.render_messages(V1_MESSAGES, is_generate=False)
    context_len = len(context_ids)
    full_len = len(full_ids)
    answer_len = full_len - context_len
    assert rendered_full["input_ids"] == full_ids
    assert rendered_full["attention_mask"] == [1] * full_len
    assert rendered_full["labels"] == [-100] * context_len + full_ids[context_len:]
    assert rendered_full["loss_weights"] == [0.0] * context_len + [1.0] * answer_len
def test_chatml_parse():
    """Parsing the plain generated text yields the last entry of ``V1_MESSAGES``."""
    tokenizer: Processor = AutoTokenizer.from_pretrained("llamafactory/tiny-random-qwen3")
    renderer = Renderer(template="chatml", processor=tokenizer)

    expected_message = V1_MESSAGES[-1]
    parsed = renderer.parse_message("LLM stands for Large Language Model.")
    assert parsed == expected_message
if __name__ == "__main__":
    # Direct execution runs both tests in definition order, without a test runner.
    for _case in (test_chatml_rendering, test_chatml_parse):
        _case()

View File

@@ -54,7 +54,7 @@ def test_sharegpt_converter():
"conversations": [
{"from": "system", "value": "System"},
{"from": "human", "value": "User"},
{"from": "function_call", "value": "Tool"},
{"from": "function_call", "value": "1"},
{"from": "observation", "value": "Observation"},
{"from": "gpt", "value": "Assistant"},
]
@@ -63,7 +63,7 @@ def test_sharegpt_converter():
"messages": [
{"content": [{"type": "text", "value": "System"}], "loss_weight": 0.0, "role": "system"},
{"content": [{"type": "text", "value": "User"}], "loss_weight": 0.0, "role": "user"},
{"content": [{"type": "tool_calls", "value": "Tool"}], "loss_weight": 1.0, "role": "assistant"},
{"content": [{"type": "tool_call", "value": "1"}], "loss_weight": 1.0, "role": "assistant"},
{"content": [{"type": "text", "value": "Observation"}], "loss_weight": 0.0, "role": "tool"},
{"content": [{"type": "text", "value": "Assistant"}], "loss_weight": 1.0, "role": "assistant"},
]

View File

@@ -12,11 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import pytest
from llamafactory.v1.accelerator.interface import DistributedInterface
from llamafactory.v1.config.arg_parser import get_args
from llamafactory.v1.core.model_loader import ModelLoader
from llamafactory.v1.core.model_engine import ModelEngine
def test_init_on_meta():
@@ -26,11 +25,10 @@ def test_init_on_meta():
init_config={"name": "init_on_meta"},
)
)
model_loader = ModelLoader(model_args=model_args)
assert model_loader.model.device.type == "meta"
model_engine = ModelEngine(model_args=model_args)
assert model_engine.model.device.type == "meta"
@pytest.mark.runs_on(["cuda", "npu"])
def test_init_on_rank0():
_, model_args, *_ = get_args(
dict(
@@ -38,11 +36,11 @@ def test_init_on_rank0():
init_config={"name": "init_on_rank0"},
)
)
model_loader = ModelLoader(model_args=model_args)
model_engine = ModelEngine(model_args=model_args)
if DistributedInterface().get_rank() == 0:
assert model_loader.model.device.type == "cpu"
assert model_engine.model.device.type == "cpu"
else:
assert model_loader.model.device.type == "meta"
assert model_engine.model.device.type == "meta"
def test_init_on_default():
@@ -52,5 +50,5 @@ def test_init_on_default():
init_config={"name": "init_on_default"},
)
)
model_loader = ModelLoader(model_args=model_args)
assert model_loader.model.device.type == DistributedInterface().current_accelerator.type
model_engine = ModelEngine(model_args=model_args)
assert model_engine.model.device == DistributedInterface().current_device

View File

@@ -0,0 +1,41 @@
# Copyright 2025 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import pytest
from llamafactory.v1.config import ModelArguments, SampleArguments
from llamafactory.v1.core.model_engine import ModelEngine
from llamafactory.v1.samplers.cli_sampler import SyncSampler
@pytest.mark.runs_on(["cuda", "npu"])
def test_sync_sampler():
    """Stream a reply from ``SyncSampler`` and check that it parses to the expected message."""
    model_args = ModelArguments(model="Qwen/Qwen3-4B-Instruct-2507")
    sample_args = SampleArguments()
    model_engine = ModelEngine(model_args)
    sampler = SyncSampler(sample_args, model_args, model_engine.model, model_engine.renderer)

    prompt = [{"role": "user", "content": [{"type": "text", "value": "Say 'This is a test.'"}]}]

    # generate() is consumed as an iterable of text pieces; join them into the full reply.
    response = "".join(sampler.generate(prompt))
    print(response)

    expected_message = {
        "role": "assistant",
        "content": [{"type": "text", "value": "This is a test."}],
    }
    assert model_engine.renderer.parse_message(response) == expected_message
if __name__ == "__main__":
    # Direct execution calls the test function as a plain function, without a test runner.
    test_sync_sampler()