diff --git a/scripts/api_example/test_image.py b/scripts/api_example/test_image.py index 34a937e56..77d6d7c82 100644 --- a/scripts/api_example/test_image.py +++ b/scripts/api_example/test_image.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/scripts/api_example/test_toolcall.py b/scripts/api_example/test_toolcall.py index 04410db01..6a0a6f381 100644 --- a/scripts/api_example/test_toolcall.py +++ b/scripts/api_example/test_toolcall.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/scripts/convert_ckpt/llamafy_baichuan2.py b/scripts/convert_ckpt/llamafy_baichuan2.py index 077703f6a..75e849b21 100644 --- a/scripts/convert_ckpt/llamafy_baichuan2.py +++ b/scripts/convert_ckpt/llamafy_baichuan2.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -19,15 +19,10 @@ from typing import Any, Dict import fire import torch +from huggingface_hub import split_torch_state_dict_into_shards from safetensors.torch import save_file from tqdm import tqdm -from transformers.modeling_utils import ( - SAFE_WEIGHTS_INDEX_NAME, - SAFE_WEIGHTS_NAME, - WEIGHTS_INDEX_NAME, - WEIGHTS_NAME, - shard_checkpoint, -) +from transformers.modeling_utils import SAFE_WEIGHTS_INDEX_NAME, SAFE_WEIGHTS_NAME, WEIGHTS_INDEX_NAME, WEIGHTS_NAME CONFIG_NAME = "config.json" @@ -40,34 +35,42 @@ def save_weight(input_dir: str, output_dir: str, shard_size: str, save_safetenso shard_weight = torch.load(os.path.join(input_dir, filepath), map_location="cpu") baichuan2_state_dict.update(shard_weight) - llama2_state_dict: Dict[str, torch.Tensor] = OrderedDict() + llama_state_dict: Dict[str, torch.Tensor] = OrderedDict() for key, value in tqdm(baichuan2_state_dict.items(), desc="Convert format"): if "W_pack" in key: proj_size = value.size(0) // 3 - llama2_state_dict[key.replace("W_pack", "q_proj")] = value[:proj_size, :] - llama2_state_dict[key.replace("W_pack", "k_proj")] = value[proj_size : 2 * proj_size, :] - llama2_state_dict[key.replace("W_pack", "v_proj")] = value[2 * proj_size :, :] + llama_state_dict[key.replace("W_pack", "q_proj")] = value[:proj_size, :] + llama_state_dict[key.replace("W_pack", "k_proj")] = value[proj_size : 2 * proj_size, :] + llama_state_dict[key.replace("W_pack", "v_proj")] = value[2 * proj_size :, :] elif "lm_head" in key: - llama2_state_dict[key] = torch.nn.functional.normalize(value) + llama_state_dict[key] = torch.nn.functional.normalize(value) else: - llama2_state_dict[key] = value + llama_state_dict[key] = value weights_name = SAFE_WEIGHTS_NAME if save_safetensors else WEIGHTS_NAME - shards, index = shard_checkpoint(llama2_state_dict, max_shard_size=shard_size, weights_name=weights_name) - - for shard_file, shard in tqdm(shards.items(), desc="Save weights"): + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + state_dict_split = split_torch_state_dict_into_shards( + llama_state_dict, filename_pattern=filename_pattern, max_shard_size=shard_size + ) + for shard_file, tensors in tqdm(state_dict_split.filename_to_tensors.items(), desc="Save weights"): + shard = {tensor: llama_state_dict[tensor].contiguous() for tensor in tensors} if save_safetensors: save_file(shard, os.path.join(output_dir, shard_file), metadata={"format": "pt"}) else: torch.save(shard, os.path.join(output_dir, shard_file)) - if index is None: - print(f"Model weights saved in {os.path.join(output_dir, WEIGHTS_NAME)}") + if not state_dict_split.is_sharded: + print(f"Model weights saved in {os.path.join(output_dir, weights_name)}.") else: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } index_name = SAFE_WEIGHTS_INDEX_NAME if save_safetensors else WEIGHTS_INDEX_NAME with open(os.path.join(output_dir, index_name), "w", encoding="utf-8") as f: json.dump(index, f, indent=2, sort_keys=True) - print(f"Model weights saved in {output_dir}") + + print(f"Model weights saved in {output_dir}.") def save_config(input_dir: str, output_dir: str): @@ -81,6 +84,7 @@ def save_config(input_dir: str, output_dir: str): with open(os.path.join(output_dir, CONFIG_NAME), "w", encoding="utf-8") as f: json.dump(llama2_config_dict, f, indent=2) + print(f"Model config saved in {os.path.join(output_dir, CONFIG_NAME)}") diff --git a/scripts/convert_ckpt/llamafy_qwen.py b/scripts/convert_ckpt/llamafy_qwen.py index beabe4b19..bb3fe5191 100644 --- a/scripts/convert_ckpt/llamafy_qwen.py +++ b/scripts/convert_ckpt/llamafy_qwen.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -19,16 +19,11 @@ from typing import Any, Dict import fire import torch +from huggingface_hub import split_torch_state_dict_into_shards from safetensors import safe_open from safetensors.torch import save_file from tqdm import tqdm -from transformers.modeling_utils import ( - SAFE_WEIGHTS_INDEX_NAME, - SAFE_WEIGHTS_NAME, - WEIGHTS_INDEX_NAME, - WEIGHTS_NAME, - shard_checkpoint, -) +from transformers.modeling_utils import SAFE_WEIGHTS_INDEX_NAME, SAFE_WEIGHTS_NAME, WEIGHTS_INDEX_NAME, WEIGHTS_NAME from transformers.utils import check_min_version @@ -49,60 +44,68 @@ def save_weight(input_dir: str, output_dir: str, shard_size: str, save_safetenso for key in f.keys(): qwen_state_dict[key] = f.get_tensor(key) - llama2_state_dict: Dict[str, torch.Tensor] = OrderedDict() + llama_state_dict: Dict[str, torch.Tensor] = OrderedDict() torch_dtype = None for key, value in tqdm(qwen_state_dict.items(), desc="Convert format"): if torch_dtype is None: torch_dtype = value.dtype if "wte" in key: - llama2_state_dict["model.embed_tokens.weight"] = value + llama_state_dict["model.embed_tokens.weight"] = value elif "ln_f" in key: - llama2_state_dict["model.norm.weight"] = value + llama_state_dict["model.norm.weight"] = value else: key = key.replace("transformer.h", "model.layers") if "attn.c_attn" in key: proj_size = value.size(0) // 3 - llama2_state_dict[key.replace("attn.c_attn", "self_attn.q_proj")] = value[:proj_size, ...] - llama2_state_dict[key.replace("attn.c_attn", "self_attn.k_proj")] = value[ + llama_state_dict[key.replace("attn.c_attn", "self_attn.q_proj")] = value[:proj_size, ...] + llama_state_dict[key.replace("attn.c_attn", "self_attn.k_proj")] = value[ proj_size : 2 * proj_size, ... ] - llama2_state_dict[key.replace("attn.c_attn", "self_attn.v_proj")] = value[2 * proj_size :, ...] + llama_state_dict[key.replace("attn.c_attn", "self_attn.v_proj")] = value[2 * proj_size :, ...] elif "attn.c_proj" in key: - llama2_state_dict[key.replace("attn.c_proj", "self_attn.o_proj")] = value - llama2_state_dict[key.replace("attn.c_proj.weight", "self_attn.o_proj.bias")] = torch.zeros_like( + llama_state_dict[key.replace("attn.c_proj", "self_attn.o_proj")] = value + llama_state_dict[key.replace("attn.c_proj.weight", "self_attn.o_proj.bias")] = torch.zeros_like( value[:, 0] ).squeeze() elif "ln_1" in key: - llama2_state_dict[key.replace("ln_1", "input_layernorm")] = value + llama_state_dict[key.replace("ln_1", "input_layernorm")] = value elif "ln_2" in key: - llama2_state_dict[key.replace("ln_2", "post_attention_layernorm")] = value + llama_state_dict[key.replace("ln_2", "post_attention_layernorm")] = value elif "mlp.w1" in key: - llama2_state_dict[key.replace("mlp.w1", "mlp.up_proj")] = value + llama_state_dict[key.replace("mlp.w1", "mlp.up_proj")] = value elif "mlp.w2" in key: - llama2_state_dict[key.replace("mlp.w2", "mlp.gate_proj")] = value + llama_state_dict[key.replace("mlp.w2", "mlp.gate_proj")] = value elif "mlp.c_proj" in key: - llama2_state_dict[key.replace("mlp.c_proj", "mlp.down_proj")] = value + llama_state_dict[key.replace("mlp.c_proj", "mlp.down_proj")] = value elif "lm_head" in key: - llama2_state_dict[key] = value + llama_state_dict[key] = value else: raise KeyError(f"Unable to process key {key}") weights_name = SAFE_WEIGHTS_NAME if save_safetensors else WEIGHTS_NAME - shards, index = shard_checkpoint(llama2_state_dict, max_shard_size=shard_size, weights_name=weights_name) - - for shard_file, shard in tqdm(shards.items(), desc="Save weights"): + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + state_dict_split = split_torch_state_dict_into_shards( + llama_state_dict, filename_pattern=filename_pattern, max_shard_size=shard_size + ) + for shard_file, tensors in tqdm(state_dict_split.filename_to_tensors.items(), desc="Save weights"): + shard = {tensor: llama_state_dict[tensor].contiguous() for tensor in tensors} if save_safetensors: save_file(shard, os.path.join(output_dir, shard_file), metadata={"format": "pt"}) else: torch.save(shard, os.path.join(output_dir, shard_file)) - if index is None: - print(f"Model weights saved in {os.path.join(output_dir, weights_name)}") + if not state_dict_split.is_sharded: + print(f"Model weights saved in {os.path.join(output_dir, weights_name)}.") else: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } index_name = SAFE_WEIGHTS_INDEX_NAME if save_safetensors else WEIGHTS_INDEX_NAME with open(os.path.join(output_dir, index_name), "w", encoding="utf-8") as f: json.dump(index, f, indent=2, sort_keys=True) - print(f"Model weights saved in {output_dir}") + + print(f"Model weights saved in {output_dir}.") return str(torch_dtype).replace("torch.", "") @@ -134,6 +137,7 @@ def save_config(input_dir: str, output_dir: str, torch_dtype: str): with open(os.path.join(output_dir, CONFIG_NAME), "w", encoding="utf-8") as f: json.dump(llama2_config_dict, f, indent=2) + print(f"Model config saved in {os.path.join(output_dir, CONFIG_NAME)}") diff --git a/scripts/llama_pro.py b/scripts/llama_pro.py index 447890f4f..dd10b5255 100644 --- a/scripts/llama_pro.py +++ b/scripts/llama_pro.py @@ -1,4 +1,4 @@ -# Copyright 2024 Tencent Inc. and the LlamaFactory team. +# Copyright 2025 Tencent Inc. and the LlamaFactory team. # # This code is inspired by the Tencent's LLaMA-Pro library. # https://github.com/TencentARC/LLaMA-Pro/blob/main/scripts/block_expansion.py @@ -18,20 +18,15 @@ import json import os from collections import OrderedDict -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Dict import fire import torch +from huggingface_hub import split_torch_state_dict_into_shards from safetensors.torch import save_file from tqdm import tqdm from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer, PreTrainedModel -from transformers.modeling_utils import ( - SAFE_WEIGHTS_INDEX_NAME, - SAFE_WEIGHTS_NAME, - WEIGHTS_INDEX_NAME, - WEIGHTS_NAME, - shard_checkpoint, -) +from transformers.modeling_utils import SAFE_WEIGHTS_INDEX_NAME, SAFE_WEIGHTS_NAME, WEIGHTS_INDEX_NAME, WEIGHTS_NAME if TYPE_CHECKING: @@ -46,41 +41,36 @@ def block_expansion( model_name_or_path: str, output_dir: str, num_expand: int, - shard_size: str = "2GB", + shard_size: str = "5GB", save_safetensors: bool = True, ): r""" - Performs block expansion for LLaMA, Mistral, Qwen1.5 or Yi models. + Performs block expansion for LLaMA, Mistral, Qwen2 or Yi models. Usage: python llama_pro.py --model_name_or_path meta-llama/Llama-2-7b-hf --output_dir llama2_pro --num_expand 8 """ - config: "PretrainedConfig" = AutoConfig.from_pretrained(model_name_or_path) + config: "PretrainedConfig" = AutoConfig.from_pretrained(model_name_or_path, trust_remote_code=True) num_layers = getattr(config, "num_hidden_layers") - setattr(config, "num_hidden_layers", num_layers + num_expand) - config.save_pretrained(output_dir) - - tokenizer = AutoTokenizer.from_pretrained(model_name_or_path) - tokenizer.save_pretrained(output_dir) - - config = AutoConfig.from_pretrained(model_name_or_path) # load the original one - if save_safetensors: - setattr(config, "tie_word_embeddings", False) # safetensors does not allow shared weights - - model = AutoModelForCausalLM.from_pretrained( - model_name_or_path, - config=config, - torch_dtype="auto", - trust_remote_code=True, - low_cpu_mem_usage=True, - ) - assert isinstance(model, PreTrainedModel) # type hint - state_dict = model.state_dict() - if num_layers % num_expand != 0: raise ValueError(f"`num_layers` {num_layers} should be divisible by `num_expand` {num_expand}.") + setattr(config, "num_hidden_layers", num_layers + num_expand) + config.save_pretrained(output_dir) + + tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True) + tokenizer.save_pretrained(output_dir) + + print(f"Expanding model of {num_layers} layers to {num_layers + num_expand} layers.") + model = AutoModelForCausalLM.from_pretrained( + model_name_or_path, torch_dtype="auto", device_map="cpu", trust_remote_code=True, low_cpu_mem_usage=True + ) + assert isinstance(model, PreTrainedModel) # type hint + if save_safetensors and getattr(model.config, "tie_word_embeddings", False): + del model.lm_head # safetensors does not allow shared weights + split = num_layers // num_expand layer_cnt = 0 - output_state_dict = OrderedDict() + state_dict = model.state_dict() + output_state_dict: Dict[str, "torch.Tensor"] = OrderedDict() for i in range(num_layers): for key, value in state_dict.items(): if f".{i:d}." in key: @@ -104,17 +94,24 @@ def block_expansion( output_state_dict[key] = value weights_name = SAFE_WEIGHTS_NAME if save_safetensors else WEIGHTS_NAME - shards, index = shard_checkpoint(output_state_dict, max_shard_size=shard_size, weights_name=weights_name) - - for shard_file, shard in tqdm(shards.items(), desc="Save weights"): + filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors") + state_dict_split = split_torch_state_dict_into_shards( + output_state_dict, filename_pattern=filename_pattern, max_shard_size=shard_size + ) + for shard_file, tensors in tqdm(state_dict_split.filename_to_tensors.items(), desc="Save weights"): + shard = {tensor: output_state_dict[tensor].contiguous() for tensor in tensors} if save_safetensors: save_file(shard, os.path.join(output_dir, shard_file), metadata={"format": "pt"}) else: torch.save(shard, os.path.join(output_dir, shard_file)) - if index is None: + if not state_dict_split.is_sharded: print(f"Model weights saved in {os.path.join(output_dir, weights_name)}.") else: + index = { + "metadata": state_dict_split.metadata, + "weight_map": state_dict_split.tensor_to_filename, + } index_name = SAFE_WEIGHTS_INDEX_NAME if save_safetensors else WEIGHTS_INDEX_NAME with open(os.path.join(output_dir, index_name), "w", encoding="utf-8") as f: json.dump(index, f, indent=2, sort_keys=True) diff --git a/scripts/loftq_init.py b/scripts/loftq_init.py index 5e640843d..83e38e882 100644 --- a/scripts/loftq_init.py +++ b/scripts/loftq_init.py @@ -1,4 +1,4 @@ -# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# Copyright 2025 HuggingFace Inc. and the LlamaFactory team. # # This code is based on the HuggingFace's PEFT library. # https://github.com/huggingface/peft/blob/v0.10.0/examples/loftq_finetuning/quantize_save_load.py diff --git a/scripts/pissa_init.py b/scripts/pissa_init.py index f7092f0e6..3be11fbf0 100644 --- a/scripts/pissa_init.py +++ b/scripts/pissa_init.py @@ -1,4 +1,4 @@ -# Copyright 2024 HuggingFace Inc. and the LlamaFactory team. +# Copyright 2025 HuggingFace Inc. and the LlamaFactory team. # # This code is based on the HuggingFace's PEFT library. # https://github.com/huggingface/peft/blob/v0.11.0/examples/pissa_finetuning/preprocess.py diff --git a/scripts/stat_utils/cal_flops.py b/scripts/stat_utils/cal_flops.py index 2bcf697bb..a9eb033f5 100644 --- a/scripts/stat_utils/cal_flops.py +++ b/scripts/stat_utils/cal_flops.py @@ -1,4 +1,4 @@ -# Copyright 2024 Microsoft Corporation and the LlamaFactory team. +# Copyright 2025 Microsoft Corporation and the LlamaFactory team. # # This code is inspired by the Microsoft's DeepSpeed library. # https://www.deepspeed.ai/tutorials/flops-profiler/ diff --git a/scripts/stat_utils/cal_lr.py b/scripts/stat_utils/cal_lr.py index 21206a28e..85921d906 100644 --- a/scripts/stat_utils/cal_lr.py +++ b/scripts/stat_utils/cal_lr.py @@ -1,4 +1,4 @@ -# Copyright 2024 imoneoi and the LlamaFactory team. +# Copyright 2025 imoneoi and the LlamaFactory team. # # This code is inspired by the imoneoi's OpenChat library. # https://github.com/imoneoi/openchat/blob/3.6.0/ochat/training_deepspeed/train.py diff --git a/scripts/stat_utils/cal_mfu.py b/scripts/stat_utils/cal_mfu.py index edc6e2341..ef5672d26 100644 --- a/scripts/stat_utils/cal_mfu.py +++ b/scripts/stat_utils/cal_mfu.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/scripts/stat_utils/cal_ppl.py b/scripts/stat_utils/cal_ppl.py index 32d50e649..768202f97 100644 --- a/scripts/stat_utils/cal_ppl.py +++ b/scripts/stat_utils/cal_ppl.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/scripts/stat_utils/length_cdf.py b/scripts/stat_utils/length_cdf.py index 5cf253473..275549baf 100644 --- a/scripts/stat_utils/length_cdf.py +++ b/scripts/stat_utils/length_cdf.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/scripts/vllm_infer.py b/scripts/vllm_infer.py index 796d5b98d..2550f5ba8 100644 --- a/scripts/vllm_infer.py +++ b/scripts/vllm_infer.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/setup.py b/setup.py index 908552da5..0802efbe9 100644 --- a/setup.py +++ b/setup.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/api.py b/src/api.py index ad2e8cbba..61215459e 100644 --- a/src/api.py +++ b/src/api.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/__init__.py b/src/llamafactory/__init__.py index 0b5d8cce2..9b8076971 100644 --- a/src/llamafactory/__init__.py +++ b/src/llamafactory/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/api/app.py b/src/llamafactory/api/app.py index 50b53e9e9..b3c136de0 100644 --- a/src/llamafactory/api/app.py +++ b/src/llamafactory/api/app.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/api/chat.py b/src/llamafactory/api/chat.py index c467a3e6b..6959f4d84 100644 --- a/src/llamafactory/api/chat.py +++ b/src/llamafactory/api/chat.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/api/common.py b/src/llamafactory/api/common.py index d1ac94de4..59c84de63 100644 --- a/src/llamafactory/api/common.py +++ b/src/llamafactory/api/common.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/api/protocol.py b/src/llamafactory/api/protocol.py index c6fe6f757..310e743e2 100644 --- a/src/llamafactory/api/protocol.py +++ b/src/llamafactory/api/protocol.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/chat/__init__.py b/src/llamafactory/chat/__init__.py index 07276d483..15d8b9ba2 100644 --- a/src/llamafactory/chat/__init__.py +++ b/src/llamafactory/chat/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/chat/base_engine.py b/src/llamafactory/chat/base_engine.py index 700e1eef6..60a8e0073 100644 --- a/src/llamafactory/chat/base_engine.py +++ b/src/llamafactory/chat/base_engine.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/chat/hf_engine.py b/src/llamafactory/chat/hf_engine.py index 7b5e3bbef..88e22aaad 100644 --- a/src/llamafactory/chat/hf_engine.py +++ b/src/llamafactory/chat/hf_engine.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/chat/vllm_engine.py b/src/llamafactory/chat/vllm_engine.py index ee9c4c8c1..86c36f0a9 100644 --- a/src/llamafactory/chat/vllm_engine.py +++ b/src/llamafactory/chat/vllm_engine.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/cli.py b/src/llamafactory/cli.py index 72085e2d0..5ce7a9645 100644 --- a/src/llamafactory/cli.py +++ b/src/llamafactory/cli.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/data/__init__.py b/src/llamafactory/data/__init__.py index ea1a02f20..247d8cf0f 100644 --- a/src/llamafactory/data/__init__.py +++ b/src/llamafactory/data/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/data/aligner.py b/src/llamafactory/data/aligner.py index 82bbfafb2..aafe15366 100644 --- a/src/llamafactory/data/aligner.py +++ b/src/llamafactory/data/aligner.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/data/data_utils.py b/src/llamafactory/data/data_utils.py index bd5d3587d..9c2f527cb 100644 --- a/src/llamafactory/data/data_utils.py +++ b/src/llamafactory/data/data_utils.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/data/formatter.py b/src/llamafactory/data/formatter.py index f6c244689..ac558770b 100644 --- a/src/llamafactory/data/formatter.py +++ b/src/llamafactory/data/formatter.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/data/loader.py b/src/llamafactory/data/loader.py index 3c7e34a4d..7e972c886 100644 --- a/src/llamafactory/data/loader.py +++ b/src/llamafactory/data/loader.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/data/parser.py b/src/llamafactory/data/parser.py index 709d0c900..493eba68e 100644 --- a/src/llamafactory/data/parser.py +++ b/src/llamafactory/data/parser.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/data/preprocess.py b/src/llamafactory/data/preprocess.py index c5a10ec99..27363791a 100644 --- a/src/llamafactory/data/preprocess.py +++ b/src/llamafactory/data/preprocess.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/data/processors/feedback.py b/src/llamafactory/data/processors/feedback.py index b670da440..37b8154dc 100644 --- a/src/llamafactory/data/processors/feedback.py +++ b/src/llamafactory/data/processors/feedback.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/data/processors/pairwise.py b/src/llamafactory/data/processors/pairwise.py index a594c9846..242febfca 100644 --- a/src/llamafactory/data/processors/pairwise.py +++ b/src/llamafactory/data/processors/pairwise.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/data/processors/processor_utils.py b/src/llamafactory/data/processors/processor_utils.py index 8e13d100b..951986231 100644 --- a/src/llamafactory/data/processors/processor_utils.py +++ b/src/llamafactory/data/processors/processor_utils.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/data/processors/supervised.py b/src/llamafactory/data/processors/supervised.py index 83bd8ba2a..d7dc7379a 100644 --- a/src/llamafactory/data/processors/supervised.py +++ b/src/llamafactory/data/processors/supervised.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/data/processors/unsupervised.py b/src/llamafactory/data/processors/unsupervised.py index e21ebd42f..d85f28dda 100644 --- a/src/llamafactory/data/processors/unsupervised.py +++ b/src/llamafactory/data/processors/unsupervised.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py index e87643da4..5615557c1 100644 --- a/src/llamafactory/data/template.py +++ b/src/llamafactory/data/template.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -642,7 +642,7 @@ _register_template( _register_template( name="empty", - efficient_eos=True, + format_assistant=StringFormatter(slots=["{{content}}"]), ) diff --git a/src/llamafactory/data/tool_utils.py b/src/llamafactory/data/tool_utils.py index 44cf86ccc..6132e9826 100644 --- a/src/llamafactory/data/tool_utils.py +++ b/src/llamafactory/data/tool_utils.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/eval/evaluator.py b/src/llamafactory/eval/evaluator.py index 82e37f41a..99758dd2c 100644 --- a/src/llamafactory/eval/evaluator.py +++ b/src/llamafactory/eval/evaluator.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # This code is inspired by the Dan's test library. # https://github.com/hendrycks/test/blob/master/evaluate_flan.py diff --git a/src/llamafactory/eval/template.py b/src/llamafactory/eval/template.py index e6ab46a5d..e14540970 100644 --- a/src/llamafactory/eval/template.py +++ b/src/llamafactory/eval/template.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py index d570cc7a0..90cd21e63 100644 --- a/src/llamafactory/extras/constants.py +++ b/src/llamafactory/extras/constants.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/extras/ploting.py b/src/llamafactory/extras/ploting.py index 3e372a389..d05970d24 100644 --- a/src/llamafactory/extras/ploting.py +++ b/src/llamafactory/extras/ploting.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/hparams/__init__.py b/src/llamafactory/hparams/__init__.py index 254a845ec..9bcc4295c 100644 --- a/src/llamafactory/hparams/__init__.py +++ b/src/llamafactory/hparams/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/hparams/evaluation_args.py b/src/llamafactory/hparams/evaluation_args.py index a7f221ca6..ec1867e81 100644 --- a/src/llamafactory/hparams/evaluation_args.py +++ b/src/llamafactory/hparams/evaluation_args.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/hparams/finetuning_args.py b/src/llamafactory/hparams/finetuning_args.py index fa71907e3..52e37a3b4 100644 --- a/src/llamafactory/hparams/finetuning_args.py +++ b/src/llamafactory/hparams/finetuning_args.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/hparams/generating_args.py b/src/llamafactory/hparams/generating_args.py index 377fea152..db3306d68 100644 --- a/src/llamafactory/hparams/generating_args.py +++ b/src/llamafactory/hparams/generating_args.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/launcher.py b/src/llamafactory/launcher.py index b93f2ad1b..169b042ab 100644 --- a/src/llamafactory/launcher.py +++ b/src/llamafactory/launcher.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/model/__init__.py b/src/llamafactory/model/__init__.py index 48cfe76c4..1957ff8d4 100644 --- a/src/llamafactory/model/__init__.py +++ b/src/llamafactory/model/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/model/adapter.py b/src/llamafactory/model/adapter.py index 9edd87dd2..2602d5a3b 100644 --- a/src/llamafactory/model/adapter.py +++ b/src/llamafactory/model/adapter.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/model/loader.py b/src/llamafactory/model/loader.py index 113ddafa8..19e570998 100644 --- a/src/llamafactory/model/loader.py +++ b/src/llamafactory/model/loader.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/model/model_utils/attention.py b/src/llamafactory/model/model_utils/attention.py index 8ec74351d..3302de2e2 100644 --- a/src/llamafactory/model/model_utils/attention.py +++ b/src/llamafactory/model/model_utils/attention.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/model/model_utils/embedding.py b/src/llamafactory/model/model_utils/embedding.py index 497bac168..199b53c3e 100644 --- a/src/llamafactory/model/model_utils/embedding.py +++ b/src/llamafactory/model/model_utils/embedding.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/model/model_utils/liger_kernel.py b/src/llamafactory/model/model_utils/liger_kernel.py index 294e828cc..166238737 100644 --- a/src/llamafactory/model/model_utils/liger_kernel.py +++ b/src/llamafactory/model/model_utils/liger_kernel.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/model/model_utils/misc.py b/src/llamafactory/model/model_utils/misc.py index f32286383..e5f8ce5f7 100644 --- a/src/llamafactory/model/model_utils/misc.py +++ b/src/llamafactory/model/model_utils/misc.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/model/model_utils/mod.py b/src/llamafactory/model/model_utils/mod.py index ec73af005..5f67cd50d 100644 --- a/src/llamafactory/model/model_utils/mod.py +++ b/src/llamafactory/model/model_utils/mod.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/model/model_utils/moe.py b/src/llamafactory/model/model_utils/moe.py index cd5558d32..4e520d5c0 100644 --- a/src/llamafactory/model/model_utils/moe.py +++ b/src/llamafactory/model/model_utils/moe.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/model/model_utils/unsloth.py b/src/llamafactory/model/model_utils/unsloth.py index 4f4fd4997..899cc9714 100644 --- a/src/llamafactory/model/model_utils/unsloth.py +++ b/src/llamafactory/model/model_utils/unsloth.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/model/model_utils/valuehead.py b/src/llamafactory/model/model_utils/valuehead.py index a1eed1794..ace90f750 100644 --- a/src/llamafactory/model/model_utils/valuehead.py +++ b/src/llamafactory/model/model_utils/valuehead.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/model/patcher.py b/src/llamafactory/model/patcher.py index cf110af98..aab2052d2 100644 --- a/src/llamafactory/model/patcher.py +++ b/src/llamafactory/model/patcher.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/train/callbacks.py b/src/llamafactory/train/callbacks.py index 5906a4a6f..41c83819c 100644 --- a/src/llamafactory/train/callbacks.py +++ b/src/llamafactory/train/callbacks.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/train/dpo/__init__.py b/src/llamafactory/train/dpo/__init__.py index 9ce0d0895..73c1a4a6b 100644 --- a/src/llamafactory/train/dpo/__init__.py +++ b/src/llamafactory/train/dpo/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/train/kto/__init__.py b/src/llamafactory/train/kto/__init__.py index a19003685..491b067e4 100644 --- a/src/llamafactory/train/kto/__init__.py +++ b/src/llamafactory/train/kto/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/train/ppo/__init__.py b/src/llamafactory/train/ppo/__init__.py index 161f6f5de..ed9bc4d27 100644 --- a/src/llamafactory/train/ppo/__init__.py +++ b/src/llamafactory/train/ppo/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/train/ppo/ppo_utils.py b/src/llamafactory/train/ppo/ppo_utils.py index 11ce25a9c..55b79b4ed 100644 --- a/src/llamafactory/train/ppo/ppo_utils.py +++ b/src/llamafactory/train/ppo/ppo_utils.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/train/pt/__init__.py b/src/llamafactory/train/pt/__init__.py index d80e6f226..1f5c28983 100644 --- a/src/llamafactory/train/pt/__init__.py +++ b/src/llamafactory/train/pt/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/train/pt/trainer.py b/src/llamafactory/train/pt/trainer.py index 1e692204d..3024004d0 100644 --- a/src/llamafactory/train/pt/trainer.py +++ b/src/llamafactory/train/pt/trainer.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/train/rm/__init__.py b/src/llamafactory/train/rm/__init__.py index 482783159..f0e8a45c0 100644 --- a/src/llamafactory/train/rm/__init__.py +++ b/src/llamafactory/train/rm/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/train/rm/metric.py b/src/llamafactory/train/rm/metric.py index 17e47cc18..6f08b107d 100644 --- a/src/llamafactory/train/rm/metric.py +++ b/src/llamafactory/train/rm/metric.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/train/sft/__init__.py b/src/llamafactory/train/sft/__init__.py index 475dfe5f9..6107a9ae7 100644 --- a/src/llamafactory/train/sft/__init__.py +++ b/src/llamafactory/train/sft/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/train/test_utils.py b/src/llamafactory/train/test_utils.py index 55e6c1991..9404c2490 100644 --- a/src/llamafactory/train/test_utils.py +++ b/src/llamafactory/train/test_utils.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/train/tuner.py b/src/llamafactory/train/tuner.py index bbbef1cf5..4e60e2f07 100644 --- a/src/llamafactory/train/tuner.py +++ b/src/llamafactory/train/tuner.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/webui/chatter.py b/src/llamafactory/webui/chatter.py index 3c598cf98..39602947d 100644 --- a/src/llamafactory/webui/chatter.py +++ b/src/llamafactory/webui/chatter.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/webui/common.py b/src/llamafactory/webui/common.py index e8f1e097c..dc30196ab 100644 --- a/src/llamafactory/webui/common.py +++ b/src/llamafactory/webui/common.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/webui/components/__init__.py b/src/llamafactory/webui/components/__init__.py index 715fb6e47..eb3c9d4c7 100644 --- a/src/llamafactory/webui/components/__init__.py +++ b/src/llamafactory/webui/components/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/webui/components/chatbot.py b/src/llamafactory/webui/components/chatbot.py index 840c190d9..5c2e75864 100644 --- a/src/llamafactory/webui/components/chatbot.py +++ b/src/llamafactory/webui/components/chatbot.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/webui/components/data.py b/src/llamafactory/webui/components/data.py index e62e1823d..1dbc68d54 100644 --- a/src/llamafactory/webui/components/data.py +++ b/src/llamafactory/webui/components/data.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/webui/components/eval.py b/src/llamafactory/webui/components/eval.py index 39a12026b..7be0a5b4f 100644 --- a/src/llamafactory/webui/components/eval.py +++ b/src/llamafactory/webui/components/eval.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/webui/components/export.py b/src/llamafactory/webui/components/export.py index 7f4b46e6b..c5034222b 100644 --- a/src/llamafactory/webui/components/export.py +++ b/src/llamafactory/webui/components/export.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/webui/components/infer.py b/src/llamafactory/webui/components/infer.py index 6647cd7a4..c2d63ee63 100644 --- a/src/llamafactory/webui/components/infer.py +++ b/src/llamafactory/webui/components/infer.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/webui/components/top.py b/src/llamafactory/webui/components/top.py index 467e31125..978f93cd5 100644 --- a/src/llamafactory/webui/components/top.py +++ b/src/llamafactory/webui/components/top.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/webui/components/train.py b/src/llamafactory/webui/components/train.py index 28aa3a8c0..4f0373135 100644 --- a/src/llamafactory/webui/components/train.py +++ b/src/llamafactory/webui/components/train.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/webui/control.py b/src/llamafactory/webui/control.py index b8087af67..abadd0457 100644 --- a/src/llamafactory/webui/control.py +++ b/src/llamafactory/webui/control.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/webui/css.py b/src/llamafactory/webui/css.py index c4445e8eb..a48a21c53 100644 --- a/src/llamafactory/webui/css.py +++ b/src/llamafactory/webui/css.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/webui/engine.py b/src/llamafactory/webui/engine.py index 3b18eeb9c..2708139d2 100644 --- a/src/llamafactory/webui/engine.py +++ b/src/llamafactory/webui/engine.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/webui/interface.py b/src/llamafactory/webui/interface.py index d6387339d..6dff348e4 100644 --- a/src/llamafactory/webui/interface.py +++ b/src/llamafactory/webui/interface.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/webui/locales.py b/src/llamafactory/webui/locales.py index f7846c531..30f4517bc 100644 --- a/src/llamafactory/webui/locales.py +++ b/src/llamafactory/webui/locales.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/webui/manager.py b/src/llamafactory/webui/manager.py index 18332ac0f..3b6f5a9ae 100644 --- a/src/llamafactory/webui/manager.py +++ b/src/llamafactory/webui/manager.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/llamafactory/webui/runner.py b/src/llamafactory/webui/runner.py index cc8c6cc5c..a587befdd 100644 --- a/src/llamafactory/webui/runner.py +++ b/src/llamafactory/webui/runner.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/train.py b/src/train.py index 6703ffdb0..eba846a02 100644 --- a/src/train.py +++ b/src/train.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/src/webui.py b/src/webui.py index d0f00ea69..16b8335ba 100644 --- a/src/webui.py +++ b/src/webui.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/data/processors/test_feedback.py b/tests/data/processors/test_feedback.py index c04e823b7..a70c6e1d3 100644 --- a/tests/data/processors/test_feedback.py +++ b/tests/data/processors/test_feedback.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/data/processors/test_pairwise.py b/tests/data/processors/test_pairwise.py index da50ca242..7602d070b 100644 --- a/tests/data/processors/test_pairwise.py +++ b/tests/data/processors/test_pairwise.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/data/processors/test_processor_utils.py b/tests/data/processors/test_processor_utils.py index 692fcaa10..9cf312205 100644 --- a/tests/data/processors/test_processor_utils.py +++ b/tests/data/processors/test_processor_utils.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/data/processors/test_supervised.py b/tests/data/processors/test_supervised.py index 965429a63..2a988e843 100644 --- a/tests/data/processors/test_supervised.py +++ b/tests/data/processors/test_supervised.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/data/processors/test_unsupervised.py b/tests/data/processors/test_unsupervised.py index c59fa5b29..c3f3159fb 100644 --- a/tests/data/processors/test_unsupervised.py +++ b/tests/data/processors/test_unsupervised.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/data/test_collator.py b/tests/data/test_collator.py index dcb53d6bb..23a045aee 100644 --- a/tests/data/test_collator.py +++ b/tests/data/test_collator.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/data/test_formatter.py b/tests/data/test_formatter.py index 4eb2072bc..2aaf48a2f 100644 --- a/tests/data/test_formatter.py +++ b/tests/data/test_formatter.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/data/test_mm_plugin.py b/tests/data/test_mm_plugin.py index c9084af04..7dce81180 100644 --- a/tests/data/test_mm_plugin.py +++ b/tests/data/test_mm_plugin.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/data/test_template.py b/tests/data/test_template.py index e6b6ed2bd..d6eaeda34 100644 --- a/tests/data/test_template.py +++ b/tests/data/test_template.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/e2e/test_chat.py b/tests/e2e/test_chat.py index b95646d7b..98818f275 100644 --- a/tests/e2e/test_chat.py +++ b/tests/e2e/test_chat.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/e2e/test_train.py b/tests/e2e/test_train.py index 71cda495a..46d7813c4 100644 --- a/tests/e2e/test_train.py +++ b/tests/e2e/test_train.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/eval/test_eval_template.py b/tests/eval/test_eval_template.py index f85d9d57b..eddc16400 100644 --- a/tests/eval/test_eval_template.py +++ b/tests/eval/test_eval_template.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/model/model_utils/test_attention.py b/tests/model/model_utils/test_attention.py index 35f3284dd..a3deda29a 100644 --- a/tests/model/model_utils/test_attention.py +++ b/tests/model/model_utils/test_attention.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/model/model_utils/test_checkpointing.py b/tests/model/model_utils/test_checkpointing.py index cdf628077..ef38d0d5e 100644 --- a/tests/model/model_utils/test_checkpointing.py +++ b/tests/model/model_utils/test_checkpointing.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/model/model_utils/test_misc.py b/tests/model/model_utils/test_misc.py new file mode 100644 index 000000000..b2c8b3bf9 --- /dev/null +++ b/tests/model/model_utils/test_misc.py @@ -0,0 +1,43 @@ +# Copyright 2025 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import pytest +import torch +from transformers import AutoConfig, AutoModelForCausalLM + +from llamafactory.model.model_utils.misc import find_expanded_modules + + +HF_TOKEN = os.getenv("HF_TOKEN") + + +@pytest.mark.skipif(not HF_TOKEN, reason="Gated model.") +def test_expanded_modules(): + config = AutoConfig.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct") + with torch.device("meta"): + model = AutoModelForCausalLM.from_config(config) + + expanded_modules = find_expanded_modules(model, ["q_proj", "v_proj"], num_layer_trainable=4) + assert expanded_modules == [ + "model.layers.7.self_attn.q_proj", + "model.layers.7.self_attn.v_proj", + "model.layers.15.self_attn.q_proj", + "model.layers.15.self_attn.v_proj", + "model.layers.23.self_attn.q_proj", + "model.layers.23.self_attn.v_proj", + "model.layers.31.self_attn.q_proj", + "model.layers.31.self_attn.v_proj", + ] diff --git a/tests/model/model_utils/test_packing.py b/tests/model/model_utils/test_packing.py index bee216917..81e0d66a5 100644 --- a/tests/model/model_utils/test_packing.py +++ b/tests/model/model_utils/test_packing.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/model/model_utils/test_visual.py b/tests/model/model_utils/test_visual.py index b4e23defa..66d91ca6b 100644 --- a/tests/model/model_utils/test_visual.py +++ b/tests/model/model_utils/test_visual.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/model/test_base.py b/tests/model/test_base.py index 6b6aa8b88..9e8c50489 100644 --- a/tests/model/test_base.py +++ b/tests/model/test_base.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/model/test_freeze.py b/tests/model/test_freeze.py index 964f52c98..cd9fc61c4 100644 --- a/tests/model/test_freeze.py +++ b/tests/model/test_freeze.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/model/test_full.py b/tests/model/test_full.py index 6990a0e9d..3bd9c9e89 100644 --- a/tests/model/test_full.py +++ b/tests/model/test_full.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/model/test_lora.py b/tests/model/test_lora.py index e1d2148e5..2a4177ce6 100644 --- a/tests/model/test_lora.py +++ b/tests/model/test_lora.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/model/test_pissa.py b/tests/model/test_pissa.py index 7bfdac51c..d1e4114cd 100644 --- a/tests/model/test_pissa.py +++ b/tests/model/test_pissa.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/train/test_sft_trainer.py b/tests/train/test_sft_trainer.py index 75231d205..bb67a31e3 100644 --- a/tests/train/test_sft_trainer.py +++ b/tests/train/test_sft_trainer.py @@ -1,4 +1,4 @@ -# Copyright 2024 the LlamaFactory team. +# Copyright 2025 the LlamaFactory team. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License.