[misc] upgrade format to py39 (#7256)
@@ -14,7 +14,7 @@

 import json
 import os
-from typing import Sequence
+from collections.abc import Sequence

 from openai import OpenAI
 from transformers.utils.versions import require_version

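Note on the first hunk: since Python 3.9 (PEP 585), `typing.Sequence` is just a deprecated alias of `collections.abc.Sequence`, so the abc class can be imported directly and used both as a generic annotation and in runtime `isinstance` checks. A minimal sketch of the pattern (the `count_turns` helper is illustrative, not part of this PR):

    from collections.abc import Sequence  # preferred over typing.Sequence on Python 3.9+


    def count_turns(messages: Sequence[dict]) -> int:
        """Count chat turns; accepts any sequence type (list, tuple, ...)."""
        assert isinstance(messages, Sequence)  # abc classes also work at runtime
        return len(messages)


    print(count_turns([{"role": "user", "content": "hi"}]))  # prints 1
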
@@ -15,7 +15,7 @@
 import json
 import os
 from collections import OrderedDict
-from typing import Any, Dict
+from typing import Any

 import fire
 import torch
@@ -29,13 +29,13 @@ CONFIG_NAME = "config.json"


 def save_weight(input_dir: str, output_dir: str, shard_size: str, save_safetensors: bool):
-    baichuan2_state_dict: Dict[str, torch.Tensor] = OrderedDict()
+    baichuan2_state_dict: dict[str, torch.Tensor] = OrderedDict()
     for filepath in tqdm(os.listdir(input_dir), desc="Load weights"):
         if os.path.isfile(os.path.join(input_dir, filepath)) and filepath.endswith(".bin"):
             shard_weight = torch.load(os.path.join(input_dir, filepath), map_location="cpu")
             baichuan2_state_dict.update(shard_weight)

-    llama_state_dict: Dict[str, torch.Tensor] = OrderedDict()
+    llama_state_dict: dict[str, torch.Tensor] = OrderedDict()
     for key, value in tqdm(baichuan2_state_dict.items(), desc="Convert format"):
         if "W_pack" in key:
             proj_size = value.size(0) // 3
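The `Dict[...]` to `dict[...]` rewrites follow the same PEP 585 rule: on Python 3.9+ the built-in containers are subscriptable, so `typing.Dict`, `typing.List`, and friends are no longer needed in annotations. A small hedged illustration (names invented for the example):

    import torch

    # Python 3.9+ built-in generics; no `from typing import Dict` required.
    state_dict: dict[str, torch.Tensor] = {"weight": torch.zeros(2, 2)}


    def tensor_names(sd: dict[str, torch.Tensor]) -> list[str]:
        """Return the tensor names stored in a state dict."""
        return list(sd.keys())


    print(tensor_names(state_dict))  # ['weight']
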
@@ -75,7 +75,7 @@ def save_weight(input_dir: str, output_dir: str, shard_size: str, save_safetenso

 def save_config(input_dir: str, output_dir: str):
     with open(os.path.join(input_dir, CONFIG_NAME), encoding="utf-8") as f:
-        llama2_config_dict: Dict[str, Any] = json.load(f)
+        llama2_config_dict: dict[str, Any] = json.load(f)

     llama2_config_dict["architectures"] = ["LlamaForCausalLM"]
     llama2_config_dict.pop("auto_map", None)
@@ -94,8 +94,8 @@ def llamafy_baichuan2(
     shard_size: str = "2GB",
     save_safetensors: bool = True,
 ):
-    r"""
-    Converts the Baichuan2-7B model in the same format as LLaMA2-7B.
+    r"""Convert the Baichuan2-7B model in the same format as LLaMA2-7B.
+
     Usage: python llamafy_baichuan2.py --input_dir input --output_dir output
     Converted model: https://huggingface.co/hiyouga/Baichuan2-7B-Base-LLaMAfied
     """
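The docstring edits in this and the following hunks all apply one convention: the summary sits on the same line as `r"""`, uses the imperative mood ("Convert" rather than "Converts"), ends with a period, and is separated from the usage details by a blank line. This is the PEP 257 layout that pydocstyle-style D2xx checks expect. A sketch of the resulting shape (the function name is illustrative):

    def llamafy_model(input_dir: str, output_dir: str):
        r"""Convert a checkpoint into the LLaMA layout.

        Usage: python llamafy_model.py --input_dir input --output_dir output
        """
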
@@ -15,7 +15,7 @@
 import json
 import os
 from collections import OrderedDict
-from typing import Any, Dict
+from typing import Any

 import fire
 import torch
@@ -37,14 +37,14 @@ CONFIG_NAME = "config.json"


 def save_weight(input_dir: str, output_dir: str, shard_size: str, save_safetensors: bool) -> str:
-    qwen_state_dict: Dict[str, torch.Tensor] = OrderedDict()
+    qwen_state_dict: dict[str, torch.Tensor] = OrderedDict()
     for filepath in tqdm(os.listdir(input_dir), desc="Load weights"):
         if os.path.isfile(os.path.join(input_dir, filepath)) and filepath.endswith(".safetensors"):
             with safe_open(os.path.join(input_dir, filepath), framework="pt", device="cpu") as f:
                 for key in f.keys():
                     qwen_state_dict[key] = f.get_tensor(key)

-    llama_state_dict: Dict[str, torch.Tensor] = OrderedDict()
+    llama_state_dict: dict[str, torch.Tensor] = OrderedDict()
     torch_dtype = None
     for key, value in tqdm(qwen_state_dict.items(), desc="Convert format"):
         if torch_dtype is None:
@@ -112,9 +112,9 @@ def save_weight(input_dir: str, output_dir: str, shard_size: str, save_safetenso

 def save_config(input_dir: str, output_dir: str, torch_dtype: str):
     with open(os.path.join(input_dir, CONFIG_NAME), encoding="utf-8") as f:
-        qwen_config_dict: Dict[str, Any] = json.load(f)
+        qwen_config_dict: dict[str, Any] = json.load(f)

-    llama2_config_dict: Dict[str, Any] = OrderedDict()
+    llama2_config_dict: dict[str, Any] = OrderedDict()
     llama2_config_dict["architectures"] = ["LlamaForCausalLM"]
     llama2_config_dict["hidden_act"] = "silu"
     llama2_config_dict["hidden_size"] = qwen_config_dict["hidden_size"]
@@ -147,8 +147,8 @@ def llamafy_qwen(
     shard_size: str = "2GB",
     save_safetensors: bool = False,
 ):
-    r"""
-    Converts the Qwen models in the same format as LLaMA2.
+    r"""Convert the Qwen models in the same format as LLaMA2.
+
     Usage: python llamafy_qwen.py --input_dir input --output_dir output
     Converted model: https://huggingface.co/hiyouga/Qwen-14B-Chat-LLaMAfied
     """
@@ -18,7 +18,7 @@
 import json
 import os
 from collections import OrderedDict
-from typing import TYPE_CHECKING, Dict
+from typing import TYPE_CHECKING

 import fire
 import torch
@@ -44,11 +44,11 @@ def block_expansion(
     shard_size: str = "5GB",
     save_safetensors: bool = True,
 ):
-    r"""
-    Performs block expansion for LLaMA, Mistral, Qwen2 or Yi models.
+    r"""Perform block expansion for LLaMA, Mistral, Qwen2 or Yi models.
+
     Usage: python llama_pro.py --model_name_or_path meta-llama/Llama-2-7b-hf --output_dir llama2_pro --num_expand 8
     """
-    config: "PretrainedConfig" = AutoConfig.from_pretrained(model_name_or_path, trust_remote_code=True)
+    config: PretrainedConfig = AutoConfig.from_pretrained(model_name_or_path, trust_remote_code=True)
     num_layers = getattr(config, "num_hidden_layers")
     if num_layers % num_expand != 0:
         raise ValueError(f"`num_layers` {num_layers} should be divisible by `num_expand` {num_expand}.")
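Dropping the quotes from annotations such as `config: "PretrainedConfig"` is safe even though the class is only imported under `TYPE_CHECKING`: per PEP 526, annotations on local variables are never evaluated at runtime, so the bare name cannot raise `NameError` inside a function body. A runnable sketch under that assumption (the stand-in object is hypothetical):

    from types import SimpleNamespace
    from typing import TYPE_CHECKING

    if TYPE_CHECKING:  # imported only for the type checker, absent at runtime
        from transformers import PretrainedConfig


    def show_num_layers(num_hidden_layers: int = 32) -> int:
        # PEP 526: local-variable annotations are not evaluated, so the unquoted
        # name below cannot fail even though it is undefined in this module at runtime.
        config: PretrainedConfig = SimpleNamespace(num_hidden_layers=num_hidden_layers)
        return config.num_hidden_layers


    print(show_num_layers())  # prints 32
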
@@ -70,7 +70,7 @@ def block_expansion(
     split = num_layers // num_expand
     layer_cnt = 0
     state_dict = model.state_dict()
-    output_state_dict: Dict[str, "torch.Tensor"] = OrderedDict()
+    output_state_dict: dict[str, torch.Tensor] = OrderedDict()
     for i in range(num_layers):
         for key, value in state_dict.items():
             if f".{i:d}." in key:
@@ -38,8 +38,8 @@ def quantize_loftq(
     lora_target: tuple = ("q_proj", "v_proj"),
     save_safetensors: bool = True,
 ):
-    r"""
-    Initializes LoRA weights with LoRA-fine-tuning-aware Quantization (LoftQ)
+    r"""Initialize LoRA weights with LoRA-fine-tuning-aware Quantization (LoftQ).
+
     Usage: python loftq_init.py --model_name_or_path path_to_model --output_dir output_dir
     """
     if isinstance(lora_target, str):
@@ -72,7 +72,7 @@ def quantize_loftq(
     print(f"Adapter weights saved in {loftq_dir}")

     # Save base model
-    base_model: "PreTrainedModel" = peft_model.unload()
+    base_model: PreTrainedModel = peft_model.unload()
     base_model.save_pretrained(output_dir, safe_serialization=save_safetensors)
     tokenizer.save_pretrained(output_dir)
     print(f"Model weights saved in {output_dir}")
@@ -37,8 +37,8 @@ def quantize_pissa(
     lora_target: tuple = ("q_proj", "v_proj"),
     save_safetensors: bool = True,
 ):
-    r"""
-    Initializes LoRA weights with Principal Singular values and Singular vectors Adaptation (PiSSA)
+    r"""Initialize LoRA weights with Principal Singular values and Singular vectors Adaptation (PiSSA).
+
     Usage: python pissa_init.py --model_name_or_path path_to_model --output_dir output_dir
     """
     if isinstance(lora_target, str):
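For readers unfamiliar with what pissa_init.py wraps: PiSSA initializes the LoRA A/B matrices from the principal singular directions of the base weights and leaves a residual base model behind. A rough sketch with peft (a recent peft release is assumed; the model path and hyper-parameters are placeholders, not the script's defaults):

    from peft import LoraConfig, get_peft_model
    from transformers import AutoModelForCausalLM

    model = AutoModelForCausalLM.from_pretrained("path_to_model")
    lora_config = LoraConfig(
        task_type="CAUSAL_LM",
        r=16,
        target_modules=["q_proj", "v_proj"],
        init_lora_weights="pissa",  # principal singular value/vector initialization
    )
    peft_model = get_peft_model(model, lora_config)
    peft_model.save_pretrained("output_dir/pissa_init")  # adapter weights only
    base_model = peft_model.unload()  # residual base model, as in the hunk above
    base_model.save_pretrained("output_dir")
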
@@ -67,7 +67,7 @@ def quantize_pissa(
     print(f"Adapter weights saved in {pissa_dir}")

     # Save base model
-    base_model: "PreTrainedModel" = peft_model.unload()
+    base_model: PreTrainedModel = peft_model.unload()
     base_model.save_pretrained(output_dir, safe_serialization=save_safetensors)
     tokenizer.save_pretrained(output_dir)
     print(f"Model weights saved in {output_dir}")
@@ -29,8 +29,8 @@ def calculate_flops(
     seq_length: int = 512,
     flash_attn: str = "auto",
 ):
-    r"""
-    Calculates the flops of pre-trained models.
+    r"""Calculate the flops of pre-trained models.
+
     Usage: python cal_flops.py --model_name_or_path path_to_model --batch_size 1 --seq_length 512
     """
     with get_accelerator().device(0):
@@ -45,8 +45,8 @@ def calculate_lr(
     is_mistral_or_gemma: bool = False,  # mistral and gemma models opt for a smaller learning rate,
     packing: bool = False,
 ):
-    r"""
-    Calculates the optimal learning rate for 7B/13B models using LLaMA's hyper-parameters.
+    r"""Calculate the optimal learning rate for 7B/13B models using LLaMA's hyper-parameters.
+
     Usage:
     python cal_lr.py --model_name_or_path path_to_model --dataset alpaca_en_demo --cutoff_len 1024 --batch_size 16
     """
@@ -89,9 +89,8 @@ def calculate_lr(
     lr = BASE_LR * math.sqrt(token_batch_size / BASE_BS)  # lr ~ sqrt(batch_size)
     lr = lr / 6.0 if is_mistral_or_gemma else lr
     print(
-        "Optimal learning rate is {:.2e} for valid ratio% {:.2f} and effective token batch size {:.2f}".format(
-            lr, valid_ratio * 100, token_batch_size
-        )
+        f"Optimal learning rate is {lr:.2e} for valid ratio% {valid_ratio * 100:.2f} "
+        f"and effective token batch size {token_batch_size:.2f}"
     )


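The cal_lr.py hunk above also converts a `.format(...)` call into two adjacent f-strings. Python concatenates adjacent string literals at compile time, so the message can be split across lines for the line-length limit without changing the output; a quick check of the equivalence (the numbers are made up):

    lr, valid_ratio, token_batch_size = 3.2e-4, 0.95, 8192.0

    old_style = "Optimal learning rate is {:.2e} for valid ratio% {:.2f} and effective token batch size {:.2f}".format(
        lr, valid_ratio * 100, token_batch_size
    )
    new_style = (
        f"Optimal learning rate is {lr:.2e} for valid ratio% {valid_ratio * 100:.2f} "
        f"and effective token batch size {token_batch_size:.2f}"
    )
    assert old_style == new_style  # adjacent literals concatenate; output is identical
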
@@ -34,9 +34,7 @@ def compute_model_flops(
     include_recompute: bool = False,
     include_flashattn: bool = False,
 ) -> int:
-    r"""
-    Calculates the FLOPs of model per forward/backward pass.
-    """
+    r"""Calculate the FLOPs of model per forward/backward pass."""
     config = AutoConfig.from_pretrained(model_name_or_path)
     hidden_size = getattr(config, "hidden_size", None)
     vocab_size = getattr(config, "vocab_size", None)
@@ -86,9 +84,7 @@ def compute_model_flops(


 def compute_device_flops(world_size: int) -> float:
-    r"""
-    Calculates the FLOPs of the device capability per second.
-    """
+    r"""Calculate the FLOPs of the device capability per second."""
     device_name = torch.cuda.get_device_name()
     if "H100" in device_name or "H800" in device_name:
         return 989 * 1e12 * world_size
@@ -114,8 +110,8 @@ def calculate_mfu(
     liger_kernel: bool = False,
     unsloth_gc: bool = False,
 ) -> float:
-    r"""
-    Calculates MFU for given model and hyper-params.
+    r"""Calculate MFU for given model and hyper-params.
+
     Usage: python cal_mfu.py --model_name_or_path path_to_model --batch_size 1 --seq_length 1024
     """
     args = {
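cal_mfu.py splits the measurement into per-step model FLOPs (compute_model_flops) and per-second device capability (compute_device_flops). MFU is conventionally the achieved FLOP/s divided by the peak FLOP/s; a hedged sketch of how such helpers combine, with illustrative numbers rather than the script's exact code:

    def estimate_mfu(flops_per_step: float, peak_flops_per_sec: float, seconds_per_step: float) -> float:
        """Model FLOPs Utilization: achieved FLOP/s divided by peak FLOP/s."""
        achieved_flops_per_sec = flops_per_step / seconds_per_step
        return achieved_flops_per_sec / peak_flops_per_sec


    # e.g. 1.5e15 FLOPs per step, one H800 (989 TFLOP/s), 3.0 s per step -> ~50.6%
    print(f"{estimate_mfu(1.5e15, 989e12, 3.0):.2%}")
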
@@ -13,8 +13,9 @@
 # limitations under the License.

 import json
+from collections.abc import Sequence
 from dataclasses import dataclass
-from typing import Any, Dict, Literal, Optional, Sequence
+from typing import Any, Literal, Optional

 import fire
 import torch
@@ -30,16 +31,12 @@ from llamafactory.model import load_model, load_tokenizer

 @dataclass
 class PairwiseDataCollatorWithPadding(MultiModalDataCollatorForSeq2Seq):
-    r"""
-    Data collator for pairwise data.
-    """
+    r"""Data collator for pairwise data."""

     train_on_prompt: bool = False

-    def __call__(self, features: Sequence[Dict[str, Any]]) -> Dict[str, torch.Tensor]:
-        r"""
-        Pads batched data to the longest sequence in the batch.
-        """
+    def __call__(self, features: Sequence[dict[str, Any]]) -> dict[str, torch.Tensor]:
+        r"""Pad batched data to the longest sequence in the batch."""
         chosen_features = []
         for feature in features:
             chosen_features.append(
@@ -68,8 +65,8 @@ def calculate_ppl(
     max_samples: Optional[int] = None,
     train_on_prompt: bool = False,
 ):
-    r"""
-    Calculates the ppl on the dataset of the pre-trained models.
+    r"""Calculate the ppl on the dataset of the pre-trained models.
+
     Usage: export CUDA_VISIBLE_DEVICES=0
     python cal_ppl.py --model_name_or_path path_to_model --dataset alpaca_en_demo --save_name ppl.json
     """
@@ -111,17 +108,17 @@ def calculate_ppl(
     criterion = torch.nn.CrossEntropyLoss(reduction="none")
     total_ppl = 0
     perplexities = []
-    batch: Dict[str, "torch.Tensor"]
+    batch: dict[str, torch.Tensor]
     with torch.no_grad():
         for batch in tqdm(dataloader, desc="Computing perplexities"):
             batch = batch.to(model.device)
             outputs = model(**batch)
-            shift_logits: "torch.Tensor" = outputs["logits"][..., :-1, :]
-            shift_labels: "torch.Tensor" = batch["labels"][..., 1:]
+            shift_logits: torch.Tensor = outputs["logits"][..., :-1, :]
+            shift_labels: torch.Tensor = batch["labels"][..., 1:]
             loss_mask = shift_labels != IGNORE_INDEX
             flatten_logits = shift_logits.contiguous().view(shift_labels.size(0) * shift_labels.size(1), -1)
             flatten_labels = shift_labels.contiguous().view(-1)
-            token_logps: "torch.Tensor" = criterion(flatten_logits, flatten_labels)
+            token_logps: torch.Tensor = criterion(flatten_logits, flatten_labels)
             token_logps = token_logps.contiguous().view(shift_logits.size(0), -1)
             sentence_logps = (token_logps * loss_mask).sum(-1) / loss_mask.sum(-1)
             total_ppl += sentence_logps.exp().sum().item()
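The cal_ppl.py hunk only touches annotations, but the masking logic around it is worth restating: logits are shifted against labels, positions equal to IGNORE_INDEX are masked out, and the mean token negative log-likelihood per sequence is exponentiated. A self-contained sketch of that pattern (shapes and the -100 ignore index mirror the script; the random tensors are stand-ins for model outputs):

    import torch

    IGNORE_INDEX = -100


    def sentence_ppl(logits: torch.Tensor, labels: torch.Tensor) -> torch.Tensor:
        """Per-sequence perplexity from (batch, seq, vocab) logits and (batch, seq) labels."""
        criterion = torch.nn.CrossEntropyLoss(reduction="none")
        shift_logits = logits[..., :-1, :]
        shift_labels = labels[..., 1:]
        loss_mask = shift_labels != IGNORE_INDEX
        flat_logits = shift_logits.contiguous().view(-1, shift_logits.size(-1))
        flat_labels = shift_labels.contiguous().view(-1)
        token_nll = criterion(flat_logits, flat_labels)  # ignore_index=-100 yields 0 loss there
        token_nll = token_nll.view(shift_labels.size(0), -1)
        sentence_nll = (token_nll * loss_mask).sum(-1) / loss_mask.sum(-1)
        return sentence_nll.exp()


    logits = torch.randn(2, 8, 32)
    labels = torch.randint(0, 32, (2, 8))
    labels[:, :2] = IGNORE_INDEX  # pretend the prompt tokens are masked out
    print(sentence_ppl(logits, labels))
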
@@ -29,8 +29,8 @@ def length_cdf(
     template: str = "default",
     interval: int = 1000,
 ):
-    r"""
-    Calculates the distribution of the input lengths in the dataset.
+    r"""Calculate the distribution of the input lengths in the dataset.
+
     Usage: export CUDA_VISIBLE_DEVICES=0
     python length_cdf.py --model_name_or_path path_to_model --dataset alpaca_en_demo --template default
     """
@@ -52,8 +52,8 @@ def vllm_infer(
     image_max_pixels: int = 768 * 768,
     image_min_pixels: int = 32 * 32,
 ):
-    r"""
-    Performs batch generation using vLLM engine, which supports tensor parallelism.
+    r"""Perform batch generation using vLLM engine, which supports tensor parallelism.
+
     Usage: python vllm_infer.py --model_name_or_path meta-llama/Llama-2-7b-hf --template llama --dataset alpaca_en_demo
     """
     check_version("vllm>=0.4.3,<=0.7.3")
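vllm_infer.py pins `vllm>=0.4.3,<=0.7.3` and wraps vLLM's offline batch-generation API. A minimal sketch of that pattern (model name and sampling values are placeholders, not the script's defaults):

    from vllm import LLM, SamplingParams

    llm = LLM(model="meta-llama/Llama-2-7b-hf", tensor_parallel_size=1)
    sampling_params = SamplingParams(temperature=0.7, top_p=0.9, max_tokens=256)
    outputs = llm.generate(["What is the capital of France?"], sampling_params)
    print(outputs[0].outputs[0].text)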