support vllm
Former-commit-id: 889f6e910e654d8ec3922c2185042d737ffbf1c3
@@ -16,35 +16,35 @@ class DataArguments:
         default=None,
         metadata={"help": "The name of provided dataset(s) to use. Use commas to separate multiple datasets."},
     )
-    dataset_dir: Optional[str] = field(
+    dataset_dir: str = field(
         default="data",
         metadata={"help": "Path to the folder containing the datasets."},
     )
-    split: Optional[str] = field(
+    split: str = field(
         default="train",
         metadata={"help": "Which dataset split to use for training and evaluation."},
     )
-    cutoff_len: Optional[int] = field(
+    cutoff_len: int = field(
         default=1024,
         metadata={"help": "The cutoff length of the model inputs after tokenization."},
     )
-    reserved_label_len: Optional[int] = field(
+    reserved_label_len: int = field(
         default=1,
         metadata={"help": "The minimum cutoff length reserved for label after tokenization."},
     )
-    train_on_prompt: Optional[bool] = field(
+    train_on_prompt: bool = field(
         default=False,
         metadata={"help": "Whether to disable the mask on the prompt or not."},
     )
-    streaming: Optional[bool] = field(
+    streaming: bool = field(
         default=False,
         metadata={"help": "Enable dataset streaming."},
     )
-    buffer_size: Optional[int] = field(
+    buffer_size: int = field(
         default=16384,
         metadata={"help": "Size of the buffer to randomly sample examples from in dataset streaming."},
     )
-    mix_strategy: Optional[Literal["concat", "interleave_under", "interleave_over"]] = field(
+    mix_strategy: Literal["concat", "interleave_under", "interleave_over"] = field(
         default="concat",
         metadata={"help": "Strategy to use in dataset mixing (concat/interleave) (undersampling/oversampling)."},
     )
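
For readers unfamiliar with the three mixing strategies named above, here is a minimal sketch of how they plausibly map onto the Hugging Face `datasets` API; the assumption that "interleave_under"/"interleave_over" correspond to the `first_exhausted`/`all_exhausted` stopping strategies is inferred from the option names, not stated in this diff.

    # Sketch: the three mix_strategy values expressed with the `datasets` API.
    from datasets import Dataset, concatenate_datasets, interleave_datasets

    ds_a = Dataset.from_dict({"text": ["a1", "a2"]})
    ds_b = Dataset.from_dict({"text": ["b1", "b2", "b3"]})

    # "concat": simple concatenation, 5 examples total.
    mixed_concat = concatenate_datasets([ds_a, ds_b])
    # "interleave_under" (assumed): stop when the smallest dataset is exhausted.
    mixed_under = interleave_datasets([ds_a, ds_b], stopping_strategy="first_exhausted")
    # "interleave_over" (assumed): oversample until every dataset is exhausted.
    mixed_over = interleave_datasets([ds_a, ds_b], stopping_strategy="all_exhausted")
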
@@ -52,13 +52,13 @@ class DataArguments:
         default=None,
         metadata={"help": "Probabilities to sample data from datasets. Use commas to separate multiple datasets."},
     )
-    overwrite_cache: Optional[bool] = field(
+    overwrite_cache: bool = field(
         default=False,
         metadata={"help": "Overwrite the cached training and evaluation sets."},
     )
     preprocessing_num_workers: Optional[int] = field(
         default=None,
-        metadata={"help": "The number of processes to use for the preprocessing."},
+        metadata={"help": "The number of processes to use for the pre-processing."},
     )
     max_samples: Optional[int] = field(
         default=None,
@@ -68,23 +68,23 @@ class DataArguments:
         default=None,
         metadata={"help": "Number of beams to use for evaluation. This argument will be passed to `model.generate`"},
     )
-    ignore_pad_token_for_loss: Optional[bool] = field(
+    ignore_pad_token_for_loss: bool = field(
         default=True,
         metadata={
             "help": "Whether or not to ignore the tokens corresponding to padded labels in the loss computation."
         },
     )
-    val_size: Optional[float] = field(
-        default=0,
+    val_size: float = field(
+        default=0.0,
         metadata={"help": "Size of the development set, should be an integer or a float in range `[0,1)`."},
     )
-    sft_packing: Optional[bool] = field(
+    sft_packing: bool = field(
         default=False,
         metadata={"help": "Packing the questions and answers in the supervised fine-tuning stage."},
     )
     cache_path: Optional[str] = field(
         default=None,
-        metadata={"help": "Path to save or load the preprocessed datasets."},
+        metadata={"help": "Path to save or load the pre-processed datasets."},
     )

     def __post_init__(self):
@@ -14,23 +14,23 @@ class EvaluationArguments:
     task: str = field(
         metadata={"help": "Name of the evaluation task."},
     )
-    task_dir: Optional[str] = field(
+    task_dir: str = field(
         default="evaluation",
         metadata={"help": "Path to the folder containing the evaluation datasets."},
     )
-    batch_size: Optional[int] = field(
+    batch_size: int = field(
         default=4,
         metadata={"help": "The batch size per GPU for evaluation."},
     )
-    seed: Optional[int] = field(
+    seed: int = field(
         default=42,
         metadata={"help": "Random seed to be used with data loaders."},
     )
-    lang: Optional[Literal["en", "zh"]] = field(
+    lang: Literal["en", "zh"] = field(
         default="en",
         metadata={"help": "Language used at evaluation."},
     )
-    n_shot: Optional[int] = field(
+    n_shot: int = field(
         default=5,
         metadata={"help": "Number of exemplars for few-shot learning."},
     )
@@ -38,7 +38,7 @@ class EvaluationArguments:
         default=None,
         metadata={"help": "Path to save the evaluation results."},
     )
-    download_mode: Optional[DownloadMode] = field(
+    download_mode: DownloadMode = field(
         default=DownloadMode.REUSE_DATASET_IF_EXISTS,
         metadata={"help": "Download mode used for the evaluation datasets."},
     )
@@ -22,7 +22,7 @@ class FreezeArguments:
                   Other choices: the same as LLaMA."""
         },
     )
-    num_layer_trainable: Optional[int] = field(
+    num_layer_trainable: int = field(
         default=3,
         metadata={"help": "The number of trainable layers for partial-parameter (freeze) fine-tuning."},
     )
@@ -44,11 +44,11 @@ class LoraArguments:
         default=None,
         metadata={"help": "The scale factor for LoRA fine-tuning (default: lora_rank * 2)."},
     )
-    lora_dropout: Optional[float] = field(
+    lora_dropout: float = field(
         default=0.0,
         metadata={"help": "Dropout rate for the LoRA fine-tuning."},
     )
-    lora_rank: Optional[int] = field(
+    lora_rank: int = field(
         default=8,
         metadata={"help": "The intrinsic dimension for LoRA fine-tuning."},
     )
@@ -66,18 +66,19 @@ class LoraArguments:
                   Other choices: the same as LLaMA."""
         },
     )
-    lora_bf16_mode: Optional[bool] = field(
+    lora_bf16_mode: bool = field(
         default=False,
         metadata={"help": "Whether or not to train lora adapters in bf16 precision."},
     )
-    use_rslora: Optional[bool] = field(
+    use_rslora: bool = field(
         default=False,
         metadata={"help": "Whether or not to use the rank stabilization scaling factor for LoRA layer."},
     )
-    use_dora: Optional[bool] = field(
-        default=False, metadata={"help": "Whether or not to use the weight-decomposed lora method (DoRA)."}
+    use_dora: bool = field(
+        default=False,
+        metadata={"help": "Whether or not to use the weight-decomposed lora method (DoRA)."},
     )
-    create_new_adapter: Optional[bool] = field(
+    create_new_adapter: bool = field(
         default=False,
         metadata={"help": "Whether or not to create a new adapter with randomly initialized weight."},
     )
@@ -89,23 +90,23 @@ class RLHFArguments:
     Arguments pertaining to the PPO and DPO training.
     """

-    dpo_beta: Optional[float] = field(
+    dpo_beta: float = field(
         default=0.1,
         metadata={"help": "The beta parameter for the DPO loss."},
     )
-    dpo_loss: Optional[Literal["sigmoid", "hinge", "ipo", "kto_pair"]] = field(
+    dpo_loss: Literal["sigmoid", "hinge", "ipo", "kto_pair"] = field(
         default="sigmoid",
         metadata={"help": "The type of DPO loss to use."},
     )
-    dpo_ftx: Optional[float] = field(
-        default=0,
+    dpo_ftx: float = field(
+        default=0.0,
         metadata={"help": "The supervised fine-tuning loss coefficient in DPO training."},
     )
-    ppo_buffer_size: Optional[int] = field(
+    ppo_buffer_size: int = field(
         default=1,
         metadata={"help": "The number of mini-batches to make experience buffer in a PPO optimization step."},
     )
-    ppo_epochs: Optional[int] = field(
+    ppo_epochs: int = field(
         default=4,
         metadata={"help": "The number of epochs to perform in a PPO optimization step."},
     )
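
As a reference for how `dpo_beta` and `dpo_ftx` enter the objective, a minimal sketch of the sigmoid DPO loss with the auxiliary SFT term follows; the function and variable names are illustrative, not taken from this codebase.

    import math

    def dpo_sigmoid_loss(chosen_logratio: float, rejected_logratio: float,
                         beta: float = 0.1, ftx: float = 0.0, chosen_nll: float = 0.0) -> float:
        # Standard sigmoid DPO loss: -log sigmoid(beta * (r_chosen - r_rejected)),
        # plus ftx (dpo_ftx) times the supervised NLL on the chosen response.
        logits = beta * (chosen_logratio - rejected_logratio)
        return -math.log(1.0 / (1.0 + math.exp(-logits))) + ftx * chosen_nll
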
@@ -113,15 +114,15 @@ class RLHFArguments:
         default=None,
         metadata={"help": 'Log with either "wandb" or "tensorboard" in PPO training.'},
     )
-    ppo_score_norm: Optional[bool] = field(
+    ppo_score_norm: bool = field(
         default=False,
         metadata={"help": "Use score normalization in PPO training."},
     )
-    ppo_target: Optional[float] = field(
+    ppo_target: float = field(
         default=6.0,
         metadata={"help": "Target KL value for adaptive KL control in PPO training."},
     )
-    ppo_whiten_rewards: Optional[bool] = field(
+    ppo_whiten_rewards: bool = field(
         default=False,
         metadata={"help": "Whiten the rewards before computing advantages in PPO training."},
     )
@@ -149,7 +150,7 @@ class RLHFArguments:
         default=None,
         metadata={"help": "The number of bits to quantize the reward model."},
     )
-    reward_model_type: Optional[Literal["lora", "full", "api"]] = field(
+    reward_model_type: Literal["lora", "full", "api"] = field(
         default="lora",
         metadata={"help": "The type of the reward model in PPO training. Lora model only supports lora training."},
     )
@@ -161,19 +162,19 @@ class FinetuningArguments(FreezeArguments, LoraArguments, RLHFArguments):
    Arguments pertaining to which techniques we are going to fine-tune with.
    """

-    stage: Optional[Literal["pt", "sft", "rm", "ppo", "dpo"]] = field(
+    stage: Literal["pt", "sft", "rm", "ppo", "dpo"] = field(
         default="sft",
         metadata={"help": "Which stage will be performed in training."},
     )
-    finetuning_type: Optional[Literal["lora", "freeze", "full"]] = field(
+    finetuning_type: Literal["lora", "freeze", "full"] = field(
         default="lora",
         metadata={"help": "Which fine-tuning method to use."},
     )
-    use_llama_pro: Optional[bool] = field(
+    use_llama_pro: bool = field(
         default=False,
         metadata={"help": "Whether or not to make only the parameters in the expanded blocks trainable."},
     )
-    plot_loss: Optional[bool] = field(
+    plot_loss: bool = field(
         default=False,
         metadata={"help": "Whether or not to save the training loss curves."},
     )
@@ -1,5 +1,5 @@
 from dataclasses import asdict, dataclass, field
-from typing import Any, Dict, Optional
+from typing import Any, Dict


 @dataclass
@@ -8,41 +8,41 @@ class GeneratingArguments:
     Arguments pertaining to specifying the decoding parameters.
     """

-    do_sample: Optional[bool] = field(
+    do_sample: bool = field(
         default=True,
         metadata={"help": "Whether or not to use sampling, use greedy decoding otherwise."},
     )
-    temperature: Optional[float] = field(
+    temperature: float = field(
         default=0.95,
         metadata={"help": "The value used to modulate the next token probabilities."},
     )
-    top_p: Optional[float] = field(
+    top_p: float = field(
         default=0.7,
         metadata={
             "help": "The smallest set of most probable tokens with probabilities that add up to top_p or higher are kept."
         },
     )
-    top_k: Optional[int] = field(
+    top_k: int = field(
         default=50,
         metadata={"help": "The number of highest probability vocabulary tokens to keep for top-k filtering."},
     )
-    num_beams: Optional[int] = field(
+    num_beams: int = field(
         default=1,
         metadata={"help": "Number of beams for beam search. 1 means no beam search."},
     )
-    max_length: Optional[int] = field(
+    max_length: int = field(
         default=512,
         metadata={"help": "The maximum length the generated tokens can have. It can be overridden by max_new_tokens."},
     )
-    max_new_tokens: Optional[int] = field(
+    max_new_tokens: int = field(
         default=512,
         metadata={"help": "The maximum number of tokens to generate, ignoring the number of tokens in the prompt."},
     )
-    repetition_penalty: Optional[float] = field(
+    repetition_penalty: float = field(
         default=1.0,
         metadata={"help": "The parameter for repetition penalty. 1.0 means no penalty."},
     )
-    length_penalty: Optional[float] = field(
+    length_penalty: float = field(
         default=1.0,
         metadata={"help": "Exponential penalty to the length that is used with beam-based generation."},
     )
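
The retained `asdict` import above suggests these fields are flattened into keyword arguments for `model.generate`; a self-contained sketch of that pattern follows (the trimmed dataclass is illustrative, not the full class from this diff).

    from dataclasses import asdict, dataclass

    @dataclass
    class GeneratingArgumentsDemo:  # trimmed copy, for illustration only
        do_sample: bool = True
        temperature: float = 0.95
        top_p: float = 0.7
        top_k: int = 50
        max_new_tokens: int = 512

    gen_kwargs = asdict(GeneratingArgumentsDemo(temperature=0.8))
    # gen_kwargs can then be splatted into `model.generate(**gen_kwargs, ...)`.
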
@@ -85,6 +85,10 @@ class ModelArguments:
         default="hf",
         metadata={"help": "Backend engine used at inference."},
     )
+    vllm_maxlen: int = field(
+        default=2048,
+        metadata={"help": "Maximum input length of the vLLM engine."},
+    )
     hf_hub_token: Optional[str] = field(
         default=None,
         metadata={"help": "Auth token to log in with Hugging Face Hub."},
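
A hedged sketch of how the new fields might be set from the command line via `HfArgumentParser`; the import path of `ModelArguments` and the presence of `model_name_or_path` are assumptions about the surrounding codebase, not shown in this diff.

    from transformers import HfArgumentParser

    from llmtuner.hparams import ModelArguments  # import path assumed

    parser = HfArgumentParser(ModelArguments)
    (model_args,) = parser.parse_args_into_dataclasses(
        args=[
            "--model_name_or_path", "meta-llama/Llama-2-7b-hf",
            "--infer_backend", "vllm",
            "--vllm_maxlen", "4096",
        ]
    )
    assert model_args.infer_backend == "vllm" and model_args.vllm_maxlen == 4096
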
@@ -9,8 +9,8 @@ from transformers import HfArgumentParser, Seq2SeqTrainingArguments
 from transformers.trainer_utils import get_last_checkpoint

 from ..extras.logging import get_logger
-from ..extras.packages import is_unsloth_available
 from ..extras.misc import check_dependencies
+from ..extras.packages import is_unsloth_available
 from .data_args import DataArguments
 from .evaluation_args import EvaluationArguments
 from .finetuning_args import FinetuningArguments
@@ -59,6 +59,9 @@ def _set_transformers_logging(log_level: Optional[int] = logging.INFO) -> None:


 def _verify_model_args(model_args: "ModelArguments", finetuning_args: "FinetuningArguments") -> None:
+    if model_args.adapter_name_or_path is not None and finetuning_args.finetuning_type != "lora":
+        raise ValueError("Adapter is only valid for the LoRA method.")
+
     if model_args.quantization_bit is not None:
         if finetuning_args.finetuning_type != "lora":
             raise ValueError("Quantization is only compatible with the LoRA method.")
@@ -69,8 +72,18 @@ def _verify_model_args(model_args: "ModelArguments", finetuning_args: "Finetunin
         if model_args.adapter_name_or_path is not None and len(model_args.adapter_name_or_path) != 1:
             raise ValueError("Quantized model only accepts a single adapter. Merge them first.")

-    if model_args.adapter_name_or_path is not None and finetuning_args.finetuning_type != "lora":
-        raise ValueError("Adapter is only valid for the LoRA method.")
+    if model_args.infer_backend == "vllm":
+        if finetuning_args.stage != "sft":
+            raise ValueError("vLLM engine only supports auto-regressive models.")
+
+        if model_args.adapter_name_or_path is not None:
+            raise ValueError("vLLM engine does not support LoRA adapters. Merge them first.")
+
+        if model_args.quantization_bit is not None:
+            raise ValueError("vLLM engine does not support quantization.")
+
+        if model_args.rope_scaling is not None:
+            raise ValueError("vLLM engine does not support RoPE scaling.")


 def _parse_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS:
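
To illustrate the new guard's effect: with the vLLM backend selected, an adapter path now fails fast. The constructor calls below are simplified assumptions (the real dataclasses take many more fields, and the names are only those visible in this diff).

    # Hypothetical check reproducing the guard above, assuming ModelArguments,
    # FinetuningArguments, and _verify_model_args are imported from this module.
    model_args = ModelArguments(
        model_name_or_path="meta-llama/Llama-2-7b-hf",
        infer_backend="vllm",
        adapter_name_or_path=["my-lora-adapter"],
    )
    finetuning_args = FinetuningArguments(stage="sft")
    _verify_model_args(model_args, finetuning_args)
    # -> ValueError: vLLM engine does not support LoRA adapters. Merge them first.
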