support LongLoRA
Former-commit-id: 0832ed37e7947d699f17375648a52f80752c2b6b
This commit is contained in:
@@ -90,6 +90,10 @@ class DataArguments:
|
||||
default=0,
|
||||
metadata={"help": "Size of the development set, should be an integer or a float in range `[0,1)`."}
|
||||
)
|
||||
sft_packing: Optional[bool] = field(
|
||||
default=False,
|
||||
metadata={"help": "Packing the questions and answers in the supervised fine-tuning stage."}
|
||||
)
|
||||
|
||||
def init_for_training(self): # support mixing multiple datasets
|
||||
dataset_names = [ds.strip() for ds in self.dataset.split(",")]
|
||||
|
||||
@@ -45,7 +45,11 @@ class ModelArguments:
|
||||
)
|
||||
flash_attn: Optional[bool] = field(
|
||||
default=False,
|
||||
metadata={"help": "Enable flash attention for faster training."}
|
||||
metadata={"help": "Enable FlashAttention-2 for faster training."}
|
||||
)
|
||||
shift_attn: Optional[bool] = field(
|
||||
default=False,
|
||||
metadata={"help": "Enable shift short attention (S^2-Attn) proposed by LongLoRA."}
|
||||
)
|
||||
checkpoint_dir: Optional[str] = field(
|
||||
default=None,
|
||||
|
||||
Reference in New Issue
Block a user