reenable sdpa and fast tok by default

Former-commit-id: 9e00902dbedc71d55743d1bf237843506a557891
hiyouga
2024-04-24 02:18:44 +08:00
parent 35c4a2c212
commit d2bb1b3a6b
8 changed files with 64 additions and 27 deletions


@@ -22,7 +22,7 @@ class ModelArguments:
         metadata={"help": "Where to store the pre-trained models downloaded from huggingface.co or modelscope.cn."},
     )
     use_fast_tokenizer: bool = field(
-        default=False,
+        default=True,
         metadata={"help": "Whether or not to use one of the fast tokenizer (backed by the tokenizers library)."},
     )
     resize_vocab: bool = field(
@@ -61,9 +61,9 @@ class ModelArguments:
         default=None,
         metadata={"help": "Which scaling strategy should be adopted for the RoPE embeddings."},
     )
-    flash_attn: bool = field(
-        default=False,
-        metadata={"help": "Enable FlashAttention for faster training."},
+    flash_attn: Literal["off", "sdpa", "fa2", "auto"] = field(
+        default="auto",
+        metadata={"help": "Enable FlashAttention for faster training and inference."},
     )
     shift_attn: bool = field(
         default=False,
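
A minimal sketch of how the new options could be consumed downstream, assuming a hypothetical resolve_attn_implementation helper; the mapping to transformers' attn_implementation values ("eager", "sdpa", "flash_attention_2"), the helper name, and the model ID are illustrative assumptions, not code from this commit.

# Illustrative sketch only: translates the new flash_attn choices into the
# attn_implementation argument accepted by transformers, and loads the tokenizer
# with use_fast=True to mirror the new default. Helper name and model ID are assumed.
from typing import Optional

from transformers import AutoModelForCausalLM, AutoTokenizer


def resolve_attn_implementation(flash_attn: str) -> Optional[str]:
    # "auto" returns None so transformers can pick the best available backend.
    if flash_attn == "auto":
        return None
    mapping = {"off": "eager", "sdpa": "sdpa", "fa2": "flash_attention_2"}
    return mapping[flash_attn]


# Example: SDPA attention plus a fast tokenizer, matching the new defaults.
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf",
    attn_implementation=resolve_attn_implementation("sdpa"),
)
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf", use_fast=True)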