[model] add mistral small models (#6786)

Former-commit-id: e5e95c39bc4199fa89c67e34f9adaaa987058744
2025-02-01 04:31:38 +08:00
parent 800de98dc8
commit a28261a866
10 changed files with 106 additions and 32 deletions
--- a/src/llamafactory/model/model_utils/packing.py
+++ b/src/llamafactory/model/model_utils/packing.py
@@ -118,6 +118,6 @@ def configure_packing(model_args: "ModelArguments", is_trainable: bool) -> None:
    if not is_trainable or not model_args.block_diag_attn:
        return

-    check_version("transformers>=4.43.0,<=4.48.1")
+    check_version("transformers>=4.43.0,<=4.48.2")
    transformers.modeling_flash_attention_utils._get_unpad_data = get_unpad_data
    logger.info_rank0("Using block diagonal attention for sequence packing without cross-attention.")