[model] add mistral small models (#6786)

Former-commit-id: e5e95c39bc4199fa89c67e34f9adaaa987058744
This commit is contained in:
hoshi-hiyouga
2025-02-01 04:31:38 +08:00
committed by GitHub
parent 800de98dc8
commit a28261a866
10 changed files with 106 additions and 32 deletions

View File

@@ -118,6 +118,6 @@ def configure_packing(model_args: "ModelArguments", is_trainable: bool) -> None:
if not is_trainable or not model_args.block_diag_attn:
return
-check_version("transformers>=4.43.0,<=4.48.1")
+check_version("transformers>=4.43.0,<=4.48.2")
transformers.modeling_flash_attention_utils._get_unpad_data = get_unpad_data
logger.info_rank0("Using block diagonal attention for sequence packing without cross-attention.")