[model] add mistral small models (#6786)
Former-commit-id: e5e95c39bc4199fa89c67e34f9adaaa987058744
This commit is contained in:
@@ -118,6 +118,6 @@ def configure_packing(model_args: "ModelArguments", is_trainable: bool) -> None:
|
||||
if not is_trainable or not model_args.block_diag_attn:
|
||||
return
|
||||
|
||||
check_version("transformers>=4.43.0,<=4.48.1")
|
||||
check_version("transformers>=4.43.0,<=4.48.2")
|
||||
transformers.modeling_flash_attention_utils._get_unpad_data = get_unpad_data
|
||||
logger.info_rank0("Using block diagonal attention for sequence packing without cross-attention.")
|
||||
|
||||
Reference in New Issue
Block a user