[breaking] support transformers 4.48 (#6628)

Former-commit-id: f154ab175c513a4d7bb866bf2cffc34b77b50508
Author: hoshi-hiyouga
Date: 2025-01-31 01:36:33 +08:00
Committed by: GitHub
parent e71737351f
commit 222423bcef
17 changed files with 53 additions and 105 deletions


@@ -350,7 +350,7 @@ def llama_sdpa_attention_forward(
 def _apply_llama_patch() -> None:
-    check_version("transformers>=4.41.2,<=4.46.1")
+    check_version("transformers>=4.41.2,<4.48.0")
     LlamaAttention.forward = llama_attention_forward
     LlamaFlashAttention2.forward = llama_flash_attention_2_forward
     LlamaSdpaAttention.forward = llama_sdpa_attention_forward

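The hunk above tightens the supported transformers range for the LongLoRA attention patch. As a minimal sketch of what a guard like check_version can do (an illustrative reimplementation based on PEP 508 requirement strings, not the project's actual helper):

# Illustrative sketch only: raise if the installed package falls outside the
# requirement range. Names and error message are assumptions.
import importlib.metadata

from packaging.requirements import Requirement


def check_version(requirement: str) -> None:
    req = Requirement(requirement)  # e.g. "transformers>=4.41.2,<4.48.0"
    installed = importlib.metadata.version(req.name)  # e.g. "4.46.1"
    if not req.specifier.contains(installed, prereleases=True):
        raise RuntimeError(f"{req.name}=={installed} does not satisfy '{requirement}'.")


# Example: with this change, transformers 4.48.x is rejected before the patch is applied.
check_version("transformers>=4.41.2,<4.48.0")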

@@ -118,6 +118,6 @@ def configure_packing(model_args: "ModelArguments", is_trainable: bool) -> None:
     if not is_trainable or not model_args.block_diag_attn:
         return
 
-    check_version("transformers>=4.43.0,<=4.46.1")
+    check_version("transformers>=4.43.0,<=4.48.1")
     transformers.modeling_flash_attention_utils._get_unpad_data = get_unpad_data
     logger.info_rank0("Using block diagonal attention for sequence packing without cross-attention.")
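This hunk swaps transformers' internal _get_unpad_data for a packing-aware variant. A minimal sketch of the underlying idea, assuming the attention mask stores 1, 2, 3, ... for tokens of the first, second, third, ... packed sub-sequence (names and details are assumptions, not the project's exact implementation):

# Sketch, not the real implementation: compute FlashAttention unpad metadata so
# that attention stays block diagonal across packed sub-sequences.
from typing import Tuple

import torch
import torch.nn.functional as F


def get_unpad_data(attention_mask: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, int]:
    # attention_mask: (batch, seq_len); 0 marks padding, while 1, 2, 3, ... mark
    # tokens of the first, second, third, ... packed sub-sequence, assumed to be
    # laid out contiguously within each row.
    max_num = int(attention_mask.max().item())
    counts = torch.stack(
        [(attention_mask == i).sum(dim=-1) for i in range(1, max_num + 1)], dim=-1
    )  # (batch, max_num); row-major flattening matches the token order below
    seqlens_in_batch = counts.flatten()
    seqlens_in_batch = seqlens_in_batch[seqlens_in_batch > 0].to(torch.int32)

    indices = torch.nonzero(attention_mask.flatten() > 0, as_tuple=False).flatten()
    max_seqlen_in_batch = int(seqlens_in_batch.max().item())
    cu_seqlens = F.pad(torch.cumsum(seqlens_in_batch, dim=0, dtype=torch.int32), (1, 0))
    return indices, cu_seqlens, max_seqlen_in_batch

With a replacement along these lines installed, the FlashAttention unpad path treats each packed document as its own sequence, so tokens never attend across document boundaries.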