[breaking] bump transformers to 4.45.0 & improve ci (#7746)
* update ci
* fix
* fix
* fix
* fix
* fix
@@ -19,7 +19,6 @@ import torch
 from transformers import (
     AutoConfig,
     AutoModelForCausalLM,
-    AutoModelForImageTextToText,
     AutoModelForSeq2SeqLM,
     AutoModelForTextToWaveform,
     AutoModelForVision2Seq,
@@ -30,6 +29,7 @@ from trl import AutoModelForCausalLMWithValueHead
 
 from ..extras import logging
 from ..extras.misc import count_parameters, skip_check_imports, try_download_model_from_other_hub
+from ..extras.packages import is_transformers_version_greater_than
 from .adapter import init_adapter
 from .model_utils.liger_kernel import apply_liger_kernel
 from .model_utils.misc import register_autoclass
@@ -39,6 +39,10 @@ from .model_utils.valuehead import load_valuehead_params
 from .patcher import patch_config, patch_model, patch_processor, patch_tokenizer, patch_valuehead_model
 
 
+if is_transformers_version_greater_than("4.46.0"):
+    from transformers import AutoModelForImageTextToText
+
+
 if TYPE_CHECKING:
     from transformers import PretrainedConfig, PreTrainedModel, PreTrainedTokenizer, ProcessorMixin
 
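The gated import above relies on `is_transformers_version_greater_than` from `..extras.packages` (imported in the previous hunk). As a rough sketch of what such a helper can look like (the project's actual implementation may differ in detail), it only needs to compare the installed `transformers` version against a threshold; note that despite the name it should accept an equal version, since `AutoModelForImageTextToText` ships in 4.46.0 itself:

```python
# Hedged sketch of a version-gate helper; the real one lives in
# llamafactory's extras.packages and may differ in detail.
from functools import lru_cache
from importlib.metadata import version as installed_version

from packaging import version


@lru_cache
def is_transformers_version_greater_than(content: str) -> bool:
    # Cache the result: the installed version cannot change at runtime.
    return version.parse(installed_version("transformers")) >= version.parse(content)
```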
@@ -145,7 +149,10 @@ def load_model(
         else:
             if type(config) in AutoModelForVision2Seq._model_mapping.keys():  # image-text
                 load_class = AutoModelForVision2Seq
-            elif type(config) in AutoModelForImageTextToText._model_mapping.keys():  # image-text
+            elif (
+                is_transformers_version_greater_than("4.46.0")
+                and type(config) in AutoModelForImageTextToText._model_mapping.keys()
+            ):  # image-text
                 load_class = AutoModelForImageTextToText
             elif type(config) in AutoModelForSeq2SeqLM._model_mapping.keys():  # audio-text
                 load_class = AutoModelForSeq2SeqLM
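For context on the dispatch above: every `AutoModelFor*` class in transformers carries a `_model_mapping` whose keys are config classes, so `type(config) in ..._model_mapping.keys()` asks whether that task-specific auto class knows how to load the architecture at hand. A minimal standalone illustration of the same pattern (the checkpoint name is only an example):

```python
# Illustration of config-based dispatch, mirroring load_model above.
# The checkpoint is an arbitrary example of a vision-language model.
from transformers import AutoConfig, AutoModelForCausalLM, AutoModelForVision2Seq

config = AutoConfig.from_pretrained("Qwen/Qwen2-VL-2B-Instruct")

if type(config) in AutoModelForVision2Seq._model_mapping.keys():  # image-text model
    load_class = AutoModelForVision2Seq
else:
    load_class = AutoModelForCausalLM  # fall back to a plain causal LM

model = load_class.from_pretrained("Qwen/Qwen2-VL-2B-Instruct")
```

Guarding the `AutoModelForImageTextToText` branch behind the version check keeps the module working on transformers < 4.46.0, where that auto class does not exist.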
@@ -18,7 +18,6 @@ from transformers.utils import is_flash_attn_2_available, is_torch_sdpa_availabl
 
 from ...extras import logging
 from ...extras.constants import AttentionFunction
-from ...extras.misc import check_version
 
 
 if TYPE_CHECKING:
@@ -36,8 +35,6 @@ def configure_attn_implementation(
     if getattr(config, "model_type", None) == "gemma2" and is_trainable:
         if model_args.flash_attn == AttentionFunction.AUTO or model_args.flash_attn == AttentionFunction.FA2:
             if is_flash_attn_2_available():
-                check_version("transformers>=4.42.4")
-                check_version("flash_attn>=2.6.3")
                 if model_args.flash_attn != AttentionFunction.FA2:
                     logger.warning_rank0("Gemma 2 should use flash attention 2, change `flash_attn` to fa2.")
                 model_args.flash_attn = AttentionFunction.FA2
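The removed `check_version("transformers>=4.42.4")` call is presumably redundant now that the project-wide floor is bumped to 4.45.0 (this PR's title). Once `configure_attn_implementation` settles on a backend, the choice ultimately reaches the model through transformers' standard `attn_implementation` argument, for example:

```python
# Example of requesting the flash attention 2 backend through transformers'
# public API; the model name is only an example, and flash-attn must be installed.
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    "google/gemma-2-9b",
    attn_implementation="flash_attention_2",
)
```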
@@ -350,7 +350,7 @@ def llama_sdpa_attention_forward(
 
 
 def _apply_llama_patch() -> None:
-    check_version("transformers>=4.41.2,<4.48.0")
+    check_version("transformers>=4.43.0,<4.48.0", mandatory=True)
     LlamaAttention.forward = llama_attention_forward
     LlamaFlashAttention2.forward = llama_flash_attention_2_forward
     LlamaSdpaAttention.forward = llama_sdpa_attention_forward
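`check_version` is LLaMA-Factory's own helper from `..extras.misc`, not a transformers API. Judging from the `mandatory=True` argument introduced here, a plausible reading is that ordinary checks can be switched off while mandatory ones always run. A hedged sketch of such semantics, built on transformers' real `require_version` utility (the environment-variable convention below is an assumption):

```python
# Hypothetical sketch of a check_version helper with a `mandatory` switch;
# the actual implementation lives in llamafactory's extras.misc.
import os

from transformers.utils.versions import require_version


def check_version(requirement: str, mandatory: bool = False) -> None:
    # Assumed convention: an env var can skip optional checks, but a mandatory
    # pin (e.g. a monkey patch tied to library internals) always runs.
    if os.getenv("DISABLE_VERSION_CHECK", "0").lower() in ("true", "1") and not mandatory:
        return

    require_version(requirement, f'To fix: run `pip install "{requirement}"`.')
```

Making this particular pin mandatory fits the context: `_apply_llama_patch` overwrites private `LlamaAttention` internals, which change between transformers releases, so the upper bound `<4.48.0` must hold.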
@@ -43,7 +43,6 @@ import torch
 import torch.nn.functional as F
 
 from ...extras import logging
-from ...extras.misc import check_version
 from ...extras.packages import is_transformers_version_greater_than
 
 
@@ -117,6 +116,5 @@ def configure_packing(model_args: "ModelArguments", is_trainable: bool) -> None:
     if not is_trainable or not model_args.block_diag_attn:
         return
 
-    check_version("transformers>=4.43.0")
     transformers.modeling_flash_attention_utils._get_unpad_data = get_unpad_data
     logger.info_rank0("Using block diagonal attention for sequence packing without cross-attention.")
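The dropped `transformers>=4.43.0` pin is covered by the new project-wide floor, so only the monkey patch remains. That patch replaces transformers' private `_get_unpad_data` so flash attention's varlen kernels treat each packed segment as its own sequence (block-diagonal attention) instead of letting tokens attend across segment boundaries. A simplified sketch of what such a replacement computes, assuming the collator writes a per-token segment id into `attention_mask` (e.g. `[1, 1, 2, 2, 2, 0]`, with `0` as padding):

```python
# Simplified sketch of unpad data for block-diagonal attention; the real
# get_unpad_data in llamafactory's model_utils.packing is vectorized.
import torch
import torch.nn.functional as F


def get_unpad_data(attention_mask: torch.Tensor):
    # Collect the length of every packed segment, row by row, in order.
    seqlens = []
    for row in attention_mask:
        for seg_id in range(1, int(row.max().item()) + 1):
            seqlens.append(int((row == seg_id).sum().item()))

    seqlens_in_batch = torch.tensor(seqlens, dtype=torch.int32, device=attention_mask.device)
    # Flat indices of all non-padding tokens, in row-major order.
    indices = torch.nonzero(attention_mask.flatten(), as_tuple=False).flatten()
    # Cumulative segment lengths, the layout flash-attn's varlen kernels expect.
    cu_seqlens = F.pad(torch.cumsum(seqlens_in_batch, dim=0), (1, 0)).to(torch.int32)
    return indices, cu_seqlens, int(seqlens_in_batch.max().item())
```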