add nf4 qlora support on Ascend NPU (#6601)
* add nf4 qlora support on Ascend NPU
* add transformers version check
* add python>=3.10 requirement description for npu
* tiny fix

---------

Co-authored-by: hoshi-hiyouga <hiyouga@buaa.edu.cn>
Former-commit-id: 7912d1acac5f10dab22145fe729a90c57aad8d85
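For context, NF4 QLoRA loads the base model with 4-bit NormalFloat (NF4) weights through bitsandbytes and trains LoRA adapters on top of the frozen quantized model. The snippet below is a minimal sketch of that loading path using transformers' BitsAndBytesConfig; it is not code from this commit, the model id and the npu:0 placement are placeholder assumptions, and it presumes torch_npu plus a bitsandbytes build with an Ascend NPU backend.

# Hedged sketch of 4-bit NF4 loading for QLoRA (not part of this commit).
# Assumes torch_npu and a bitsandbytes build with Ascend NPU support.
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",              # NormalFloat4 quantization
    bnb_4bit_compute_dtype=torch.bfloat16,  # compute in bf16, weights stay 4-bit
    bnb_4bit_use_double_quant=True,         # also quantize the quantization constants
)

model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf",             # placeholder model id
    quantization_config=bnb_config,
    device_map={"": "npu:0"},               # assumption: single Ascend NPU device
)

LoRA adapters would then be attached with peft as usual; that part is unchanged by this commit.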
@@ -23,14 +23,7 @@ from typing import TYPE_CHECKING, Optional, Tuple
 import torch
 import torch.nn as nn
 import transformers
-from transformers.models.llama.modeling_llama import (
-    Cache,
-    LlamaAttention,
-    LlamaFlashAttention2,
-    LlamaSdpaAttention,
-    apply_rotary_pos_emb,
-    repeat_kv,
-)
+from transformers.models.llama.modeling_llama import Cache, apply_rotary_pos_emb, repeat_kv
 
 from ...extras import logging
 from ...extras.constants import SUPPORTED_CLASS_FOR_S2ATTN
@@ -38,6 +31,10 @@ from ...extras.misc import check_version
 from ...extras.packages import is_transformers_version_greater_than
 
 
+if not is_transformers_version_greater_than("4.48.0"):
+    from transformers.models.llama.modeling_llama import LlamaAttention, LlamaFlashAttention2, LlamaSdpaAttention
+
+
 if TYPE_CHECKING:
     from transformers import PretrainedConfig
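Why the guard: as of transformers 4.48 the per-backend Llama attention classes (LlamaFlashAttention2, LlamaSdpaAttention) were folded into LlamaAttention, so the legacy names are only importable on older releases; the new version-gated block keeps the S2-Attn patching path working on both sides of that change. The helper comes from the project's extras.packages module; the sketch below only illustrates how such a gate is commonly written, not the project's exact code.

# Illustrative sketch of a transformers version gate (assumed shape of
# is_transformers_version_greater_than; the project's real helper may differ).
import importlib.metadata
from functools import lru_cache

from packaging import version


@lru_cache
def is_transformers_version_greater_than(content: str) -> bool:
    # Uses ">=" so the threshold release itself passes the check, matching
    # how the "4.48.0" guard in the diff above is expected to behave.
    return version.parse(importlib.metadata.version("transformers")) >= version.parse(content)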