add nf4 qlora support on Ascend NPU (#6601)

* add nf4 qlora support on Ascend NPU (see the illustrative loading sketch after these notes)

* add transformers version check

* add python>=3.10 requirement description for npu

* tiny fix
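
For context, NF4 QLoRA loading in transformers is driven by BitsAndBytesConfig. Below is a minimal sketch of the 4-bit setup this feature enables on Ascend NPU; the model id and the single-NPU device_map are illustrative assumptions, not code from this commit, and the NPU path relies on the bitsandbytes multi-backend build (which is why the Python >= 3.10 requirement is called out above):

    import torch
    from transformers import AutoModelForCausalLM, BitsAndBytesConfig

    # QLoRA-style NF4 quantization; bf16 compute and double quantization
    # are common defaults, assumed here rather than taken from the commit.
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_use_double_quant=True,
    )

    # "meta-llama/Llama-2-7b-hf" is a placeholder model id.
    model = AutoModelForCausalLM.from_pretrained(
        "meta-llama/Llama-2-7b-hf",
        quantization_config=bnb_config,
        device_map={"": "npu:0"},  # assumption: place the whole model on one NPU
    )

LoRA adapters are then attached on top of the frozen 4-bit base model (e.g. via peft), which is what makes this QLoRA rather than plain 4-bit inference.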

---------

Co-authored-by: hoshi-hiyouga <hiyouga@buaa.edu.cn>
Former-commit-id: 7912d1acac5f10dab22145fe729a90c57aad8d85
codingma
2025-01-13 19:43:36 +08:00
committed by GitHub
parent 73c1c15b62
commit 11c38b9173
4 changed files with 104 additions and 10 deletions


@@ -23,14 +23,7 @@ from typing import TYPE_CHECKING, Optional, Tuple
 import torch
 import torch.nn as nn
 import transformers
-from transformers.models.llama.modeling_llama import (
-    Cache,
-    LlamaAttention,
-    LlamaFlashAttention2,
-    LlamaSdpaAttention,
-    apply_rotary_pos_emb,
-    repeat_kv,
-)
+from transformers.models.llama.modeling_llama import Cache, apply_rotary_pos_emb, repeat_kv

 from ...extras import logging
 from ...extras.constants import SUPPORTED_CLASS_FOR_S2ATTN
@@ -38,6 +31,10 @@ from ...extras.misc import check_version
 from ...extras.packages import is_transformers_version_greater_than


+if not is_transformers_version_greater_than("4.48.0"):
+    from transformers.models.llama.modeling_llama import LlamaAttention, LlamaFlashAttention2, LlamaSdpaAttention
+
+
 if TYPE_CHECKING:
     from transformers import PretrainedConfig
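
The guard above exists because the transformers 4.48 attention refactor removed the separate LlamaFlashAttention2 and LlamaSdpaAttention classes, folding the backends into a single LlamaAttention. Here is a self-contained sketch of the same version-gating pattern, approximating the repo's is_transformers_version_greater_than helper with packaging.version (the helper's exact implementation is an assumption):

    import transformers
    from packaging import version

    def is_transformers_version_greater_than(target: str) -> bool:
        # Compare the installed transformers version against a threshold;
        # ">= target" matches how such helpers are typically written.
        return version.parse(transformers.__version__) >= version.parse(target)

    if not is_transformers_version_greater_than("4.48.0"):
        # Pre-4.48 releases still ship one Llama attention class per backend,
        # so these names are only importable behind the version gate.
        from transformers.models.llama.modeling_llama import (
            LlamaAttention,
            LlamaFlashAttention2,
            LlamaSdpaAttention,
        )

Gating the import this way keeps the module importable on both old and new transformers releases, instead of failing at import time on 4.48+.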