Feature BAdam
Former-commit-id: d8d2807fbcf587c37f7fd34a23e9397d2775ceed
@@ -9,7 +9,8 @@ from transformers import Seq2SeqTrainer
 from ...extras.constants import IGNORE_INDEX
 from ...extras.logging import get_logger
 from ..utils import create_custom_optimzer, create_custom_scheduler
-
+from types import MethodType
+from packaging import version
 
 if TYPE_CHECKING:
     from transformers.trainer import PredictionOutput
@@ -28,6 +29,9 @@ class CustomSeq2SeqTrainer(Seq2SeqTrainer):
     def __init__(self, finetuning_args: "FinetuningArguments", **kwargs) -> None:
         super().__init__(**kwargs)
         self.finetuning_args = finetuning_args
+        if version.parse(torch.__version__) >= version.parse("1.13"):
+            from badam import clip_grad_norm_for_sparse_tensor
+            self.accelerator.clip_grad_norm_ = MethodType(clip_grad_norm_for_sparse_tensor, self.accelerator)
 
     def create_optimizer(self) -> "torch.optim.Optimizer":
         if self.optimizer is None:
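
The core of the change is the last added line: MethodType rebinds badam's clip_grad_norm_for_sparse_tensor as the clip_grad_norm_ method of this one Accelerator instance. BAdam updates one block of parameters at a time, so gradients can arrive as sparse tensors that the stock clipping routine does not expect; the patch, gated on torch >= 1.13 via packaging.version, swaps in a sparse-aware implementation. A minimal sketch of the binding mechanism (the FakeAccelerator class and return strings are illustrative stand-ins, not the real accelerate or badam APIs):

from types import MethodType

class FakeAccelerator:
    # Stand-in for accelerate.Accelerator -- illustration only.
    def clip_grad_norm_(self, parameters, max_norm, norm_type=2):
        return "dense clipping"

def clip_grad_norm_for_sparse_tensor(self, parameters, max_norm, norm_type=2):
    # Plays the role of badam's replacement; `self` becomes the bound
    # accelerator instance once MethodType attaches the function to it.
    return "sparse-aware clipping via " + type(self).__name__

acc = FakeAccelerator()
# Binding to the instance (not the class) changes behavior for this
# accelerator only; every other Accelerator keeps the default method.
acc.clip_grad_norm_ = MethodType(clip_grad_norm_for_sparse_tensor, acc)
print(acc.clip_grad_norm_([], max_norm=1.0))  # -> sparse-aware clipping via FakeAccelerator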
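
The create_optimizer context is truncated at "if self.optimizer is None:". Judging from the create_custom_optimzer import in the first hunk (the "optimzer" spelling is the project's own identifier), the body most likely delegates to that factory, which is where a BAdam optimizer would be constructed when enabled. A guess at the shape, not part of this diff:

def create_optimizer(self) -> "torch.optim.Optimizer":
    if self.optimizer is None:
        # Assumed continuation: let the project factory build a custom
        # optimizer (e.g. BAdam) from the finetuning arguments, then
        # fall back to the stock Trainer optimizer if it declines.
        self.optimizer = create_custom_optimzer(self.model, self.args, self.finetuning_args)
    return super().create_optimizer()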