[breaking] support transformers 4.48 (#6628)
Former-commit-id: f154ab175c513a4d7bb866bf2cffc34b77b50508
@@ -29,7 +29,7 @@ from trl.trainer import disable_dropout_in_model
 from typing_extensions import override

 from ...extras.constants import IGNORE_INDEX
-from ...extras.packages import is_transformers_version_equal_to_4_46, is_transformers_version_greater_than
+from ...extras.packages import is_transformers_version_greater_than
 from ..callbacks import SaveProcessorCallback
 from ..trainer_utils import create_custom_optimizer, create_custom_scheduler, get_batch_logps, nested_detach

@@ -282,19 +282,12 @@ class CustomDPOTrainer(DPOTrainer):
         self, model: "PreTrainedModel", inputs: Dict[str, "torch.Tensor"], return_outputs: bool = False, **kwargs
     ) -> Union["torch.Tensor", Tuple["torch.Tensor", List["torch.Tensor"]]]:
         r"""
-        Fixes the loss value. See https://github.com/huggingface/transformers/pull/35438 for details.
+        Subclass and override to accept extra kwargs.
         """
-        loss = super().compute_loss(model, inputs, return_outputs)
-        if is_transformers_version_equal_to_4_46() and kwargs.get("num_items_in_batch"):
-            if return_outputs:
-                loss = (loss[0] / self.args.gradient_accumulation_steps, *loss[1:])
-            else:
-                loss = loss / self.args.gradient_accumulation_steps
-
-        return loss
+        return super().compute_loss(model, inputs, return_outputs)

     @override
-    def log(self, logs: Dict[str, float]) -> None:
+    def log(self, logs: Dict[str, float], *args, **kwargs) -> None:
         r"""
         Log `logs` on the various objects watching training, including stored metrics.
         """
@@ -318,4 +311,4 @@ class CustomDPOTrainer(DPOTrainer):
             if not key.startswith("dummy_"):
                 logs[key] = metric

-        return Trainer.log(self, logs)
+        return Trainer.log(self, logs, *args, **kwargs)

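The log() signature change above is what keeps these overrides compatible with newer transformers releases, which call Trainer.log() with an extra positional argument (a start-time value used for speed metrics); an override that only accepts `logs` would raise a TypeError. A minimal sketch of the forwarding pattern, where the LoggingTrainer subclass and the my_metric key are purely illustrative and not part of this commit:

# Illustrative sketch: forward any extra arguments that newer transformers
# versions pass to Trainer.log(), instead of hard-coding the old signature.
from typing import Dict

from transformers import Trainer


class LoggingTrainer(Trainer):  # hypothetical subclass for illustration
    def log(self, logs: Dict[str, float], *args, **kwargs) -> None:
        logs["my_metric"] = 0.0  # attach custom values before delegating
        return super().log(logs, *args, **kwargs)  # pass through unknown arguments
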
@@ -28,7 +28,7 @@ from trl.trainer import disable_dropout_in_model
 from typing_extensions import override

 from ...extras.constants import IGNORE_INDEX
-from ...extras.packages import is_transformers_version_equal_to_4_46, is_transformers_version_greater_than
+from ...extras.packages import is_transformers_version_greater_than
 from ..callbacks import SaveProcessorCallback
 from ..trainer_utils import create_custom_optimizer, create_custom_scheduler, get_batch_logps, nested_detach

@@ -256,19 +256,12 @@ class CustomKTOTrainer(KTOTrainer):
         self, model: "PreTrainedModel", inputs: Dict[str, "torch.Tensor"], return_outputs: bool = False, **kwargs
     ) -> Union["torch.Tensor", Tuple["torch.Tensor", List["torch.Tensor"]]]:
         r"""
-        Fixes the loss value. See https://github.com/huggingface/transformers/pull/35438 for details.
+        Subclass and override to accept extra kwargs.
         """
-        loss = super().compute_loss(model, inputs, return_outputs)
-        if is_transformers_version_equal_to_4_46() and kwargs.get("num_items_in_batch"):
-            if return_outputs:
-                loss = (loss[0] / self.args.gradient_accumulation_steps, *loss[1:])
-            else:
-                loss = loss / self.args.gradient_accumulation_steps
-
-        return loss
+        return super().compute_loss(model, inputs, return_outputs)

     @override
-    def log(self, logs: Dict[str, float]) -> None:
+    def log(self, logs: Dict[str, float], *args, **kwargs) -> None:
         r"""
         Log `logs` on the various objects watching training, including stored metrics.
         """
@@ -304,4 +297,4 @@ class CustomKTOTrainer(KTOTrainer):
             if not key.startswith("dummy_"):
                 logs[key] = metric

-        return Trainer.log(self, logs)
+        return Trainer.log(self, logs, *args, **kwargs)

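Across these files the is_transformers_version_equal_to_4_46 gate disappears while is_transformers_version_greater_than stays imported. A minimal sketch of what such a gate typically looks like, assuming a packaging-based comparison (the repository's actual helper may differ in details such as caching or inclusive bounds):

# Illustrative sketch of a version gate in the spirit of
# is_transformers_version_greater_than(); an assumption, not the repo's code.
from functools import lru_cache

from packaging import version

import transformers


@lru_cache
def is_transformers_version_greater_than(content: str) -> bool:
    # Treat the threshold as inclusive (>=); the real helper's semantics may differ.
    return version.parse(transformers.__version__) >= version.parse(content)


# Example: only take a code path that exists in transformers >= 4.48.
if is_transformers_version_greater_than("4.48.0"):
    pass  # new behavior goes here
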
@@ -13,7 +13,7 @@
 # limitations under the License.

 from types import MethodType
-from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Optional

 import torch
 from transformers import Trainer
@@ -25,7 +25,7 @@ from ..trainer_utils import create_custom_optimizer, create_custom_scheduler


 if TYPE_CHECKING:
-    from transformers import PreTrainedModel, ProcessorMixin
+    from transformers import ProcessorMixin

     from ...hparams import FinetuningArguments

@@ -72,21 +72,3 @@ class CustomTrainer(Trainer):
             return torch.utils.data.SequentialSampler(self.train_dataset)

         return super()._get_train_sampler()
-
-    @override
-    def compute_loss(
-        self, model: "PreTrainedModel", inputs: Dict[str, "torch.Tensor"], return_outputs: bool = False, **kwargs
-    ) -> Union["torch.Tensor", Tuple["torch.Tensor", List["torch.Tensor"]]]:
-        r"""
-        Fixes the loss value. See https://github.com/huggingface/transformers/pull/35438 for details.
-
-        It should be removed after https://github.com/huggingface/transformers/pull/35651 is merged.
-        """
-        loss = super().compute_loss(model, inputs, return_outputs, **kwargs)
-        if kwargs.get("num_items_in_batch") and not getattr(self, "model_accepts_loss_kwargs", False):
-            if return_outputs:
-                loss = (loss[0] / self.args.gradient_accumulation_steps, *loss[1:])
-            else:
-                loss = loss / self.args.gradient_accumulation_steps
-
-        return loss

@@ -25,7 +25,7 @@ from transformers import Trainer
 from typing_extensions import override

 from ...extras import logging
-from ...extras.packages import is_transformers_version_equal_to_4_46, is_transformers_version_greater_than
+from ...extras.packages import is_transformers_version_greater_than
 from ..callbacks import FixValueHeadModelCallback, SaveProcessorCallback
 from ..trainer_utils import create_custom_optimizer, create_custom_scheduler

@@ -107,10 +107,6 @@ class PairwiseTrainer(Trainer):
         chosen_scores, rejected_scores = chosen_scores.squeeze(), rejected_scores.squeeze()

         loss = -torch.nn.functional.logsigmoid(chosen_scores.float() - rejected_scores.float()).mean()
-
-        if is_transformers_version_equal_to_4_46() and kwargs.get("num_items_in_batch"):
-            loss /= self.args.gradient_accumulation_steps  # fixes the loss value for transformers 4.46.0-4.46.1
-
         if return_outputs:
             return loss, (loss, chosen_scores, rejected_scores)
         else:

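The loss kept by the hunk above is the standard pairwise ranking objective for reward models; the deleted branch only rescaled that scalar by the gradient accumulation steps on the affected transformers 4.46.x releases. A tiny self-contained illustration on dummy scores:

# Illustrative sketch: the pairwise reward loss on dummy score tensors.
import torch

chosen_scores = torch.tensor([2.0, 1.5])
rejected_scores = torch.tensor([0.5, 1.0])
loss = -torch.nn.functional.logsigmoid(chosen_scores - rejected_scores).mean()
print(loss.item())  # approximately 0.34
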
@@ -34,7 +34,7 @@ from ..trainer_utils import create_custom_optimizer, create_custom_scheduler

 if TYPE_CHECKING:
     from torch.utils.data import Dataset
-    from transformers import PreTrainedModel, PreTrainedTokenizer, ProcessorMixin
+    from transformers import PreTrainedTokenizer, ProcessorMixin
     from transformers.trainer import PredictionOutput

     from ...hparams import FinetuningArguments
@@ -88,24 +88,6 @@ class CustomSeq2SeqTrainer(Seq2SeqTrainer):

         return super()._get_train_sampler()
-
-    @override
-    def compute_loss(
-        self, model: "PreTrainedModel", inputs: Dict[str, "torch.Tensor"], return_outputs: bool = False, **kwargs
-    ) -> Union["torch.Tensor", Tuple["torch.Tensor", List["torch.Tensor"]]]:
-        r"""
-        Fixes the loss value. See https://github.com/huggingface/transformers/pull/35438 for details.
-
-        It should be removed after https://github.com/huggingface/transformers/pull/35651 is merged.
-        """
-        loss = super().compute_loss(model, inputs, return_outputs, **kwargs)
-        if kwargs.get("num_items_in_batch") and not getattr(self, "model_accepts_loss_kwargs", False):
-            if return_outputs:
-                loss = (loss[0] / self.args.gradient_accumulation_steps, *loss[1:])
-            else:
-                loss = loss / self.args.gradient_accumulation_steps
-
-        return loss

     @override
     def prediction_step(
         self,