Cleaner BAdam integration.

Former-commit-id: 26d4b05d424bd71f570195dd433258caf6465d92
This commit is contained in:
Jonery
2024-06-19 12:29:40 +08:00
parent c7479751e8
commit fa3150548e
8 changed files with 24 additions and 64 deletions

View File

@@ -166,15 +166,9 @@ class CustomPPOTrainer(PPOTrainer, Trainer):
self.reward_model = self.accelerator.prepare_model(self.reward_model, evaluation_mode=True)
if finetuning_args.use_badam:
from badam import clip_grad_norm_for_sparse_tensor
self.accelerator.clip_grad_norm_ = MethodType(clip_grad_norm_for_sparse_tensor, self.accelerator)
if (self.args.deepspeed_plugin is not None
and self.args.deepspeed_plugin.zero_stage == 3
):
from badam.utils import BAdamZeRO3Callback
self.callback_handler.add_callback(BAdamZeRO3Callback)
from badam import clip_grad_norm_old_version, BAdamCallback
self.accelerator.clip_grad_norm_ = MethodType(clip_grad_norm_old_version, self.accelerator)
self.callback_handler.add_callback(BAdamCallback)
def ppo_train(self, resume_from_checkpoint: Optional[str] = None) -> None:
r"""