fix ppo args
Former-commit-id: 0f12899951808f53a482082eb116bda309775930
@@ -42,15 +42,14 @@ def run_ppo(
         ppo_epochs=1,
         max_grad_norm=training_args.max_grad_norm,
         seed=training_args.seed,
-        log_with=training_args.report_to,
         optimize_cuda_cache=True,
         target=finetuning_args.ppo_target,
+        log_with=finetuning_args.ppo_logger,
+        use_score_scaling=finetuning_args.ppo_score_norm,
+        use_score_norm=finetuning_args.ppo_score_norm,
         accelerator_kwargs={"step_scheduler_with_optimizer": False}
     )
 
-    if finetuning_args.ppo_score_norm:
-        ppo_config.use_score_scaling = True
-        ppo_config.use_score_norm = True
     optimizer = AdamW(filter(lambda p: p.requires_grad, model.parameters()), lr=training_args.learning_rate)
     total_train_batch_size = (
         training_args.per_device_train_batch_size * training_args.gradient_accumulation_steps * training_args.world_size
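For reference, a minimal standalone sketch of the configuration style this commit moves to is shown below. It is not the repository's exact code: it assumes a TRL version (roughly trl 0.7.x) whose PPOConfig accepts optimize_cuda_cache, target, log_with, use_score_scaling, use_score_norm, and accelerator_kwargs as constructor arguments, and the literal values and toy model are placeholders for the training_args / finetuning_args fields and the policy model used in run_ppo.

# Minimal sketch, assuming trl ~0.7.x; literals stand in for
# training_args / finetuning_args fields from the real code.
import torch
from torch.optim import AdamW
from trl import PPOConfig

ppo_config = PPOConfig(
    ppo_epochs=1,
    max_grad_norm=1.0,           # training_args.max_grad_norm
    seed=42,                     # training_args.seed
    optimize_cuda_cache=True,
    target=6.0,                  # finetuning_args.ppo_target (adaptive KL target)
    log_with=None,               # finetuning_args.ppo_logger, e.g. "wandb" or "tensorboard"
    use_score_scaling=True,      # finetuning_args.ppo_score_norm
    use_score_norm=True,         # finetuning_args.ppo_score_norm
    accelerator_kwargs={"step_scheduler_with_optimizer": False},
)

# Stand-in for the policy model; only trainable parameters are handed to the
# optimizer, mirroring the filter(...) call in the hunk above.
model = torch.nn.Linear(8, 8)
optimizer = AdamW(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-5)  # training_args.learning_rate

# Effective batch size across accumulation steps and devices.
per_device_train_batch_size, gradient_accumulation_steps, world_size = 4, 2, 1
total_train_batch_size = per_device_train_batch_size * gradient_accumulation_steps * world_size

Passing use_score_scaling / use_score_norm to the constructor, instead of mutating ppo_config after it is built, keeps the reward-scaling switches in one place alongside the other PPO settings.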