Merge pull request #1553 from hannlp/hans
Change the default argument settings for PPO training

Former-commit-id: 1b64678fa4979485f67c3bb1420dfdff6fcbc6e7
@@ -45,7 +45,7 @@ def run_ppo(
     mini_batch_size=training_args.per_device_train_batch_size,
     batch_size=training_args.per_device_train_batch_size * training_args.gradient_accumulation_steps,
     gradient_accumulation_steps=training_args.gradient_accumulation_steps,
-    ppo_epochs=1,
+    ppo_epochs=finetuning_args.ppo_epochs,
     max_grad_norm=training_args.max_grad_norm,
     seed=training_args.seed,
     optimize_device_cache=True,
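
For context, a minimal sketch of how the changed line plugs into trl's PPOConfig. The FinetuningArguments dataclass and the build_ppo_config helper below are illustrative assumptions, not the repository's actual definitions; only the PPOConfig keyword arguments are taken from the hunk above, and the default value chosen for ppo_epochs is likewise illustrative.

# Sketch only: simplified stand-ins for the argument classes used in run_ppo.
from dataclasses import dataclass, field

from transformers import TrainingArguments
from trl import PPOConfig


@dataclass
class FinetuningArguments:
    # New user-facing argument exposed by this PR: number of PPO optimisation
    # epochs per batch, replacing the previously hard-coded value 1.
    ppo_epochs: int = field(default=4, metadata={"help": "Number of PPO epochs per batch."})


def build_ppo_config(training_args: TrainingArguments,
                     finetuning_args: FinetuningArguments) -> PPOConfig:
    # Mirrors the hunk above: batch sizes come from the HF TrainingArguments,
    # while ppo_epochs is now read from the fine-tuning arguments.
    return PPOConfig(
        mini_batch_size=training_args.per_device_train_batch_size,
        batch_size=training_args.per_device_train_batch_size
        * training_args.gradient_accumulation_steps,
        gradient_accumulation_steps=training_args.gradient_accumulation_steps,
        ppo_epochs=finetuning_args.ppo_epochs,
        max_grad_norm=training_args.max_grad_norm,
        seed=training_args.seed,
        optimize_device_cache=True,
    )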