Fix DPO trainer

Former-commit-id: c160dd7cd86e296e32775ace2e4258a473449c41
This commit is contained in:
hiyouga
2023-12-23 01:51:55 +08:00
parent 0fdd6074c3
commit d358d955e5
2 changed files with 5 additions and 2 deletions

View File

@@ -130,6 +130,9 @@ def get_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS:
if finetuning_args.stage == "ppo" and model_args.shift_attn:
raise ValueError("PPO training is incompatible with S^2-Attn.")
if finetuning_args.stage == "ppo" and finetuning_args.reward_model_type == "lora" and model_args.use_unsloth:
raise ValueError("Unsloth does not support lora reward model.")
if training_args.max_steps == -1 and data_args.streaming:
raise ValueError("Please specify `max_steps` in streaming mode.")