fix PPO trainer #551, update readme

Former-commit-id: faead74849470cebae9e37cde5fab2a71b32aa43
Author: hiyouga
Date:   2023-08-18 11:43:10 +08:00
commit be4d2822ea (parent 736ddd0319)
6 changed files with 42 additions and 53 deletions


@@ -171,11 +171,12 @@ Currently the web UI only supports training on **a single GPU**.
```bash
CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
--stage pt \
- --model_name_or_path path_to_your_model \
+ --model_name_or_path path_to_llama_model \
--do_train \
--dataset wiki_demo \
--template default \
--finetuning_type lora \
+ --lora_target q_proj,v_proj \
--output_dir path_to_pt_checkpoint \
--overwrite_cache \
--per_device_train_batch_size 4 \
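
Note: two edits recur through every stage in this README. The placeholder `path_to_your_model` becomes `path_to_llama_model`, and an explicit `--lora_target q_proj,v_proj` is added, attaching LoRA to the attention query/value projection modules, the conventional choice for LLaMA-style models. A minimal sketch of the resulting pre-training command, with `./models/llama-7b` and `./ckpt/pt` as hypothetical local paths and all remaining hyperparameters left at their defaults:

```bash
# Sketch only: the two paths below are hypothetical, not from the commit;
# the flags mirror the hunk above.
CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
    --stage pt \
    --model_name_or_path ./models/llama-7b \
    --do_train \
    --dataset wiki_demo \
    --template default \
    --finetuning_type lora \
    --lora_target q_proj,v_proj \
    --output_dir ./ckpt/pt \
    --overwrite_cache \
    --per_device_train_batch_size 4
```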
@@ -194,11 +195,12 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
```bash
CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
--stage sft \
- --model_name_or_path path_to_your_model \
+ --model_name_or_path path_to_llama_model \
--do_train \
--dataset alpaca_gpt4_en \
--template default \
--finetuning_type lora \
+ --lora_target q_proj,v_proj \
--output_dir path_to_sft_checkpoint \
--overwrite_cache \
--per_device_train_batch_size 4 \
@@ -217,11 +219,12 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
```bash
CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
--stage rm \
- --model_name_or_path path_to_your_model \
+ --model_name_or_path path_to_llama_model \
--do_train \
--dataset comparison_gpt4_en \
--template default \
--finetuning_type lora \
+ --lora_target q_proj,v_proj \
--resume_lora_training False \
--checkpoint_dir path_to_sft_checkpoint \
--output_dir path_to_rm_checkpoint \
@@ -230,7 +233,7 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
--lr_scheduler_type cosine \
--logging_steps 10 \
--save_steps 1000 \
- --learning_rate 1e-5 \
+ --learning_rate 1e-6 \
--num_train_epochs 1.0 \
--plot_loss \
--fp16
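
Note: besides the flag changes above, this hunk lowers the reward-model learning rate from 1e-5 to 1e-6. A plausible reading (our assumption; the commit message does not say) is that since the RM resumes from the SFT adapter via `--checkpoint_dir`, a smaller step size keeps the pairwise-ranking objective from drifting too far from the SFT weights. A sketch of the full RM command after this commit, with hypothetical `./ckpt/*` paths:

```bash
# Sketch: reward modeling on top of a finished SFT adapter.
# ./ckpt/sft and ./ckpt/rm are hypothetical; 1e-6 is the new rate.
CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
    --stage rm \
    --model_name_or_path path_to_llama_model \
    --do_train \
    --dataset comparison_gpt4_en \
    --template default \
    --finetuning_type lora \
    --lora_target q_proj,v_proj \
    --resume_lora_training False \
    --checkpoint_dir ./ckpt/sft \
    --output_dir ./ckpt/rm \
    --learning_rate 1e-6 \
    --num_train_epochs 1.0 \
    --fp16
```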
@@ -241,11 +244,12 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
```bash
CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
--stage ppo \
- --model_name_or_path path_to_your_model \
+ --model_name_or_path path_to_llama_model \
--do_train \
--dataset alpaca_gpt4_en \
--template default \
--finetuning_type lora \
+ --lora_target q_proj,v_proj \
--resume_lora_training False \
--checkpoint_dir path_to_sft_checkpoint \
--reward_model path_to_rm_checkpoint \
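
Note: the PPO stage is the one this commit's title fixes (#551). It consumes both earlier artifacts, the SFT adapter through `--checkpoint_dir` and the reward model through `--reward_model`, so the stages must run in the order sft -> rm -> ppo. A sketch with hypothetical paths; `--output_dir` is assumed by analogy with the other stages, since this hunk is truncated before the PPO output flag:

```bash
# Sketch of the stage wiring PPO assumes (all ./ckpt paths hypothetical):
#   SFT writes ./ckpt/sft, RM writes ./ckpt/rm, PPO reads both.
CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
    --stage ppo \
    --model_name_or_path path_to_llama_model \
    --do_train \
    --dataset alpaca_gpt4_en \
    --template default \
    --finetuning_type lora \
    --lora_target q_proj,v_proj \
    --resume_lora_training False \
    --checkpoint_dir ./ckpt/sft \
    --reward_model ./ckpt/rm \
    --output_dir ./ckpt/ppo
```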
@@ -266,11 +270,12 @@ CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
```bash
CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
--stage dpo \
- --model_name_or_path path_to_your_model \
+ --model_name_or_path path_to_llama_model \
--do_train \
--dataset comparison_gpt4_en \
--template default \
--finetuning_type lora \
+ --lora_target q_proj,v_proj \
--resume_lora_training False \
--checkpoint_dir path_to_sft_checkpoint \
--output_dir path_to_dpo_checkpoint \
@@ -364,7 +369,7 @@ deepspeed --num_gpus 8 --master_port=9901 src/train_bash.py \
```bash
python src/export_model.py \
- --model_name_or_path path_to_your_model \
+ --model_name_or_path path_to_llama_model \
--template default \
--finetuning_type lora \
--checkpoint_dir path_to_checkpoint \
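
Note: exporting merges the LoRA adapter in `--checkpoint_dir` into the base weights so the result loads as a plain model, without this repository. A sketch; `--output_dir path_to_export` is an assumption by analogy with the training commands, as the hunk is truncated before the destination flag:

```bash
# Sketch: merge a LoRA checkpoint into the base model for standalone use.
# --output_dir is assumed, not visible in this hunk.
python src/export_model.py \
    --model_name_or_path path_to_llama_model \
    --template default \
    --finetuning_type lora \
    --checkpoint_dir path_to_sft_checkpoint \
    --output_dir path_to_export
```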
@@ -375,7 +380,7 @@ python src/export_model.py \
```bash
python src/api_demo.py \
- --model_name_or_path path_to_your_model \
+ --model_name_or_path path_to_llama_model \
--template default \
--finetuning_type lora \
--checkpoint_dir path_to_checkpoint
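
Note: once the server is running, the next hunk's context points to `http://localhost:8000/docs` for the API documentation. A hypothetical smoke test, assuming the server exposes an OpenAI-style chat-completions route; the exact path and JSON schema are assumptions, so confirm them on the `/docs` page of your build:

```bash
# Hypothetical request; verify the route and schema at /docs first.
curl -s http://localhost:8000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "default", "messages": [{"role": "user", "content": "Hello!"}]}'
```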
@@ -387,7 +392,7 @@ Visit `http://localhost:8000/docs` for API documentation.
```bash
python src/cli_demo.py \
- --model_name_or_path path_to_your_model \
+ --model_name_or_path path_to_llama_model \
--template default \
--finetuning_type lora \
--checkpoint_dir path_to_checkpoint
@@ -397,7 +402,7 @@ python src/cli_demo.py \
```bash
python src/web_demo.py \
- --model_name_or_path path_to_your_model \
+ --model_name_or_path path_to_llama_model \
--template default \
--finetuning_type lora \
--checkpoint_dir path_to_checkpoint
@@ -408,7 +413,7 @@ python src/web_demo.py \
```bash
CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
--stage sft \
- --model_name_or_path path_to_your_model \
+ --model_name_or_path path_to_llama_model \
--do_eval \
--dataset alpaca_gpt4_en \
--template default \
@@ -427,7 +432,7 @@ We recommend using `--per_device_eval_batch_size=1` and `--max_target_length 128`
```bash
CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
--stage sft \
- --model_name_or_path path_to_your_model \
+ --model_name_or_path path_to_llama_model \
--do_predict \
--dataset alpaca_gpt4_en \
--template default \
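
Note: the evaluation hunk's context recommends `--per_device_eval_batch_size=1` and `--max_target_length 128`. A sketch of a prediction run applying that advice; `./preds` is a hypothetical output path, and `--predict_with_generate` is the standard Seq2SeqTrainingArguments switch we assume is needed for generation-based prediction (it is not visible in this truncated hunk):

```bash
# Sketch: batched prediction with the README's recommended eval settings.
# ./preds is hypothetical; --predict_with_generate is assumed.
CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
    --stage sft \
    --model_name_or_path path_to_llama_model \
    --do_predict \
    --dataset alpaca_gpt4_en \
    --template default \
    --finetuning_type lora \
    --checkpoint_dir path_to_sft_checkpoint \
    --output_dir ./preds \
    --per_device_eval_batch_size 1 \
    --max_target_length 128 \
    --predict_with_generate
```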