update examples

Former-commit-id: 19681f93db399d695aa8e35f8ec2a9e720875baa
2024-06-13 03:15:06 +08:00
parent 49b58fd6af
commit 46f441dd37
27 changed files with 128 additions and 199 deletions
--- a/examples/train_lora/llama3_lora_ppo.yaml
+++ b/examples/train_lora/llama3_lora_ppo.yaml
@@ -0,0 +1,39 @@
+### model
+model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
+reward_model: saves/llama3-8b/lora/reward
+
+### method
+stage: ppo
+do_train: true
+finetuning_type: lora
+lora_target: all
+
+### dataset
+dataset: identity,alpaca_en_demo
+template: llama3
+cutoff_len: 1024
+max_samples: 1000
+overwrite_cache: true
+preprocessing_num_workers: 16
+
+### output
+output_dir: saves/llama3-8b/lora/ppo
+logging_steps: 10
+save_steps: 500
+plot_loss: true
+overwrite_output_dir: true
+
+### train
+per_device_train_batch_size: 1
+gradient_accumulation_steps: 8
+learning_rate: 1.0e-5
+num_train_epochs: 3.0
+lr_scheduler_type: cosine
+warmup_ratio: 0.1
+fp16: true
+ddp_timeout: 180000000
+
+### generate
+max_new_tokens: 512
+top_k: 0
+top_p: 0.9