add adam_mini to readme

Former-commit-id: d610c6bcf8a8ba6f4236f5d11f79571b83f4fb11
2024-08-09 20:02:03 +08:00
parent 7e755e9cac
commit 59cbce1a46
12 changed files with 94 additions and 34 deletions
--- a/examples/extras/adam_mini/llama3_full_sft.yaml
+++ b/examples/extras/adam_mini/llama3_full_sft.yaml
@@ -0,0 +1,39 @@
+### model
+model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
+
+### method
+stage: sft  # supervised fine-tuning
+do_train: true
+finetuning_type: full
+use_adam_mini: true  # the option this example showcases; presumably selects the Adam-mini optimizer - confirm
+
+### dataset
+dataset: identity,alpaca_en_demo  # two dataset names, comma-separated
+template: llama3
+cutoff_len: 1024
+max_samples: 1000
+overwrite_cache: true
+preprocessing_num_workers: 16
+
+### output
+output_dir: saves/llama3-8b/full/sft  # NOTE(review): path is not adam_mini-specific - confirm that is intended
+logging_steps: 10
+save_steps: 500
+plot_loss: true
+overwrite_output_dir: true
+
+### train
+per_device_train_batch_size: 1
+gradient_accumulation_steps: 8  # with batch size 1: 8 samples per device per optimizer step
+learning_rate: 1.0e-5  # keep the dot and signed exponent: PyYAML (YAML 1.1) loads a bare 1e-5 as a string
+num_train_epochs: 3.0
+lr_scheduler_type: cosine
+warmup_ratio: 0.1
+bf16: true
+ddp_timeout: 180000000  # presumably seconds (~5.7 years), i.e. effectively no timeout - confirm unit
+
+### eval
+val_size: 0.1  # presumably the fraction of data held out for evaluation - confirm
+per_device_eval_batch_size: 1
+eval_strategy: steps
+eval_steps: 500  # same interval as save_steps above
--- a/examples/extras/galore/llama3_full_sft.yaml
+++ b/examples/extras/galore/llama3_full_sft.yaml
@@ -34,6 +34,7 @@ num_train_epochs: 3.0
 lr_scheduler_type: cosine
 warmup_ratio: 0.1
 pure_bf16: true
+ddp_timeout: 180000000  # presumably seconds (~5.7 years), i.e. effectively no timeout - confirm unit

 ### eval
 val_size: 0.1
--- a/examples/extras/llama_pro/expand.sh
+++ b/examples/extras/llama_pro/expand.sh
@@ -2,5 +2,5 @@

 python scripts/llama_pro.py \
    --model_name_or_path meta-llama/Meta-Llama-3-8B-Instruct \
-    --output_dir models/llama3-8b-instruct-pro \
+    --output_dir models/llama3-8b-pro \
    --num_expand 8
--- a/examples/extras/llama_pro/llama3_freeze_sft.yaml
+++ b/examples/extras/llama_pro/llama3_freeze_sft.yaml
@@ -1,5 +1,5 @@
 ### model
-model_name_or_path: models/llama3-8b-instruct-pro
+model_name_or_path: models/llama3-8b-pro  # must match --output_dir in examples/extras/llama_pro/expand.sh

 ### method
 stage: sft
@@ -18,7 +18,7 @@ overwrite_cache: true
 preprocessing_num_workers: 16

 ### output
-output_dir: saves/llama3-8b-instruct-pro/freeze/sft
+output_dir: saves/llama3-8b-pro/freeze/sft
 logging_steps: 10
 save_steps: 500
 plot_loss: true