[misc] fix packing and eval plot (#7623)

2025-04-07 18:20:57 +08:00
parent 5115dc8c7f
commit c3c0efbaa0
70 changed files with 288 additions and 194 deletions
--- a/examples/train_full/llama3_full_sft.yaml
+++ b/examples/train_full/llama3_full_sft.yaml
@@ -24,6 +24,7 @@ save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
 save_only_model: false
+report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]

 ### train
 per_device_train_batch_size: 1
--- a/examples/train_full/qwen2vl_full_sft.yaml
+++ b/examples/train_full/qwen2vl_full_sft.yaml
@@ -8,10 +8,10 @@ trust_remote_code: true
 stage: sft
 do_train: true
 finetuning_type: full
-freeze_vision_tower: true  # choices: [true, false]
-freeze_multi_modal_projector: true  # choices: [true, false]
-freeze_language_model: false  # choices: [true, false]
-deepspeed: examples/deepspeed/ds_z3_config.json  # choices: [ds_z0_config.json, ds_z2_config.json, ds_z3_config.json]
+freeze_vision_tower: true
+freeze_multi_modal_projector: true
+freeze_language_model: false
+deepspeed: examples/deepspeed/ds_z3_config.json

 ### dataset
 dataset: mllm_demo,identity,alpaca_en_demo
@@ -29,6 +29,7 @@ save_steps: 500
 plot_loss: true
 overwrite_output_dir: true
 save_only_model: false
+report_to: none  # choices: [none, wandb, tensorboard, swanlab, mlflow]

 ### train
 per_device_train_batch_size: 1