implement efficient packing without attention cross-contamination

Former-commit-id: a64a5305c0da5ef092d4cc26faf829bb44de65d1
ancv
2024-06-12 11:56:01 +07:00
parent 6d9fbb3fa9
commit c7ab302c69
9 changed files with 287 additions and 8 deletions
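Background on the technique this commit wires into the web UI: with naive packing, several short training examples are concatenated into one fixed-length row, and under a plain causal mask every token can attend to tokens from the unrelated examples packed before it. Packing without cross-contamination keeps the concatenation but masks attention block-diagonally, typically restarting position ids per example as well. The sketch below illustrates the idea only; it is not the code from this commit, and the helper names are made up.

import torch

def packed_attention_mask(seq_lens):
    """Block-diagonal causal mask for sequences packed into one row.

    Entry (i, j) is True iff token i may attend to token j: both tokens
    must belong to the same packed sequence and j must not come after i.
    """
    total = sum(seq_lens)
    # seq_ids[k] = index of the packed sequence that token k belongs to
    seq_ids = torch.repeat_interleave(torch.arange(len(seq_lens)), torch.tensor(seq_lens))
    same_seq = seq_ids.unsqueeze(0) == seq_ids.unsqueeze(1)  # block-diagonal part
    causal = torch.tril(torch.ones(total, total, dtype=torch.bool))  # causal part
    return same_seq & causal

def packed_position_ids(seq_lens):
    """Restart position ids at 0 for every packed sequence."""
    return torch.cat([torch.arange(n) for n in seq_lens])

# Two examples of lengths 3 and 2 packed into one row of 5 tokens:
# tokens 3-4 cannot see tokens 0-2, and position ids restart at the boundary.
print(packed_attention_mask([3, 2]).int())
print(packed_position_ids([3, 2]))  # tensor([0, 1, 2, 0, 1])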


@@ -83,6 +83,7 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]:
 with gr.Column():
     resize_vocab = gr.Checkbox()
     packing = gr.Checkbox()
+    efficient_packing = gr.Checkbox()
 with gr.Column():
     upcast_layernorm = gr.Checkbox()
@@ -101,6 +102,7 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]:
 optim,
 resize_vocab,
 packing,
+efficient_packing,
 upcast_layernorm,
 use_llama_pro,
 shift_attn,
@@ -117,6 +119,7 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]:
 optim=optim,
 resize_vocab=resize_vocab,
 packing=packing,
+efficient_packing=efficient_packing,
 upcast_layernorm=upcast_layernorm,
 use_llama_pro=use_llama_pro,
 shift_attn=shift_attn,
@@ -313,7 +316,7 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]:
 )
 dataset.focus(list_datasets, [dataset_dir, training_stage], [dataset], queue=False)
-training_stage.change(change_stage, [training_stage], [dataset, packing], queue=False)
+training_stage.change(change_stage, [training_stage], [dataset, packing, efficient_packing], queue=False)
 reward_model.focus(list_checkpoints, [model_name, finetuning_type], [reward_model], queue=False)
 model_name.change(list_output_dirs, [model_name, finetuning_type, current_time], [output_dir], queue=False)
 finetuning_type.change(list_output_dirs, [model_name, finetuning_type, current_time], [output_dir], queue=False)
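One detail worth noting in the last hunk: a Gradio .change callback must return exactly one value per registered output component, so appending efficient_packing to the outputs list implies that change_stage now returns one more value. A self-contained toy illustrating the pattern follows; it is not LLaMA-Factory's actual change_stage, and the stage names and default-ticking logic are assumptions.

import gradio as gr

def change_stage(stage):
    # One return value per output component registered below; tick both
    # packing boxes by default for the (hypothetical) pre-training stage.
    enable = stage == "Pre-Training"
    return enable, enable

with gr.Blocks() as demo:
    stage = gr.Dropdown(["Supervised Fine-Tuning", "Pre-Training"], value="Supervised Fine-Tuning")
    packing = gr.Checkbox(label="packing")
    efficient_packing = gr.Checkbox(label="efficient_packing")
    stage.change(change_stage, [stage], [packing, efficient_packing], queue=False)

if __name__ == "__main__":
    demo.launch()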