implement efficient packing without cross-contamination attention
Former-commit-id: a64a5305c0da5ef092d4cc26faf829bb44de65d1
This commit is contained in:
@@ -83,6 +83,7 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]:
|
||||
with gr.Column():
|
||||
resize_vocab = gr.Checkbox()
|
||||
packing = gr.Checkbox()
|
||||
efficient_packing = gr.Checkbox()
|
||||
|
||||
with gr.Column():
|
||||
upcast_layernorm = gr.Checkbox()
|
||||
@@ -101,6 +102,7 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]:
|
||||
optim,
|
||||
resize_vocab,
|
||||
packing,
|
||||
efficient_packing,
|
||||
upcast_layernorm,
|
||||
use_llama_pro,
|
||||
shift_attn,
|
||||
@@ -117,6 +119,7 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]:
|
||||
optim=optim,
|
||||
resize_vocab=resize_vocab,
|
||||
packing=packing,
|
||||
efficient_packing=efficient_packing,
|
||||
upcast_layernorm=upcast_layernorm,
|
||||
use_llama_pro=use_llama_pro,
|
||||
shift_attn=shift_attn,
|
||||
@@ -313,7 +316,7 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]:
|
||||
)
|
||||
|
||||
dataset.focus(list_datasets, [dataset_dir, training_stage], [dataset], queue=False)
|
||||
training_stage.change(change_stage, [training_stage], [dataset, packing], queue=False)
|
||||
training_stage.change(change_stage, [training_stage], [dataset, packing, efficient_packing], queue=False)
|
||||
reward_model.focus(list_checkpoints, [model_name, finetuning_type], [reward_model], queue=False)
|
||||
model_name.change(list_output_dirs, [model_name, finetuning_type, current_time], [output_dir], queue=False)
|
||||
finetuning_type.change(list_output_dirs, [model_name, finetuning_type, current_time], [output_dir], queue=False)
|
||||
|
||||
@@ -494,6 +494,20 @@ LOCALES = {
|
||||
"info": "将序列打包为等长样本。",
|
||||
},
|
||||
},
|
||||
"efficient_packing": {
|
||||
"en": {
|
||||
"label": "Pack sequences for efficient training",
|
||||
"info": "Pack sequences into samples of fixed length without cross-contamination attention for efficient training.",
|
||||
},
|
||||
"ru": {
|
||||
"label": "Пакетные последовательности для эффективного обучения",
|
||||
"info": "Упакуйте последовательности в образцы фиксированной длины без учета перекрестного загрязнения для эффективного обучения.",
|
||||
},
|
||||
"zh": {
|
||||
"label": "打包序列以实现高效训练",
|
||||
"info": "为了提高训练效率,将序列打包成固定长度的样本,无需注意交叉污染。",
|
||||
},
|
||||
},
|
||||
"upcast_layernorm": {
|
||||
"en": {
|
||||
"label": "Upcast LayerNorm",
|
||||
|
||||
@@ -120,6 +120,7 @@ class Runner:
|
||||
optim=get("train.optim"),
|
||||
resize_vocab=get("train.resize_vocab"),
|
||||
packing=get("train.packing"),
|
||||
efficient_packing=get("train.efficient_packing"),
|
||||
upcast_layernorm=get("train.upcast_layernorm"),
|
||||
use_llama_pro=get("train.use_llama_pro"),
|
||||
shift_attn=get("train.shift_attn"),
|
||||
|
||||
Reference in New Issue
Block a user