@@ -28,12 +28,16 @@ def create_eval_tab(top_elems: Dict[str, "Component"], runner: "Runner") -> Dict
|
||||
)
|
||||
|
||||
with gr.Row():
|
||||
max_source_length = gr.Slider(value=512, minimum=4, maximum=4096, step=1)
|
||||
max_target_length = gr.Slider(value=512, minimum=4, maximum=4096, step=1)
|
||||
cutoff_len = gr.Slider(value=1024, minimum=4, maximum=8192, step=1)
|
||||
max_samples = gr.Textbox(value="100000")
|
||||
batch_size = gr.Slider(value=8, minimum=1, maximum=512, step=1)
|
||||
predict = gr.Checkbox(value=True)
|
||||
|
||||
with gr.Row():
|
||||
max_new_tokens = gr.Slider(10, 2048, value=128, step=1)
|
||||
top_p = gr.Slider(0.01, 1, value=0.7, step=0.01)
|
||||
temperature = gr.Slider(0.01, 1.5, value=0.95, step=0.01)
|
||||
|
||||
with gr.Row():
|
||||
cmd_preview_btn = gr.Button()
|
||||
start_btn = gr.Button()
|
||||
@@ -55,11 +59,13 @@ def create_eval_tab(top_elems: Dict[str, "Component"], runner: "Runner") -> Dict
|
||||
top_elems["system_prompt"],
|
||||
dataset_dir,
|
||||
dataset,
|
||||
max_source_length,
|
||||
max_target_length,
|
||||
cutoff_len,
|
||||
max_samples,
|
||||
batch_size,
|
||||
predict
|
||||
predict,
|
||||
max_new_tokens,
|
||||
top_p,
|
||||
temperature
|
||||
]
|
||||
|
||||
output_components = [
|
||||
@@ -78,11 +84,13 @@ def create_eval_tab(top_elems: Dict[str, "Component"], runner: "Runner") -> Dict
|
||||
preview_count=preview_count,
|
||||
preview_samples=preview_samples,
|
||||
close_btn=close_btn,
|
||||
max_source_length=max_source_length,
|
||||
max_target_length=max_target_length,
|
||||
cutoff_len=cutoff_len,
|
||||
max_samples=max_samples,
|
||||
batch_size=batch_size,
|
||||
predict=predict,
|
||||
max_new_tokens=max_new_tokens,
|
||||
top_p=top_p,
|
||||
temperature=temperature,
|
||||
cmd_preview_btn=cmd_preview_btn,
|
||||
start_btn=start_btn,
|
||||
stop_btn=stop_btn,
|
||||
|
||||
@@ -35,11 +35,11 @@ def create_train_tab(top_elems: Dict[str, "Component"], runner: "Runner") -> Dic
|
||||
)
|
||||
|
||||
with gr.Row():
|
||||
max_source_length = gr.Slider(value=512, minimum=4, maximum=4096, step=1)
|
||||
max_target_length = gr.Slider(value=512, minimum=4, maximum=4096, step=1)
|
||||
cutoff_len = gr.Slider(value=1024, minimum=4, maximum=8192, step=1)
|
||||
learning_rate = gr.Textbox(value="5e-5")
|
||||
num_train_epochs = gr.Textbox(value="3.0")
|
||||
max_samples = gr.Textbox(value="100000")
|
||||
compute_type = gr.Radio(choices=["fp16", "bf16"], value="fp16")
|
||||
|
||||
with gr.Row():
|
||||
batch_size = gr.Slider(value=4, minimum=1, maximum=512, step=1)
|
||||
@@ -55,7 +55,8 @@ def create_train_tab(top_elems: Dict[str, "Component"], runner: "Runner") -> Dic
|
||||
logging_steps = gr.Slider(value=5, minimum=5, maximum=1000, step=5)
|
||||
save_steps = gr.Slider(value=100, minimum=10, maximum=5000, step=10)
|
||||
warmup_steps = gr.Slider(value=0, minimum=0, maximum=5000, step=1)
|
||||
compute_type = gr.Radio(choices=["fp16", "bf16"], value="fp16")
|
||||
flash_attn = gr.Checkbox(value=False)
|
||||
rope_scaling = gr.Checkbox(value=False)
|
||||
|
||||
with gr.Accordion(label="LoRA config", open=False) as lora_tab:
|
||||
with gr.Row():
|
||||
@@ -107,11 +108,11 @@ def create_train_tab(top_elems: Dict[str, "Component"], runner: "Runner") -> Dic
|
||||
training_stage,
|
||||
dataset_dir,
|
||||
dataset,
|
||||
max_source_length,
|
||||
max_target_length,
|
||||
cutoff_len,
|
||||
learning_rate,
|
||||
num_train_epochs,
|
||||
max_samples,
|
||||
compute_type,
|
||||
batch_size,
|
||||
gradient_accumulation_steps,
|
||||
lr_scheduler_type,
|
||||
@@ -120,7 +121,8 @@ def create_train_tab(top_elems: Dict[str, "Component"], runner: "Runner") -> Dic
|
||||
logging_steps,
|
||||
save_steps,
|
||||
warmup_steps,
|
||||
compute_type,
|
||||
flash_attn,
|
||||
rope_scaling,
|
||||
lora_rank,
|
||||
lora_dropout,
|
||||
lora_target,
|
||||
@@ -151,11 +153,11 @@ def create_train_tab(top_elems: Dict[str, "Component"], runner: "Runner") -> Dic
|
||||
preview_count=preview_count,
|
||||
preview_samples=preview_samples,
|
||||
close_btn=close_btn,
|
||||
max_source_length=max_source_length,
|
||||
max_target_length=max_target_length,
|
||||
cutoff_len=cutoff_len,
|
||||
learning_rate=learning_rate,
|
||||
num_train_epochs=num_train_epochs,
|
||||
max_samples=max_samples,
|
||||
compute_type=compute_type,
|
||||
batch_size=batch_size,
|
||||
gradient_accumulation_steps=gradient_accumulation_steps,
|
||||
lr_scheduler_type=lr_scheduler_type,
|
||||
@@ -165,7 +167,8 @@ def create_train_tab(top_elems: Dict[str, "Component"], runner: "Runner") -> Dic
|
||||
logging_steps=logging_steps,
|
||||
save_steps=save_steps,
|
||||
warmup_steps=warmup_steps,
|
||||
compute_type=compute_type,
|
||||
flash_attn=flash_attn,
|
||||
rope_scaling=rope_scaling,
|
||||
lora_tab=lora_tab,
|
||||
lora_rank=lora_rank,
|
||||
lora_dropout=lora_dropout,
|
||||
|
||||
@@ -147,26 +147,16 @@ LOCALES = {
|
||||
"value": "关闭"
|
||||
}
|
||||
},
|
||||
"max_source_length": {
|
||||
"cutoff_len": {
|
||||
"en": {
|
||||
"label": "Max source length",
|
||||
"info": "Max tokens in source sequence."
|
||||
"label": "Cutoff length",
|
||||
"info": "Max tokens in input sequence."
|
||||
},
|
||||
"zh": {
|
||||
"label": "输入序列最大长度",
|
||||
"label": "截断长度",
|
||||
"info": "输入序列分词后的最大长度。"
|
||||
}
|
||||
},
|
||||
"max_target_length": {
|
||||
"en": {
|
||||
"label": "Max target length",
|
||||
"info": "Max tokens in target sequence."
|
||||
},
|
||||
"zh": {
|
||||
"label": "输出序列最大长度",
|
||||
"info": "输出序列分词后的最大长度。"
|
||||
}
|
||||
},
|
||||
"learning_rate": {
|
||||
"en": {
|
||||
"label": "Learning rate",
|
||||
@@ -197,6 +187,16 @@ LOCALES = {
|
||||
"info": "每个数据集最多使用的样本数。"
|
||||
}
|
||||
},
|
||||
"compute_type": {
|
||||
"en": {
|
||||
"label": "Compute type",
|
||||
"info": "Whether to use fp16 or bf16 mixed precision training."
|
||||
},
|
||||
"zh": {
|
||||
"label": "计算类型",
|
||||
"info": "是否启用 FP16 或 BF16 混合精度训练。"
|
||||
}
|
||||
},
|
||||
"batch_size": {
|
||||
"en": {
|
||||
"label": "Batch size",
|
||||
@@ -277,14 +277,20 @@ LOCALES = {
|
||||
"info": "学习率预热采用的步数。"
|
||||
}
|
||||
},
|
||||
"compute_type": {
|
||||
"flash_attn": {
|
||||
"en": {
|
||||
"label": "Compute type",
|
||||
"info": "Whether to use fp16 or bf16 mixed precision training."
|
||||
"label": "Use FlashAttention-2"
|
||||
},
|
||||
"zh": {
|
||||
"label": "计算类型",
|
||||
"info": "是否启用 FP16 或 BF16 混合精度训练。"
|
||||
"label": "使用 FlashAttention-2"
|
||||
}
|
||||
},
|
||||
"rope_scaling": {
|
||||
"en": {
|
||||
"label": "Use RoPE scaling"
|
||||
},
|
||||
"zh": {
|
||||
"label": "使用 RoPE 插值"
|
||||
}
|
||||
},
|
||||
"lora_tab": {
|
||||
|
||||
@@ -73,11 +73,11 @@ class Runner:
|
||||
training_stage: str,
|
||||
dataset_dir: str,
|
||||
dataset: List[str],
|
||||
max_source_length: int,
|
||||
max_target_length: int,
|
||||
cutoff_len: int,
|
||||
learning_rate: str,
|
||||
num_train_epochs: str,
|
||||
max_samples: str,
|
||||
compute_type: str,
|
||||
batch_size: int,
|
||||
gradient_accumulation_steps: int,
|
||||
lr_scheduler_type: str,
|
||||
@@ -86,7 +86,8 @@ class Runner:
|
||||
logging_steps: int,
|
||||
save_steps: int,
|
||||
warmup_steps: int,
|
||||
compute_type: str,
|
||||
flash_attn: bool,
|
||||
rope_scaling: bool,
|
||||
lora_rank: int,
|
||||
lora_dropout: float,
|
||||
lora_target: str,
|
||||
@@ -120,8 +121,7 @@ class Runner:
|
||||
system_prompt=system_prompt,
|
||||
dataset_dir=dataset_dir,
|
||||
dataset=",".join(dataset),
|
||||
max_source_length=max_source_length,
|
||||
max_target_length=max_target_length,
|
||||
cutoff_len=cutoff_len,
|
||||
learning_rate=float(learning_rate),
|
||||
num_train_epochs=float(num_train_epochs),
|
||||
max_samples=int(max_samples),
|
||||
@@ -132,6 +132,8 @@ class Runner:
|
||||
logging_steps=logging_steps,
|
||||
save_steps=save_steps,
|
||||
warmup_steps=warmup_steps,
|
||||
flash_attn=flash_attn,
|
||||
rope_scaling="linear" if rope_scaling else None,
|
||||
lora_rank=lora_rank,
|
||||
lora_dropout=lora_dropout,
|
||||
lora_target=lora_target or DEFAULT_MODULE.get(model_name.split("-")[0], "q_proj,v_proj"),
|
||||
@@ -168,11 +170,13 @@ class Runner:
|
||||
system_prompt: str,
|
||||
dataset_dir: str,
|
||||
dataset: List[str],
|
||||
max_source_length: int,
|
||||
max_target_length: int,
|
||||
cutoff_len: int,
|
||||
max_samples: str,
|
||||
batch_size: int,
|
||||
predict: bool
|
||||
predict: bool,
|
||||
max_new_tokens: int,
|
||||
top_p: float,
|
||||
temperature: float
|
||||
) -> Tuple[str, str, List[str], str, Dict[str, Any]]:
|
||||
if checkpoints:
|
||||
checkpoint_dir = ",".join(
|
||||
@@ -200,10 +204,12 @@ class Runner:
|
||||
system_prompt=system_prompt,
|
||||
dataset_dir=dataset_dir,
|
||||
dataset=",".join(dataset),
|
||||
max_source_length=max_source_length,
|
||||
max_target_length=max_target_length,
|
||||
cutoff_len=cutoff_len,
|
||||
max_samples=int(max_samples),
|
||||
per_device_eval_batch_size=batch_size,
|
||||
max_new_tokens=max_new_tokens,
|
||||
top_p=top_p,
|
||||
temperature=temperature,
|
||||
output_dir=output_dir
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user