refactor adapter hparam
Former-commit-id: f82aece9ebd6df83a7a005cc7cbbcec07fa6e14d

@@ -63,17 +63,17 @@ class WebChatModel(ChatModel):
             yield error
             return
 
-        if get("top.checkpoints"):
-            checkpoint_dir = ",".join([
-                get_save_dir(get("top.model_name"), get("top.finetuning_type"), ckpt) for ckpt in get("top.checkpoints")
-            ])
+        if get("top.adapter_path"):
+            adapter_name_or_path = ",".join([
+                get_save_dir(get("top.model_name"), get("top.finetuning_type"), adapter)
+            for adapter in get("top.adapter_path")])
         else:
-            checkpoint_dir = None
+            adapter_name_or_path = None
 
         yield ALERTS["info_loading"][lang]
         args = dict(
             model_name_or_path=get("top.model_path"),
-            checkpoint_dir=checkpoint_dir,
+            adapter_name_or_path=adapter_name_or_path,
             finetuning_type=get("top.finetuning_type"),
             quantization_bit=int(get("top.quantization_bit")) if get("top.quantization_bit") in ["8", "4"] else None,
             template=get("top.template"),
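The hunk above (from the `WebChatModel` loader) swaps the comma-joined `checkpoint_dir` for a comma-joined `adapter_name_or_path`. A minimal standalone sketch of that pattern, assuming `get_save_dir` simply joins its arguments under the `saves` root (the helper names below are illustrative, not the repo's exact code):

```python
import os
from typing import List, Optional

DEFAULT_SAVE_DIR = "saves"  # same constant the webui defines in common.py


def get_save_dir(*paths: str) -> str:
    # join model name, finetuning type and adapter name under the save root
    return os.path.join(DEFAULT_SAVE_DIR, *paths)


def build_adapter_arg(model_name: str, finetuning_type: str, adapters: List[str]) -> Optional[str]:
    # comma-join one save dir per selected adapter, or None when nothing is selected
    if not adapters:
        return None
    return ",".join(get_save_dir(model_name, finetuning_type, a) for a in adapters)


print(build_adapter_arg("llama2-7b", "lora", ["sft-1", "sft-2"]))
# -> saves/llama2-7b/lora/sft-1,saves/llama2-7b/lora/sft-2
```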
@@ -2,14 +2,7 @@ import os
 import json
 import gradio as gr
 from typing import Any, Dict, Optional
-from transformers.utils import (
-    WEIGHTS_NAME,
-    WEIGHTS_INDEX_NAME,
-    SAFE_WEIGHTS_NAME,
-    SAFE_WEIGHTS_INDEX_NAME,
-    ADAPTER_WEIGHTS_NAME,
-    ADAPTER_SAFE_WEIGHTS_NAME
-)
+from peft.utils import WEIGHTS_NAME, SAFETENSORS_WEIGHTS_NAME
 
 from llmtuner.extras.constants import (
     DEFAULT_MODULE,
@@ -22,18 +15,11 @@ from llmtuner.extras.misc import use_modelscope
 from llmtuner.hparams.data_args import DATA_CONFIG
 
 
+ADAPTER_NAMES = {WEIGHTS_NAME, SAFETENSORS_WEIGHTS_NAME}
 DEFAULT_CACHE_DIR = "cache"
 DEFAULT_DATA_DIR = "data"
 DEFAULT_SAVE_DIR = "saves"
 USER_CONFIG = "user.config"
-CKPT_NAMES = [
-    WEIGHTS_NAME,
-    WEIGHTS_INDEX_NAME,
-    SAFE_WEIGHTS_NAME,
-    SAFE_WEIGHTS_INDEX_NAME,
-    ADAPTER_WEIGHTS_NAME,
-    ADAPTER_SAFE_WEIGHTS_NAME
-]
 
 
 def get_save_dir(*args) -> os.PathLike:
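With the import moved from `transformers.utils` to `peft.utils`, adapter folders are now recognized by PEFT's two adapter weight file names instead of the six full-checkpoint names in the removed `CKPT_NAMES`. A quick check of what the new set holds (the literal values are an assumption based on contemporary peft releases):

```python
from peft.utils import SAFETENSORS_WEIGHTS_NAME, WEIGHTS_NAME

ADAPTER_NAMES = {WEIGHTS_NAME, SAFETENSORS_WEIGHTS_NAME}
# expected to print something like {'adapter_model.bin', 'adapter_model.safetensors'}
print(ADAPTER_NAMES)
```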
@@ -90,18 +76,18 @@ def get_template(model_name: str) -> str:
     return "default"
 
 
-def list_checkpoint(model_name: str, finetuning_type: str) -> Dict[str, Any]:
-    checkpoints = []
-    if model_name:
+def list_adapters(model_name: str, finetuning_type: str) -> Dict[str, Any]:
+    adapters = []
+    if model_name and finetuning_type == "lora": # full and freeze have no adapter
         save_dir = get_save_dir(model_name, finetuning_type)
         if save_dir and os.path.isdir(save_dir):
-            for checkpoint in os.listdir(save_dir):
+            for adapter in os.listdir(save_dir):
                 if (
-                    os.path.isdir(os.path.join(save_dir, checkpoint))
-                    and any([os.path.isfile(os.path.join(save_dir, checkpoint, name)) for name in CKPT_NAMES])
+                    os.path.isdir(os.path.join(save_dir, adapter))
+                    and any([os.path.isfile(os.path.join(save_dir, adapter, name)) for name in ADAPTER_NAMES])
                 ):
-                    checkpoints.append(checkpoint)
-    return gr.update(value=[], choices=checkpoints)
+                    adapters.append(adapter)
+    return gr.update(value=[], choices=adapters)
 
 
 def load_dataset_info(dataset_dir: str) -> Dict[str, Dict[str, Any]]:
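A self-contained sketch of what the renamed `list_adapters` does on disk, with the PEFT file names hard-coded so it runs outside the webui (directory layout is illustrative):

```python
import os

ADAPTER_NAMES = {"adapter_model.bin", "adapter_model.safetensors"}  # assumed peft names


def scan_adapters(save_dir: str) -> list:
    # a subdirectory counts as an adapter when it contains at least one
    # known adapter weight file
    adapters = []
    if os.path.isdir(save_dir):
        for name in os.listdir(save_dir):
            subdir = os.path.join(save_dir, name)
            if os.path.isdir(subdir) and any(
                os.path.isfile(os.path.join(subdir, f)) for f in ADAPTER_NAMES
            ):
                adapters.append(name)
    return adapters


print(scan_adapters("saves/llama2-7b/lora"))  # e.g. ['sft-1', 'sft-2'] if present
```

Returning `gr.update(value=[], choices=adapters)` then clears any stale selection while refreshing the dropdown options.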
@@ -14,7 +14,7 @@ def save_model(
     lang: str,
     model_name: str,
     model_path: str,
-    checkpoints: List[str],
+    adapter_path: List[str],
     finetuning_type: str,
     template: str,
     max_shard_size: int,
@@ -25,8 +25,8 @@ def save_model(
         error = ALERTS["err_no_model"][lang]
     elif not model_path:
         error = ALERTS["err_no_path"][lang]
-    elif not checkpoints:
-        error = ALERTS["err_no_checkpoint"][lang]
+    elif not adapter_path:
+        error = ALERTS["err_no_adapter"][lang]
     elif not export_dir:
         error = ALERTS["err_no_export_dir"][lang]
 
@@ -37,7 +37,7 @@ def save_model(
 
     args = dict(
         model_name_or_path=model_path,
-        checkpoint_dir=",".join([get_save_dir(model_name, finetuning_type, ckpt) for ckpt in checkpoints]),
+        adapter_name_or_path=",".join([get_save_dir(model_name, finetuning_type, adapter) for adapter in adapter_path]),
         finetuning_type=finetuning_type,
         template=template,
         export_dir=export_dir,
@@ -63,7 +63,7 @@ def create_export_tab(engine: "Engine") -> Dict[str, "Component"]:
             engine.manager.get_elem_by_name("top.lang"),
             engine.manager.get_elem_by_name("top.model_name"),
             engine.manager.get_elem_by_name("top.model_path"),
-            engine.manager.get_elem_by_name("top.checkpoints"),
+            engine.manager.get_elem_by_name("top.adapter_path"),
             engine.manager.get_elem_by_name("top.finetuning_type"),
             engine.manager.get_elem_by_name("top.template"),
             max_shard_size,
@@ -3,7 +3,7 @@ from typing import TYPE_CHECKING, Dict
 
 from llmtuner.data.template import templates
 from llmtuner.extras.constants import METHODS, SUPPORTED_MODELS
-from llmtuner.webui.common import get_model_path, get_template, list_checkpoint, save_config
+from llmtuner.webui.common import get_model_path, get_template, list_adapters, save_config
 from llmtuner.webui.utils import can_quantize
 
 if TYPE_CHECKING:
@@ -20,7 +20,7 @@ def create_top() -> Dict[str, "Component"]:
 
     with gr.Row():
         finetuning_type = gr.Dropdown(choices=METHODS, value="lora", scale=1)
-        checkpoints = gr.Dropdown(multiselect=True, scale=5)
+        adapter_path = gr.Dropdown(multiselect=True, scale=5)
         refresh_btn = gr.Button(scale=1)
 
     with gr.Accordion(label="Advanced config", open=False) as advanced_tab:
@@ -34,7 +34,7 @@ def create_top() -> Dict[str, "Component"]:
         shift_attn = gr.Checkbox(value=False)
 
     model_name.change(
-        list_checkpoint, [model_name, finetuning_type], [checkpoints], queue=False
+        list_adapters, [model_name, finetuning_type], [adapter_path], queue=False
     ).then(
         get_model_path, [model_name], [model_path], queue=False
     ).then(
@@ -44,13 +44,13 @@ def create_top() -> Dict[str, "Component"]:
     model_path.change(save_config, inputs=[lang, model_name, model_path], queue=False)
 
     finetuning_type.change(
-        list_checkpoint, [model_name, finetuning_type], [checkpoints], queue=False
+        list_adapters, [model_name, finetuning_type], [adapter_path], queue=False
     ).then(
         can_quantize, [finetuning_type], [quantization_bit], queue=False
     )
 
     refresh_btn.click(
-        list_checkpoint, [model_name, finetuning_type], [checkpoints], queue=False
+        list_adapters, [model_name, finetuning_type], [adapter_path], queue=False
     )
 
     return dict(
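These handlers rely on Gradio's chained events: each `.then()` callback runs after the previous one finishes, so the adapter list refreshes before dependent fields update. A stripped-down sketch of the same wiring (handlers here are placeholders, not the repo's functions):

```python
import gradio as gr


def fake_list_adapters(model_name: str, finetuning_type: str):
    # stand-in for list_adapters: the real one scans saves/<model>/<type>
    choices = ["sft-1", "sft-2"] if finetuning_type == "lora" else []
    return gr.update(value=[], choices=choices)


with gr.Blocks() as demo:
    model_name = gr.Textbox(value="llama2-7b")
    finetuning_type = gr.Dropdown(choices=["lora", "freeze", "full"], value="lora")
    adapter_path = gr.Dropdown(multiselect=True)
    quantization_bit = gr.Dropdown(choices=["None", "8", "4"], value="None")

    finetuning_type.change(
        fake_list_adapters, [model_name, finetuning_type], [adapter_path], queue=False
    ).then(
        # second step runs only after the adapter list has been refreshed
        lambda ft: gr.update(interactive=(ft == "lora")), [finetuning_type], [quantization_bit], queue=False
    )
```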
@@ -58,7 +58,7 @@ def create_top() -> Dict[str, "Component"]:
         model_name=model_name,
         model_path=model_path,
         finetuning_type=finetuning_type,
-        checkpoints=checkpoints,
+        adapter_path=adapter_path,
         refresh_btn=refresh_btn,
         advanced_tab=advanced_tab,
         quantization_bit=quantization_bit,
@@ -3,7 +3,7 @@ from typing import TYPE_CHECKING, Dict
 from transformers.trainer_utils import SchedulerType
 
 from llmtuner.extras.constants import TRAINING_STAGES
-from llmtuner.webui.common import list_checkpoint, list_dataset, DEFAULT_DATA_DIR
+from llmtuner.webui.common import list_adapters, list_dataset, DEFAULT_DATA_DIR
 from llmtuner.webui.components.data import create_preview_box
 from llmtuner.webui.utils import gen_plot
 
@@ -60,7 +60,7 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]:
         lr_scheduler_type=lr_scheduler_type, max_grad_norm=max_grad_norm, val_size=val_size
     ))
 
-    with gr.Accordion(label="Advanced config", open=False) as advanced_tab:
+    with gr.Accordion(label="Extra config", open=False) as extra_tab:
         with gr.Row():
             logging_steps = gr.Slider(value=5, minimum=5, maximum=1000, step=5)
             save_steps = gr.Slider(value=100, minimum=10, maximum=5000, step=10)
@@ -73,7 +73,7 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]:
 
     input_elems.update({logging_steps, save_steps, warmup_steps, neftune_alpha, train_on_prompt, upcast_layernorm})
     elem_dict.update(dict(
-        advanced_tab=advanced_tab, logging_steps=logging_steps, save_steps=save_steps, warmup_steps=warmup_steps,
+        extra_tab=extra_tab, logging_steps=logging_steps, save_steps=save_steps, warmup_steps=warmup_steps,
         neftune_alpha=neftune_alpha, train_on_prompt=train_on_prompt, upcast_layernorm=upcast_layernorm
     ))
 
@@ -83,12 +83,12 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]:
             lora_dropout = gr.Slider(value=0.1, minimum=0, maximum=1, step=0.01, scale=1)
             lora_target = gr.Textbox(scale=1)
             additional_target = gr.Textbox(scale=1)
-            resume_lora_training = gr.Checkbox(value=True, scale=1)
+            create_new_adapter = gr.Checkbox(scale=1)
 
-    input_elems.update({lora_rank, lora_dropout, lora_target, additional_target, resume_lora_training})
+    input_elems.update({lora_rank, lora_dropout, lora_target, additional_target, create_new_adapter})
     elem_dict.update(dict(
         lora_tab=lora_tab, lora_rank=lora_rank, lora_dropout=lora_dropout, lora_target=lora_target,
-        additional_target=additional_target, resume_lora_training=resume_lora_training,
+        additional_target=additional_target, create_new_adapter=create_new_adapter
     ))
 
     with gr.Accordion(label="RLHF config", open=False) as rlhf_tab:
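One behavioral detail of this swap: the old checkbox was created with `value=True`, while the new `gr.Checkbox(scale=1)` passes no value and so takes Gradio's default of `False` (an assumption about the Gradio default), making a fresh adapter opt-in:

```python
import gradio as gr

# old control: resuming the previous adapter weights was the default
resume_lora_training = gr.Checkbox(value=True, scale=1)

# new control: no explicit value, so it defaults to False ("create new adapter" is opt-in)
create_new_adapter = gr.Checkbox(scale=1)
```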
@@ -98,7 +98,7 @@ def create_train_tab(engine: "Engine") -> Dict[str, "Component"]:
             refresh_btn = gr.Button(scale=1)
 
         refresh_btn.click(
-            list_checkpoint,
+            list_adapters,
             [engine.manager.get_elem_by_name("top.model_name"), engine.manager.get_elem_by_name("top.finetuning_type")],
             [reward_model],
             queue=False
@@ -33,20 +33,20 @@ LOCALES = {
             "label": "微调方法"
         }
     },
-    "checkpoints": {
+    "adapter_path": {
         "en": {
-            "label": "Checkpoints"
+            "label": "Adapter path"
         },
         "zh": {
-            "label": "模型断点"
+            "label": "适配器路径"
         }
     },
     "refresh_btn": {
         "en": {
-            "value": "Refresh checkpoints"
+            "value": "Refresh adapters"
         },
         "zh": {
-            "value": "刷新断点"
+            "value": "刷新适配器"
         }
     },
     "advanced_tab": {
@@ -285,6 +285,14 @@ LOCALES = {
             "info": "验证集占全部样本的百分比。"
         }
     },
+    "extra_tab": {
+        "en": {
+            "label": "Extra configurations"
+        },
+        "zh": {
+            "label": "其它参数设置"
+        }
+    },
     "logging_steps": {
         "en": {
             "label": "Logging steps",
@@ -393,14 +401,14 @@ LOCALES = {
             "info": "除 LoRA 层以外的可训练模块名称。使用英文逗号分隔多个名称。"
         }
     },
-    "resume_lora_training": {
+    "create_new_adapter": {
         "en": {
-            "label": "Resume LoRA training",
-            "info": "Whether to resume training from the last LoRA weights or create new lora weights."
+            "label": "Create new adapter",
+            "info": "Whether to create a new adapter with randomly initialized weight or not."
         },
         "zh": {
-            "label": "继续上次的训练",
-            "info": "接着上次的 LoRA 权重训练或创建一个新的 LoRA 权重。"
+            "label": "新建适配器",
+            "info": "是否创建一个经过随机初始化的新适配器。"
         }
     },
     "rlhf_tab": {
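For reference, `LOCALES` is keyed by element name, then language, then attribute, so the renamed `create_new_adapter` entry resolves roughly like this (the `localize` helper is hypothetical):

```python
LOCALES = {
    "create_new_adapter": {
        "en": {"label": "Create new adapter",
               "info": "Whether to create a new adapter with randomly initialized weight or not."},
        "zh": {"label": "新建适配器",
               "info": "是否创建一个经过随机初始化的新适配器。"},
    }
}


def localize(elem: str, lang: str, key: str = "label") -> str:
    # look up the attribute of a UI element for the active language
    return LOCALES[elem][lang][key]


assert localize("create_new_adapter", "en") == "Create new adapter"
```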
@@ -629,9 +637,9 @@ ALERTS = {
         "en": "Please choose a dataset.",
         "zh": "请选择数据集。"
     },
-    "err_no_checkpoint": {
-        "en": "Please select a checkpoint.",
-        "zh": "请选择断点。"
+    "err_no_adapter": {
+        "en": "Please select an adapter.",
+        "zh": "请选择一个适配器。"
     },
     "err_no_export_dir": {
         "en": "Please provide export dir.",
@@ -21,7 +21,7 @@ class Manager:
             self.all_elems["top"]["lang"],
             self.all_elems["top"]["model_name"],
             self.all_elems["top"]["model_path"],
-            self.all_elems["top"]["checkpoints"],
+            self.all_elems["top"]["adapter_path"],
             self.all_elems["top"]["finetuning_type"],
             self.all_elems["top"]["quantization_bit"],
             self.all_elems["top"]["template"],
@@ -86,19 +86,19 @@ class Runner:
         get = lambda name: data[self.manager.get_elem_by_name(name)]
         user_config = load_config()
 
-        if get("top.checkpoints"):
-            checkpoint_dir = ",".join([
-                get_save_dir(get("top.model_name"), get("top.finetuning_type"), ckpt) for ckpt in get("top.checkpoints")
-            ])
+        if get("top.adapter_path"):
+            adapter_name_or_path = ",".join([
+                get_save_dir(get("top.model_name"), get("top.finetuning_type"), adapter)
+            for adapter in get("top.adapter_path")])
         else:
-            checkpoint_dir = None
+            adapter_name_or_path = None
 
         args = dict(
             stage=TRAINING_STAGES[get("train.training_stage")],
-            model_name_or_path=get("top.model_path"),
             do_train=True,
+            model_name_or_path=get("top.model_path"),
+            adapter_name_or_path=adapter_name_or_path,
             cache_dir=user_config.get("cache_dir", None),
-            checkpoint_dir=checkpoint_dir,
             finetuning_type=get("top.finetuning_type"),
             quantization_bit=int(get("top.quantization_bit")) if get("top.quantization_bit") in ["8", "4"] else None,
             template=get("top.template"),
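Two details of the rebuilt training `args` are easy to miss: `model_name_or_path` only moved below `do_train`, and the quantization guard accepts just the string choices "8" and "4". The guard in isolation (assuming the dropdown yields strings):

```python
def parse_quantization_bit(choice: str):
    # only the "8" and "4" dropdown choices map to ints; anything else
    # (e.g. "None") disables quantization
    return int(choice) if choice in ["8", "4"] else None


assert parse_quantization_bit("4") == 4
assert parse_quantization_bit("None") is None
```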
@@ -125,17 +125,14 @@ class Runner:
             lora_dropout=get("train.lora_dropout"),
             lora_target=get("train.lora_target") or get_module(get("top.model_name")),
             additional_target=get("train.additional_target") if get("train.additional_target") else None,
-            resume_lora_training=get("train.resume_lora_training"),
+            create_new_adapter=get("train.create_new_adapter"),
             output_dir=get_save_dir(get("top.model_name"), get("top.finetuning_type"), get("train.output_dir"))
         )
         args[get("train.compute_type")] = True
         args["disable_tqdm"] = True
 
         if TRAINING_STAGES[get("train.training_stage")] in ["rm", "ppo", "dpo"]:
-            args["resume_lora_training"] = (args["quantization_bit"] is not None)
-
-        if args["quantization_bit"] is not None:
-            args["upcast_layernorm"] = True
+            args["create_new_adapter"] = (args["quantization_bit"] is None)
 
         if args["stage"] == "ppo":
             args["reward_model"] = get_save_dir(
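The removed special cases (forcing `resume_lora_training` for quantized RM/PPO/DPO runs and `upcast_layernorm` under quantization) collapse into one rule: RM/PPO/DPO stages get a fresh adapter unless the model is quantized. A sketch of the resulting decision (the function name and rationale comment are my reading, not the repo's):

```python
from typing import Optional


def resolve_create_new_adapter(stage: str, quantization_bit: Optional[int], user_choice: bool) -> bool:
    # for RM/PPO/DPO the checkbox is overridden: a new adapter is created
    # unless the base model is quantized, in which case the loaded adapter
    # keeps being trained
    if stage in ["rm", "ppo", "dpo"]:
        return quantization_bit is None
    return user_choice


assert resolve_create_new_adapter("ppo", None, False) is True
assert resolve_create_new_adapter("ppo", 4, True) is False
assert resolve_create_new_adapter("sft", 4, True) is True
```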
@@ -158,20 +155,19 @@ class Runner:
         get = lambda name: data[self.manager.get_elem_by_name(name)]
         user_config = load_config()
 
-        if get("top.checkpoints"):
-            checkpoint_dir = ",".join([
-                get_save_dir(get("top.model_name"), get("top.finetuning_type"), ckpt) for ckpt in get("top.checkpoints")
-            ])
+        if get("top.adapter_path"):
+            adapter_name_or_path = ",".join([
+                get_save_dir(get("top.model_name"), get("top.finetuning_type"), adapter)
+            for adapter in get("top.adapter_path")])
         else:
-            checkpoint_dir = None
+            adapter_name_or_path = None
 
         args = dict(
             stage="sft",
-            model_name_or_path=get("top.model_path"),
             do_eval=True,
-            predict_with_generate=True,
+            model_name_or_path=get("top.model_path"),
+            adapter_name_or_path=adapter_name_or_path,
             cache_dir=user_config.get("cache_dir", None),
-            checkpoint_dir=checkpoint_dir,
             finetuning_type=get("top.finetuning_type"),
             quantization_bit=int(get("top.quantization_bit")) if get("top.quantization_bit") in ["8", "4"] else None,
             template=get("top.template"),
@@ -183,6 +179,7 @@ class Runner:
             cutoff_len=get("eval.cutoff_len"),
             max_samples=int(get("eval.max_samples")),
             per_device_eval_batch_size=get("eval.batch_size"),
+            predict_with_generate=True,
             max_new_tokens=get("eval.max_new_tokens"),
             top_p=get("eval.top_p"),
             temperature=get("eval.temperature"),
@@ -47,7 +47,7 @@ def gen_cmd(args: Dict[str, Any]) -> str:
     current_devices = os.environ.get("CUDA_VISIBLE_DEVICES", "0")
     cmd_lines = ["CUDA_VISIBLE_DEVICES={} python src/train_bash.py ".format(current_devices)]
     for k, v in args.items():
-        if v is not None and v != "":
+        if v is not None and v is not False and v != "":
             cmd_lines.append("    --{} {} ".format(k, str(v)))
     cmd_text = "\\\n".join(cmd_lines)
     cmd_text = "```bash\n{}\n```".format(cmd_text)
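The `gen_cmd` fix matters because `False != ""` evaluates to `True` in Python, so the old filter still emitted flags like `--create_new_adapter False` into the preview command. A quick demonstration of both filters:

```python
args = {
    "do_train": True,
    "create_new_adapter": False,
    "template": "default",
    "adapter_name_or_path": None,
}

old = [k for k, v in args.items() if v is not None and v != ""]
new = [k for k, v in args.items() if v is not None and v is not False and v != ""]

print(old)  # ['do_train', 'create_new_adapter', 'template']
print(new)  # ['do_train', 'template'] -- False flags no longer leak into the command
```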