Merge branch 'hiyouga:main' into pixtral-patch
Former-commit-id: 28696e2f945a9f55e4ca9e9dc5ebd8af9df45d8b
This commit is contained in:
@@ -38,6 +38,7 @@ Force check imports: FORCE_CHECK_IMPORTS=1
|
||||
Force using torchrun: FORCE_TORCHRUN=1
|
||||
Set logging verbosity: LLAMAFACTORY_VERBOSITY=WARN
|
||||
Use modelscope: USE_MODELSCOPE_HUB=1
|
||||
Use openmind: USE_OPENMIND_HUB=1
|
||||
"""
|
||||
|
||||
from .extras.env import VERSION
|
||||
|
||||
@@ -53,7 +53,7 @@ def _load_single_dataset(
|
||||
"""
|
||||
logger.info("Loading dataset {}...".format(dataset_attr))
|
||||
data_path, data_name, data_dir, data_files = None, None, None, None
|
||||
if dataset_attr.load_from in ["hf_hub", "ms_hub"]:
|
||||
if dataset_attr.load_from in ["hf_hub", "ms_hub", "om_hub"]:
|
||||
data_path = dataset_attr.dataset_name
|
||||
data_name = dataset_attr.subset
|
||||
data_dir = dataset_attr.folder
|
||||
@@ -102,6 +102,23 @@ def _load_single_dataset(
|
||||
)
|
||||
if isinstance(dataset, MsDataset):
|
||||
dataset = dataset.to_hf_dataset()
|
||||
|
||||
elif dataset_attr.load_from == "om_hub":
|
||||
require_version("openmind>=0.8.0", "To fix: pip install openmind>=0.8.0")
|
||||
from openmind import OmDataset
|
||||
from openmind.utils.hub import OM_DATASETS_CACHE
|
||||
|
||||
cache_dir = model_args.cache_dir or OM_DATASETS_CACHE
|
||||
dataset = OmDataset.load_dataset(
|
||||
path=data_path,
|
||||
name=data_name,
|
||||
data_dir=data_dir,
|
||||
data_files=data_files,
|
||||
split=dataset_attr.split,
|
||||
cache_dir=cache_dir,
|
||||
token=model_args.om_hub_token,
|
||||
streaming=(data_args.streaming and (dataset_attr.load_from != "file")),
|
||||
)
|
||||
else:
|
||||
dataset = load_dataset(
|
||||
path=data_path,
|
||||
|
||||
@@ -20,7 +20,7 @@ from typing import Any, Dict, List, Literal, Optional, Sequence
|
||||
from transformers.utils import cached_file
|
||||
|
||||
from ..extras.constants import DATA_CONFIG
|
||||
from ..extras.misc import use_modelscope
|
||||
from ..extras.misc import use_modelscope, use_openmind
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -30,7 +30,7 @@ class DatasetAttr:
|
||||
"""
|
||||
|
||||
# basic configs
|
||||
load_from: Literal["hf_hub", "ms_hub", "script", "file"]
|
||||
load_from: Literal["hf_hub", "ms_hub", "om_hub", "script", "file"]
|
||||
dataset_name: str
|
||||
formatting: Literal["alpaca", "sharegpt"] = "alpaca"
|
||||
ranking: bool = False
|
||||
@@ -98,7 +98,12 @@ def get_dataset_list(dataset_names: Optional[Sequence[str]], dataset_dir: str) -
|
||||
dataset_list: List["DatasetAttr"] = []
|
||||
for name in dataset_names:
|
||||
if dataset_info is None: # dataset_dir is ONLINE
|
||||
load_from = "ms_hub" if use_modelscope() else "hf_hub"
|
||||
if use_modelscope():
|
||||
load_from = "ms_hub"
|
||||
elif use_openmind():
|
||||
load_from = "om_hub"
|
||||
else:
|
||||
load_from = "hf_hub"
|
||||
dataset_attr = DatasetAttr(load_from, dataset_name=name)
|
||||
dataset_list.append(dataset_attr)
|
||||
continue
|
||||
@@ -108,10 +113,13 @@ def get_dataset_list(dataset_names: Optional[Sequence[str]], dataset_dir: str) -
|
||||
|
||||
has_hf_url = "hf_hub_url" in dataset_info[name]
|
||||
has_ms_url = "ms_hub_url" in dataset_info[name]
|
||||
has_om_url = "om_hub_url" in dataset_info[name]
|
||||
|
||||
if has_hf_url or has_ms_url:
|
||||
if (use_modelscope() and has_ms_url) or (not has_hf_url):
|
||||
if has_hf_url or has_ms_url or has_om_url:
|
||||
if has_ms_url and (use_modelscope() or not has_hf_url):
|
||||
dataset_attr = DatasetAttr("ms_hub", dataset_name=dataset_info[name]["ms_hub_url"])
|
||||
elif has_om_url and (use_openmind() or not has_hf_url):
|
||||
dataset_attr = DatasetAttr("om_hub", dataset_name=dataset_info[name]["om_hub_url"])
|
||||
else:
|
||||
dataset_attr = DatasetAttr("hf_hub", dataset_name=dataset_info[name]["hf_hub_url"])
|
||||
elif "script_url" in dataset_info[name]:
|
||||
|
||||
@@ -107,6 +107,7 @@ VISION_MODELS = set()
|
||||
class DownloadSource(str, Enum):
|
||||
DEFAULT = "hf"
|
||||
MODELSCOPE = "ms"
|
||||
OPENMIND = "om"
|
||||
|
||||
|
||||
def register_model_group(
|
||||
@@ -163,14 +164,17 @@ register_model_group(
|
||||
"Baichuan2-13B-Base": {
|
||||
DownloadSource.DEFAULT: "baichuan-inc/Baichuan2-13B-Base",
|
||||
DownloadSource.MODELSCOPE: "baichuan-inc/Baichuan2-13B-Base",
|
||||
DownloadSource.OPENMIND: "Baichuan/Baichuan2_13b_base_pt",
|
||||
},
|
||||
"Baichuan2-7B-Chat": {
|
||||
DownloadSource.DEFAULT: "baichuan-inc/Baichuan2-7B-Chat",
|
||||
DownloadSource.MODELSCOPE: "baichuan-inc/Baichuan2-7B-Chat",
|
||||
DownloadSource.OPENMIND: "Baichuan/Baichuan2_7b_chat_pt",
|
||||
},
|
||||
"Baichuan2-13B-Chat": {
|
||||
DownloadSource.DEFAULT: "baichuan-inc/Baichuan2-13B-Chat",
|
||||
DownloadSource.MODELSCOPE: "baichuan-inc/Baichuan2-13B-Chat",
|
||||
DownloadSource.OPENMIND: "Baichuan/Baichuan2_13b_chat_pt",
|
||||
},
|
||||
},
|
||||
template="baichuan2",
|
||||
@@ -555,10 +559,12 @@ register_model_group(
|
||||
"Gemma-2-2B-Instruct": {
|
||||
DownloadSource.DEFAULT: "google/gemma-2-2b-it",
|
||||
DownloadSource.MODELSCOPE: "LLM-Research/gemma-2-2b-it",
|
||||
DownloadSource.OPENMIND: "LlamaFactory/gemma-2-2b-it",
|
||||
},
|
||||
"Gemma-2-9B-Instruct": {
|
||||
DownloadSource.DEFAULT: "google/gemma-2-9b-it",
|
||||
DownloadSource.MODELSCOPE: "LLM-Research/gemma-2-9b-it",
|
||||
DownloadSource.OPENMIND: "LlamaFactory/gemma-2-9b-it",
|
||||
},
|
||||
"Gemma-2-27B-Instruct": {
|
||||
DownloadSource.DEFAULT: "google/gemma-2-27b-it",
|
||||
@@ -578,6 +584,7 @@ register_model_group(
|
||||
"GLM-4-9B-Chat": {
|
||||
DownloadSource.DEFAULT: "THUDM/glm-4-9b-chat",
|
||||
DownloadSource.MODELSCOPE: "ZhipuAI/glm-4-9b-chat",
|
||||
DownloadSource.OPENMIND: "LlamaFactory/glm-4-9b-chat",
|
||||
},
|
||||
"GLM-4-9B-1M-Chat": {
|
||||
DownloadSource.DEFAULT: "THUDM/glm-4-9b-chat-1m",
|
||||
@@ -632,6 +639,7 @@ register_model_group(
|
||||
"InternLM2.5-1.8B": {
|
||||
DownloadSource.DEFAULT: "internlm/internlm2_5-1_8b",
|
||||
DownloadSource.MODELSCOPE: "Shanghai_AI_Laboratory/internlm2_5-1_8b",
|
||||
DownloadSource.OPENMIND: "Intern/internlm2_5-1_8b",
|
||||
},
|
||||
"InternLM2.5-7B": {
|
||||
DownloadSource.DEFAULT: "internlm/internlm2_5-7b",
|
||||
@@ -640,22 +648,27 @@ register_model_group(
|
||||
"InternLM2.5-20B": {
|
||||
DownloadSource.DEFAULT: "internlm/internlm2_5-20b",
|
||||
DownloadSource.MODELSCOPE: "Shanghai_AI_Laboratory/internlm2_5-20b",
|
||||
DownloadSource.OPENMIND: "Intern/internlm2_5-20b",
|
||||
},
|
||||
"InternLM2.5-1.8B-Chat": {
|
||||
DownloadSource.DEFAULT: "internlm/internlm2_5-1_8b-chat",
|
||||
DownloadSource.MODELSCOPE: "Shanghai_AI_Laboratory/internlm2_5-1_8b-chat",
|
||||
DownloadSource.OPENMIND: "Intern/internlm2_5-1_8b-chat",
|
||||
},
|
||||
"InternLM2.5-7B-Chat": {
|
||||
DownloadSource.DEFAULT: "internlm/internlm2_5-7b-chat",
|
||||
DownloadSource.MODELSCOPE: "Shanghai_AI_Laboratory/internlm2_5-7b-chat",
|
||||
DownloadSource.OPENMIND: "Intern/internlm2_5-7b-chat",
|
||||
},
|
||||
"InternLM2.5-7B-1M-Chat": {
|
||||
DownloadSource.DEFAULT: "internlm/internlm2_5-7b-chat-1m",
|
||||
DownloadSource.MODELSCOPE: "Shanghai_AI_Laboratory/internlm2_5-7b-chat-1m",
|
||||
DownloadSource.OPENMIND: "Intern/internlm2_5-7b-chat-1m",
|
||||
},
|
||||
"InternLM2.5-20B-Chat": {
|
||||
DownloadSource.DEFAULT: "internlm/internlm2_5-20b-chat",
|
||||
DownloadSource.MODELSCOPE: "Shanghai_AI_Laboratory/internlm2_5-20b-chat",
|
||||
DownloadSource.OPENMIND: "Intern/internlm2_5-20b-chat",
|
||||
},
|
||||
},
|
||||
template="intern2",
|
||||
@@ -756,6 +769,7 @@ register_model_group(
|
||||
"Llama-3-8B-Chinese-Chat": {
|
||||
DownloadSource.DEFAULT: "shenzhi-wang/Llama3-8B-Chinese-Chat",
|
||||
DownloadSource.MODELSCOPE: "LLM-Research/Llama3-8B-Chinese-Chat",
|
||||
DownloadSource.OPENMIND: "LlamaFactory/Llama3-Chinese-8B-Instruct",
|
||||
},
|
||||
"Llama-3-70B-Chinese-Chat": {
|
||||
DownloadSource.DEFAULT: "shenzhi-wang/Llama3-70B-Chinese-Chat",
|
||||
@@ -960,6 +974,7 @@ register_model_group(
|
||||
"MiniCPM3-4B-Chat": {
|
||||
DownloadSource.DEFAULT: "openbmb/MiniCPM3-4B",
|
||||
DownloadSource.MODELSCOPE: "OpenBMB/MiniCPM3-4B",
|
||||
DownloadSource.OPENMIND: "LlamaFactory/MiniCPM3-4B",
|
||||
},
|
||||
},
|
||||
template="cpm3",
|
||||
@@ -1421,14 +1436,17 @@ register_model_group(
|
||||
"Qwen2-0.5B-Instruct": {
|
||||
DownloadSource.DEFAULT: "Qwen/Qwen2-0.5B-Instruct",
|
||||
DownloadSource.MODELSCOPE: "qwen/Qwen2-0.5B-Instruct",
|
||||
DownloadSource.OPENMIND: "LlamaFactory/Qwen2-0.5B-Instruct",
|
||||
},
|
||||
"Qwen2-1.5B-Instruct": {
|
||||
DownloadSource.DEFAULT: "Qwen/Qwen2-1.5B-Instruct",
|
||||
DownloadSource.MODELSCOPE: "qwen/Qwen2-1.5B-Instruct",
|
||||
DownloadSource.OPENMIND: "LlamaFactory/Qwen2-1.5B-Instruct",
|
||||
},
|
||||
"Qwen2-7B-Instruct": {
|
||||
DownloadSource.DEFAULT: "Qwen/Qwen2-7B-Instruct",
|
||||
DownloadSource.MODELSCOPE: "qwen/Qwen2-7B-Instruct",
|
||||
DownloadSource.OPENMIND: "LlamaFactory/Qwen2-7B-Instruct",
|
||||
},
|
||||
"Qwen2-72B-Instruct": {
|
||||
DownloadSource.DEFAULT: "Qwen/Qwen2-72B-Instruct",
|
||||
@@ -1711,10 +1729,12 @@ register_model_group(
|
||||
"Qwen2-VL-2B-Instruct": {
|
||||
DownloadSource.DEFAULT: "Qwen/Qwen2-VL-2B-Instruct",
|
||||
DownloadSource.MODELSCOPE: "qwen/Qwen2-VL-2B-Instruct",
|
||||
DownloadSource.OPENMIND: "LlamaFactory/Qwen2-VL-2B-Instruct",
|
||||
},
|
||||
"Qwen2-VL-7B-Instruct": {
|
||||
DownloadSource.DEFAULT: "Qwen/Qwen2-VL-7B-Instruct",
|
||||
DownloadSource.MODELSCOPE: "qwen/Qwen2-VL-7B-Instruct",
|
||||
DownloadSource.OPENMIND: "LlamaFactory/Qwen2-VL-7B-Instruct",
|
||||
},
|
||||
"Qwen2-VL-72B-Instruct": {
|
||||
DownloadSource.DEFAULT: "Qwen/Qwen2-VL-72B-Instruct",
|
||||
@@ -1813,10 +1833,12 @@ register_model_group(
|
||||
"TeleChat-7B-Chat": {
|
||||
DownloadSource.DEFAULT: "Tele-AI/telechat-7B",
|
||||
DownloadSource.MODELSCOPE: "TeleAI/telechat-7B",
|
||||
DownloadSource.OPENMIND: "TeleAI/TeleChat-7B-pt",
|
||||
},
|
||||
"TeleChat-12B-Chat": {
|
||||
DownloadSource.DEFAULT: "Tele-AI/TeleChat-12B",
|
||||
DownloadSource.MODELSCOPE: "TeleAI/TeleChat-12B",
|
||||
DownloadSource.OPENMIND: "TeleAI/TeleChat-12B-pt",
|
||||
},
|
||||
"TeleChat-12B-v2-Chat": {
|
||||
DownloadSource.DEFAULT: "Tele-AI/TeleChat-12B-v2",
|
||||
@@ -2035,6 +2057,7 @@ register_model_group(
|
||||
"Yi-1.5-6B-Chat": {
|
||||
DownloadSource.DEFAULT: "01-ai/Yi-1.5-6B-Chat",
|
||||
DownloadSource.MODELSCOPE: "01ai/Yi-1.5-6B-Chat",
|
||||
DownloadSource.OPENMIND: "LlamaFactory/Yi-1.5-6B-Chat",
|
||||
},
|
||||
"Yi-1.5-9B-Chat": {
|
||||
DownloadSource.DEFAULT: "01-ai/Yi-1.5-9B-Chat",
|
||||
|
||||
@@ -231,18 +231,35 @@ def torch_gc() -> None:
|
||||
torch.cuda.empty_cache()
|
||||
|
||||
|
||||
def try_download_model_from_ms(model_args: "ModelArguments") -> str:
|
||||
if not use_modelscope() or os.path.exists(model_args.model_name_or_path):
|
||||
def try_download_model_from_other_hub(model_args: "ModelArguments") -> str:
|
||||
if (not use_modelscope() and not use_openmind()) or os.path.exists(model_args.model_name_or_path):
|
||||
return model_args.model_name_or_path
|
||||
|
||||
try:
|
||||
if use_modelscope():
|
||||
require_version("modelscope>=1.11.0", "To fix: pip install modelscope>=1.11.0")
|
||||
from modelscope import snapshot_download
|
||||
|
||||
revision = "master" if model_args.model_revision == "main" else model_args.model_revision
|
||||
return snapshot_download(model_args.model_name_or_path, revision=revision, cache_dir=model_args.cache_dir)
|
||||
except ImportError:
|
||||
raise ImportError("Please install modelscope via `pip install modelscope -U`")
|
||||
return snapshot_download(
|
||||
model_args.model_name_or_path,
|
||||
revision=revision,
|
||||
cache_dir=model_args.cache_dir,
|
||||
)
|
||||
|
||||
if use_openmind():
|
||||
require_version("openmind>=0.8.0", "To fix: pip install openmind>=0.8.0")
|
||||
from openmind.utils.hub import snapshot_download
|
||||
|
||||
return snapshot_download(
|
||||
model_args.model_name_or_path,
|
||||
revision=model_args.model_revision,
|
||||
cache_dir=model_args.cache_dir,
|
||||
)
|
||||
|
||||
|
||||
def use_modelscope() -> bool:
|
||||
return os.environ.get("USE_MODELSCOPE_HUB", "0").lower() in ["true", "1"]
|
||||
|
||||
|
||||
def use_openmind() -> bool:
|
||||
return os.environ.get("USE_OPENMIND_HUB", "0").lower() in ["true", "1"]
|
||||
|
||||
@@ -267,6 +267,10 @@ class ModelArguments(QuantizationArguments, ProcessorArguments, ExportArguments,
|
||||
default=None,
|
||||
metadata={"help": "Auth token to log in with ModelScope Hub."},
|
||||
)
|
||||
om_hub_token: Optional[str] = field(
|
||||
default=None,
|
||||
metadata={"help": "Auth token to log in with Modelers Hub."},
|
||||
)
|
||||
print_param_status: bool = field(
|
||||
default=False,
|
||||
metadata={"help": "For debugging purposes, print the status of the parameters in the model."},
|
||||
|
||||
@@ -123,7 +123,7 @@ def _check_extra_dependencies(
|
||||
require_version("mixture-of-depth>=1.1.6", "To fix: pip install mixture-of-depth>=1.1.6")
|
||||
|
||||
if model_args.infer_backend == "vllm":
|
||||
require_version("vllm>=0.4.3,<=0.6.2", "To fix: pip install vllm>=0.4.3,<=0.6.2")
|
||||
require_version("vllm>=0.4.3,<=0.6.3", "To fix: pip install vllm>=0.4.3,<=0.6.3")
|
||||
|
||||
if finetuning_args.use_galore:
|
||||
require_version("galore_torch", "To fix: pip install galore_torch")
|
||||
|
||||
@@ -19,7 +19,7 @@ from transformers import AutoConfig, AutoModelForCausalLM, AutoModelForVision2Se
|
||||
from trl import AutoModelForCausalLMWithValueHead
|
||||
|
||||
from ..extras.logging import get_logger
|
||||
from ..extras.misc import count_parameters, skip_check_imports, try_download_model_from_ms
|
||||
from ..extras.misc import count_parameters, skip_check_imports, try_download_model_from_other_hub
|
||||
from .adapter import init_adapter
|
||||
from .model_utils.liger_kernel import apply_liger_kernel
|
||||
from .model_utils.misc import register_autoclass
|
||||
@@ -50,7 +50,7 @@ def _get_init_kwargs(model_args: "ModelArguments") -> Dict[str, Any]:
|
||||
Note: including inplace operation of model_args.
|
||||
"""
|
||||
skip_check_imports()
|
||||
model_args.model_name_or_path = try_download_model_from_ms(model_args)
|
||||
model_args.model_name_or_path = try_download_model_from_other_hub(model_args)
|
||||
return {
|
||||
"trust_remote_code": True,
|
||||
"cache_dir": model_args.cache_dir,
|
||||
|
||||
@@ -31,7 +31,7 @@ from ..extras.constants import (
|
||||
DownloadSource,
|
||||
)
|
||||
from ..extras.logging import get_logger
|
||||
from ..extras.misc import use_modelscope
|
||||
from ..extras.misc import use_modelscope, use_openmind
|
||||
from ..extras.packages import is_gradio_available
|
||||
|
||||
|
||||
@@ -109,9 +109,16 @@ def get_model_path(model_name: str) -> str:
|
||||
use_modelscope()
|
||||
and path_dict.get(DownloadSource.MODELSCOPE)
|
||||
and model_path == path_dict.get(DownloadSource.DEFAULT)
|
||||
): # replace path
|
||||
): # replace hf path with ms path
|
||||
model_path = path_dict.get(DownloadSource.MODELSCOPE)
|
||||
|
||||
if (
|
||||
use_openmind()
|
||||
and path_dict.get(DownloadSource.OPENMIND)
|
||||
and model_path == path_dict.get(DownloadSource.DEFAULT)
|
||||
): # replace hf path with om path
|
||||
model_path = path_dict.get(DownloadSource.OPENMIND)
|
||||
|
||||
return model_path
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user