1. add modelers hub support

Former-commit-id: 14678eb444d8181176745d18d4a6865fd6860f58
This commit is contained in:
huniu20
2024-10-09 17:21:37 +08:00
parent b2dc6dc59a
commit c42dcab32b
7 changed files with 92 additions and 21 deletions

View File

@@ -20,7 +20,7 @@ from typing import Any, Dict, List, Literal, Optional, Sequence
from transformers.utils import cached_file
from ..extras.constants import DATA_CONFIG
from ..extras.misc import use_modelscope
from ..extras.misc import use_openmind, use_modelscope
@dataclass
@@ -97,8 +97,13 @@ def get_dataset_list(dataset_names: Optional[Sequence[str]], dataset_dir: str) -
dataset_list: List["DatasetAttr"] = []
for name in dataset_names:
if dataset_info is None: # dataset_dir is ONLINE
load_from = "ms_hub" if use_modelscope() else "hf_hub"
if dataset_info is None: # dataset_dir is ONLINE
if use_openmind():
load_from = "om_hub"
if use_modelscope():
load_from = "ms_hub"
if load_from is None:
load_from = "hf_hub"
dataset_attr = DatasetAttr(load_from, dataset_name=name)
dataset_list.append(dataset_attr)
continue
@@ -106,11 +111,14 @@ def get_dataset_list(dataset_names: Optional[Sequence[str]], dataset_dir: str) -
if name not in dataset_info:
raise ValueError("Undefined dataset {} in {}.".format(name, DATA_CONFIG))
has_om_url = "om_hub_url" in dataset_info[name]
has_hf_url = "hf_hub_url" in dataset_info[name]
has_ms_url = "ms_hub_url" in dataset_info[name]
if has_hf_url or has_ms_url:
if (use_modelscope() and has_ms_url) or (not has_hf_url):
if has_om_url or has_hf_url or has_ms_url:
if (use_openmind() and has_om_url) or (not has_hf_url):
dataset_attr = DatasetAttr("om_hub", dataset_name=dataset_info[name]["om_hub_url"])
elif (use_modelscope() and has_ms_url) or (not has_hf_url):
dataset_attr = DatasetAttr("ms_hub", dataset_name=dataset_info[name]["ms_hub_url"])
else:
dataset_attr = DatasetAttr("hf_hub", dataset_name=dataset_info[name]["hf_hub_url"])