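tiny fix: sort the use_modelscope/use_openmind import, add "om_hub" to DatasetAttr.load_from, and check ModelScope before openMind when choosing the dataset hub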
Former-commit-id: 1fe424323b212094856f423351dc2a15774d39c3
This commit is contained in:
@@ -20,7 +20,7 @@ from typing import Any, Dict, List, Literal, Optional, Sequence
|
||||
from transformers.utils import cached_file
|
||||
|
||||
from ..extras.constants import DATA_CONFIG
|
||||
from ..extras.misc import use_openmind, use_modelscope
|
||||
from ..extras.misc import use_modelscope, use_openmind
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -30,7 +30,7 @@ class DatasetAttr:
|
||||
"""
|
||||
|
||||
# basic configs
|
||||
load_from: Literal["hf_hub", "ms_hub", "script", "file"]
|
||||
load_from: Literal["hf_hub", "ms_hub", "om_hub", "script", "file"]
|
||||
dataset_name: str
|
||||
formatting: Literal["alpaca", "sharegpt"] = "alpaca"
|
||||
ranking: bool = False
|
||||
@@ -97,11 +97,11 @@ def get_dataset_list(dataset_names: Optional[Sequence[str]], dataset_dir: str) -
|
||||
|
||||
dataset_list: List["DatasetAttr"] = []
|
||||
for name in dataset_names:
|
||||
if dataset_info is None: # dataset_dir is ONLINE
|
||||
if use_openmind():
|
||||
load_from = "om_hub"
|
||||
elif use_modelscope():
|
||||
if dataset_info is None: # dataset_dir is ONLINE
|
||||
if use_modelscope():
|
||||
load_from = "ms_hub"
|
||||
elif use_openmind():
|
||||
load_from = "om_hub"
|
||||
else:
|
||||
load_from = "hf_hub"
|
||||
dataset_attr = DatasetAttr(load_from, dataset_name=name)
|
||||
@@ -111,15 +111,15 @@ def get_dataset_list(dataset_names: Optional[Sequence[str]], dataset_dir: str) -
|
||||
if name not in dataset_info:
|
||||
raise ValueError("Undefined dataset {} in {}.".format(name, DATA_CONFIG))
|
||||
|
||||
has_om_url = "om_hub_url" in dataset_info[name]
|
||||
has_hf_url = "hf_hub_url" in dataset_info[name]
|
||||
has_ms_url = "ms_hub_url" in dataset_info[name]
|
||||
has_om_url = "om_hub_url" in dataset_info[name]
|
||||
|
||||
if has_om_url or has_hf_url or has_ms_url:
|
||||
if has_om_url and (use_openmind() or not has_hf_url):
|
||||
dataset_attr = DatasetAttr("om_hub", dataset_name=dataset_info[name]["om_hub_url"])
|
||||
if has_hf_url or has_ms_url or has_om_url:
|
||||
if has_ms_url and (use_modelscope() or not has_hf_url):
|
||||
dataset_attr = DatasetAttr("ms_hub", dataset_name=dataset_info[name]["ms_hub_url"])
|
||||
elif has_om_url and (use_openmind() or not has_hf_url):
|
||||
dataset_attr = DatasetAttr("om_hub", dataset_name=dataset_info[name]["om_hub_url"])
|
||||
else:
|
||||
dataset_attr = DatasetAttr("hf_hub", dataset_name=dataset_info[name]["hf_hub_url"])
|
||||
elif "script_url" in dataset_info[name]:
|
||||
|
||||
Reference in New Issue
Block a user