[data] support for specifying a dataset in cloud storage (#7567)
* add support for loading datasets from s3/gcs * add comments to readme * run linter and address comments * add option to pass in kwargs to ray init (i.e. runtime env) * address comment * revert mixed up changes
This commit is contained in:
@@ -141,6 +141,8 @@ def get_dataset_list(dataset_names: Optional[list[str]], dataset_dir: str) -> li
|
||||
dataset_attr = DatasetAttr("hf_hub", dataset_name=dataset_info[name]["hf_hub_url"])
|
||||
elif "script_url" in dataset_info[name]:
|
||||
dataset_attr = DatasetAttr("script", dataset_name=dataset_info[name]["script_url"])
|
||||
elif "cloud_file_name" in dataset_info[name]:
|
||||
dataset_attr = DatasetAttr("cloud_file", dataset_name=dataset_info[name]["cloud_file_name"])
|
||||
else:
|
||||
dataset_attr = DatasetAttr("file", dataset_name=dataset_info[name]["file_name"])
|
||||
|
||||
|
||||
Reference in New Issue
Block a user