add orca_dpo_pairs dataset
Former-commit-id: af683aacbae462a2a37d76d37df583e217664bd5
This commit is contained in:
@@ -3,7 +3,7 @@ import json
|
||||
import datasets
|
||||
from typing import List
|
||||
|
||||
_HF_ENDPOINT = os.getenv("_HF_ENDPOINT", "https://huggingface.co")
|
||||
_HF_ENDPOINT = os.getenv("HF_ENDPOINT", "https://huggingface.co")
|
||||
|
||||
_DESCRIPTION = "UltraChat: Large-scale, Informative, and Diverse Multi-round Dialogue Data."
|
||||
|
||||
@@ -18,9 +18,9 @@ _CITATION = """\
|
||||
}
|
||||
"""
|
||||
|
||||
_HOMEPAGE = f"{_HF_ENDPOINT}/datasets/stingning/ultrachat"
|
||||
_HOMEPAGE = "{}/datasets/stingning/ultrachat".format(_HF_ENDPOINT)
|
||||
_LICENSE = "cc-by-nc-4.0"
|
||||
_BASE_DATA_URL = "{_HF_ENDPOINT}/datasets/stingning/ultrachat/resolve/main/train_{idx}.jsonl"
|
||||
_BASE_DATA_URL = "{}/datasets/stingning/ultrachat/resolve/main/train_{{idx}}.jsonl".format(_HF_ENDPOINT)
|
||||
|
||||
|
||||
class UltraChat(datasets.GeneratorBasedBuilder):
|
||||
@@ -40,7 +40,7 @@ class UltraChat(datasets.GeneratorBasedBuilder):
|
||||
)
|
||||
|
||||
def _split_generators(self, dl_manager: datasets.DownloadManager):
|
||||
file_paths = [dl_manager.download(_BASE_DATA_URL.format(_HF_ENDPOINT=_HF_ENDPOINT,idx=idx)) for idx in range(10)] # multiple shards
|
||||
file_paths = [dl_manager.download(_BASE_DATA_URL.format(idx=idx)) for idx in range(10)] # multiple shards
|
||||
return [
|
||||
datasets.SplitGenerator(
|
||||
name=datasets.Split.TRAIN,
|
||||
|
||||
Reference in New Issue
Block a user