add orca_dpo_pairs dataset

Former-commit-id: af683aacbae462a2a37d76d37df583e217664bd5
This commit is contained in:
hiyouga
2024-03-20 20:09:06 +08:00
parent 04884a0911
commit 5ed234ca63
4 changed files with 11 additions and 12 deletions

View File

@@ -3,15 +3,12 @@ import json
import datasets
from typing import List
# NOTE(review): this text is a rendered diff hunk without +/- markers, so each
# changed constant below appears twice -- presumably the removed line first and
# its replacement second (confirm against the actual commit).

# Base endpoint used to build the dataset URLs; taken from an environment
# variable and defaulting to https://huggingface.co when it is unset.
# The two lines differ only in the variable name they read:
# "_HF_ENDPOINT" versus "HF_ENDPOINT".
_HF_ENDPOINT = os.getenv("_HF_ENDPOINT", "https://huggingface.co")
_HF_ENDPOINT = os.getenv("HF_ENDPOINT", "https://huggingface.co")
# Dataset metadata strings (description, citation, homepage, license).
_DESCRIPTION = "Human preference data about helpfulness and harmlessness."
_CITATION = ""
# Dataset page under the configured endpoint. Both spellings (f-string and
# str.format) produce the same string.
_HOMEPAGE = f"{_HF_ENDPOINT}/datasets/Anthropic/hh-rlhf"
_HOMEPAGE = "{}/datasets/Anthropic/hh-rlhf".format(_HF_ENDPOINT)
_LICENSE = "mit"
# Common prefix for the data files; relative file paths are appended to it.
# Both spellings (f-string and str.format) produce the same string.
_URL = f"{_HF_ENDPOINT}/datasets/Anthropic/hh-rlhf/resolve/main/"
_URL = "{}/datasets/Anthropic/hh-rlhf/resolve/main/".format(_HF_ENDPOINT)
_URLS = {
"train": [
_URL + "harmless-base/train.jsonl.gz",