Add DPO mix dataset

Former-commit-id: 6def3f8bfa51b2d9d73af112352ce07db972e4c9
2024-04-20 01:31:38 +08:00
parent b3b5b530d1
commit 0cb596fee1
4 changed files with 59 additions and 105 deletions
--- a/data/hh_rlhf_en/hh_rlhf_en.py
+++ b/data/hh_rlhf_en/hh_rlhf_en.py
@@ -1,8 +1,10 @@
-import os
 import json
-import datasets
+import os
 from typing import List

+import datasets
+
+
 _HF_ENDPOINT = os.getenv("HF_ENDPOINT", "https://huggingface.co")
 _DESCRIPTION = "Human preference data about helpfulness and harmlessness."
 _CITATION = ""
@@ -14,50 +16,37 @@ _URLS = {
        _URL + "harmless-base/train.jsonl.gz",
        _URL + "helpful-base/train.jsonl.gz",
        _URL + "helpful-online/train.jsonl.gz",
-        _URL + "helpful-rejection-sampled/train.jsonl.gz"
+        _URL + "helpful-rejection-sampled/train.jsonl.gz",
    ],
    "test": [
        _URL + "harmless-base/test.jsonl.gz",
        _URL + "helpful-base/test.jsonl.gz",
        _URL + "helpful-online/test.jsonl.gz",
-        _URL + "helpful-rejection-sampled/test.jsonl.gz"
-    ]
+        _URL + "helpful-rejection-sampled/test.jsonl.gz",
+    ],
 }


 class HhRlhfEn(datasets.GeneratorBasedBuilder):
-
    VERSION = datasets.Version("0.0.0")

    def _info(self) -> datasets.DatasetInfo:
-        features = datasets.Features({
-            "instruction": datasets.Value("string"),
-            "output": datasets.Sequence(datasets.Value("string")),
-            "history": datasets.Sequence(datasets.Sequence(datasets.Value("string")))
-        })
+        features = datasets.Features(
+            {
+                "instruction": datasets.Value("string"),
+                "output": datasets.Sequence(datasets.Value("string")),
+                "history": datasets.Sequence(datasets.Sequence(datasets.Value("string"))),
+            }
+        )
        return datasets.DatasetInfo(
-            description=_DESCRIPTION,
-            features=features,
-            homepage=_HOMEPAGE,
-            license=_LICENSE,
-            citation=_CITATION
+            description=_DESCRIPTION, features=features, homepage=_HOMEPAGE, license=_LICENSE, citation=_CITATION
        )

    def _split_generators(self, dl_manager: datasets.DownloadManager):
        file_path = dl_manager.download_and_extract(_URLS)
        return [
-            datasets.SplitGenerator(
-                name=datasets.Split.TRAIN,
-                gen_kwargs={
-                    "filepaths": file_path["train"]
-                }
-            ),
-            datasets.SplitGenerator(
-                name=datasets.Split.TEST,
-                gen_kwargs={
-                    "filepaths": file_path["test"]
-                }
-            )
+            datasets.SplitGenerator(name=datasets.Split.TRAIN, gen_kwargs={"filepaths": file_path["train"]}),
+            datasets.SplitGenerator(name=datasets.Split.TEST, gen_kwargs={"filepaths": file_path["test"]}),
        ]

    def _generate_examples(self, filepaths: List[str]):
@@ -70,12 +59,12 @@ class HhRlhfEn(datasets.GeneratorBasedBuilder):
                    rejected = data["rejected"]

                    assist_idx = rejected.rfind("\n\nAssistant: ")
-                    r_reject = rejected[assist_idx+13:].strip()
+                    r_reject = rejected[assist_idx + 13 :].strip()
                    assist_idx = chosen.rfind("\n\nAssistant: ")
-                    r_accept = chosen[assist_idx+13:].strip()
+                    r_accept = chosen[assist_idx + 13 :].strip()

                    human_idx = chosen.rfind("\n\nHuman: ")
-                    query = chosen[human_idx+9:assist_idx].strip()
+                    query = chosen[human_idx + 9 : assist_idx].strip()
                    prompt = chosen[:human_idx]
                    history = []

@@ -83,16 +72,12 @@ class HhRlhfEn(datasets.GeneratorBasedBuilder):
                        assist_idx = prompt.rfind("\n\nAssistant: ")
                        human_idx = prompt.rfind("\n\nHuman: ")
                        if human_idx != -1:
-                            old_query = prompt[human_idx+9:assist_idx].strip()
-                            old_resp = prompt[assist_idx+13:].strip()
+                            old_query = prompt[human_idx + 9 : assist_idx].strip()
+                            old_resp = prompt[assist_idx + 13 :].strip()
                            history.insert(0, (old_query, old_resp))
                        else:
                            break
                        prompt = prompt[:human_idx]

-                    yield key, {
-                        "instruction": query,
-                        "output": [r_accept, r_reject],
-                        "history": history
-                    }
+                    yield key, {"instruction": query, "output": [r_accept, r_reject], "history": history}
                    key += 1