refactor dataset_attr, add eos in pt, fix #757

Former-commit-id: 0feec9a830b917b36686b61938a66e842eccf930
2023-09-01 19:00:45 +08:00
parent 93be211f80
commit e5b72c6a77
19 changed files with 108 additions and 126 deletions
--- a/data/README.md
+++ b/data/README.md
@@ -6,13 +6,13 @@ If you are using a custom dataset, please provide your dataset definition in the
  "script_url": "the name of the directory containing a dataset loading script. (if specified, ignore below 2 arguments)",
  "file_name": "the name of the dataset file in the this directory. (required if above are not specified)",
  "file_sha1": "the SHA-1 hash value of the dataset file. (optional)",
+  "ranking": "whether the examples contains ranked responses or not. (default: false)",
  "columns": {
    "prompt": "the name of the column in the datasets containing the prompts. (default: instruction)",
    "query": "the name of the column in the datasets containing the queries. (default: input)",
    "response": "the name of the column in the datasets containing the responses. (default: output)",
    "history": "the name of the column in the datasets containing the history of chat. (default: None)"
-  },
-  "stage": "The stage at which the data is being used: pt, sft, and rm, which correspond to pre-training, supervised fine-tuning(PPO), and reward model (DPO) training, respectively.(default: None)"
+  }
 }
 ```

@@ -27,7 +27,6 @@ For datasets used in reward modeling or DPO training, the `response` column shou
  "output": [
    "Chosen answer",
    "Rejected answer"
-  ],
-  "stage": "rm"
+  ]
 }
 ```
--- a/data/README_zh.md
+++ b/data/README_zh.md
@@ -6,19 +6,19 @@
  "script_url": "包含数据加载脚本的本地文件夹名称（若指定，则忽略下列两个参数）",
  "file_name": "该目录下数据集文件的名称（若上述参数未指定，则此项必需）",
  "file_sha1": "数据集文件的SHA-1哈希值（可选）",
+  "ranking": "数据集是否包含排序后的回答（默认：false）",
  "columns": {
    "prompt": "数据集代表提示词的表头名称（默认：instruction）",
    "query": "数据集代表请求的表头名称（默认：input）",
    "response": "数据集代表回答的表头名称（默认：output）",
    "history": "数据集代表历史对话的表头名称（默认：None）"
-  },
-  "stage": "数据所应用的训练阶段，可选值有 pt, sft, rm 三个，对应预训练，指令监督微调(PPO)，奖励模型(DPO)训练, 默认为None，表示不限制"
+  }
 }
 ```

 其中 `prompt` 和 `response` 列应当是非空的字符串。`query` 列的内容将会和 `prompt` 列拼接作为模型输入。`history` 列应当是一个列表，其中每个元素是一个字符串二元组，分别代表用户请求和模型答复。

-对于奖励模型或 DPO 训练的数据集，`response` 列应当是一个字符串列表，排在前面的代表更优的答案，例如：
+对于奖励模型训练或 DPO 训练的数据集，`response` 列应当是一个字符串列表，排在前面的代表更优的答案，例如：

 ```json
 {
@@ -27,7 +27,6 @@
  "output": [
    "Chosen answer",
    "Rejected answer"
-  ],
-  "stage": "rm"
+  ]
 }
 ```