add readme for dataset

Former-commit-id: bdcb0ea40e726e4c5752f938b379ed9a18e7e1d0
2023-08-23 19:55:45 +08:00
parent cbc7db3478
commit a6662b73f5
2 changed files with 8 additions and 4 deletions
--- a/data/README.md
+++ b/data/README.md
@@ -11,7 +11,8 @@ If you are using a custom dataset, please provide your dataset definition in the
    "query": "the name of the column in the datasets containing the queries. (default: input)",
    "response": "the name of the column in the datasets containing the responses. (default: output)",
    "history": "the name of the column in the datasets containing the history of chat. (default: None)"
-  }
+  },
+  "stage": "the stage at which the data is being used: pt, sft, and rm, which correspond to pre-training, supervised fine-tuning (PPO), and reward model (DPO) training, respectively. (default: None)"
 }
 ```

@@ -26,6 +27,7 @@ For datasets used in reward modeling or DPO training, the `response` column shou
  "output": [
    "Chosen answer",
    "Rejected answer"
-  ]
+  ],
+  "stage": "rm"
 }
 ```