improve aligner

Former-commit-id: cc7296b92e10c24967fc753393275b71d300683f
This commit is contained in:
hiyouga
2024-02-10 16:39:19 +08:00
parent a41fa6e730
commit 1955a8ea5a
10 changed files with 80 additions and 64 deletions

View File

@@ -11,7 +11,7 @@ If you are using a custom dataset, please provide your dataset definition in the
"folder": "the name of the folder of the dataset repository on the Hugging Face hub. (optional, default: None)",
"ranking": "whether the dataset is a preference dataset or not. (default: false)",
"formatting": "the format of the dataset. (optional, default: alpaca, can be chosen from {alpaca, sharegpt})",
"columns": {
"columns (optional)": {
"prompt": "the column name in the dataset containing the prompts. (default: instruction)",
"query": "the column name in the dataset containing the queries. (default: input)",
"response": "the column name in the dataset containing the responses. (default: output)",
@@ -20,14 +20,14 @@ If you are using a custom dataset, please provide your dataset definition in the
"system": "the column name in the dataset containing the system prompts. (default: None)",
"tools": "the column name in the dataset containing the tool description. (default: None)"
},
"tags": {
"tags (optional, used for the sharegpt format)": {
"role_tag": "the key in the message represents the identity. (default: from)",
"content_tag": "the key in the message represents the content. (default: value)",
"user_tag": "the value of the role_tag represents the user. (default: human)",
"assistant_tag": "the value of the role_tag represents the assistant. (default: gpt)",
"observation_tag": "the value of the role_tag represents the tool results. (default: observation)",
"function_tag": "the value of the role_tag represents the function call. (default: function_call)",
"system_tag": "the value of the role_tag represents the system prompt. (default: None) incompatible with system column"
"system_tag": "the value of the role_tag represents the system prompt. (default: system, can override system column)"
}
}
```

View File

@@ -11,7 +11,7 @@
"folder": "Hugging Face 仓库的文件夹名称可选默认None",
"ranking": "是否为偏好数据集可选默认False",
"formatting": "数据集格式可选默认alpaca可以为 alpaca 或 sharegpt",
"columns": {
"columns(可选)": {
"prompt": "数据集代表提示词的表头名称默认instruction",
"query": "数据集代表请求的表头名称默认input",
"response": "数据集代表回答的表头名称默认output",
@@ -20,13 +20,14 @@
"system": "数据集代表系统提示的表头名称默认None",
"tools": "数据集代表工具描述的表头名称默认None"
},
"tags": {
"tags(可选,用于 sharegpt 格式)": {
"role_tag": "消息中代表发送者身份的键名默认from",
"content_tag": "消息中代表文本内容的键名默认value",
"user_tag": "消息中代表用户的 role_tag默认human",
"assistant_tag": "消息中代表助手的 role_tag默认gpt",
"observation_tag": "消息中代表工具返回结果的 role_tag默认observation",
"function_tag": "消息中代表工具调用的 role_tag默认function_call"
"function_tag": "消息中代表工具调用的 role_tag默认function_call",
"system_tag": "消息中代表系统提示的 role_tag默认system会覆盖 system 列)"
}
}
```