[data] qwen3 fixes (#8109)
This commit is contained in:
@@ -50,7 +50,9 @@ Currently we support datasets in **alpaca** and **sharegpt** format.
|
||||
|
||||
* [Example dataset](alpaca_en_demo.json)
|
||||
|
||||
In supervised fine-tuning, the `instruction` column will be concatenated with the `input` column and used as the human prompt, then the human prompt would be `instruction\ninput`. The `output` column represents the model response.
|
||||
In supervised fine-tuning, the `instruction` column is concatenated with the `input` column to form the user prompt, i.e., the user prompt is `instruction\ninput`. The `output` column represents the model response.
|
||||
|
||||
For reasoning models, if the dataset contains chain-of-thought (CoT), the CoT needs to be placed in the model responses, such as `<think>cot</think>output`.
|
||||
|
||||
The `system` column will be used as the system prompt if specified.
|
||||
|
||||
@@ -59,13 +61,13 @@ The `history` column is a list consisting of string tuples representing prompt-r
|
||||
```json
|
||||
[
|
||||
{
|
||||
"instruction": "human instruction (required)",
|
||||
"input": "human input (optional)",
|
||||
"instruction": "user instruction (required)",
|
||||
"input": "user input (optional)",
|
||||
"output": "model response (required)",
|
||||
"system": "system prompt (optional)",
|
||||
"history": [
|
||||
["human instruction in the first round (optional)", "model response in the first round (optional)"],
|
||||
["human instruction in the second round (optional)", "model response in the second round (optional)"]
|
||||
["user instruction in the first round (optional)", "model response in the first round (optional)"],
|
||||
["user instruction in the second round (optional)", "model response in the second round (optional)"]
|
||||
]
|
||||
}
|
||||
]
|
||||
@@ -86,6 +88,9 @@ Regarding the above dataset, the *dataset description* in `dataset_info.json` sh
|
||||
}
|
||||
```
|
||||
|
||||
> [!TIP]
|
||||
> If the model has reasoning capabilities but the dataset does not contain chain-of-thought (CoT), LLaMA-Factory will automatically add an empty CoT to the data. When `enable_thinking` is `True`, the empty CoT is added to the model responses and included in the loss computation; otherwise, it is added to the user prompts and excluded from the loss computation. Please keep the `enable_thinking` parameter consistent between training and inference.
|
||||
|
||||
### Pre-training Dataset
|
||||
|
||||
- [Example dataset](c4_demo.jsonl)
|
||||
@@ -119,8 +124,8 @@ It requires a better response in `chosen` column and a worse response in `reject
|
||||
```json
|
||||
[
|
||||
{
|
||||
"instruction": "human instruction (required)",
|
||||
"input": "human input (optional)",
|
||||
"instruction": "user instruction (required)",
|
||||
"input": "user input (optional)",
|
||||
"chosen": "chosen answer (required)",
|
||||
"rejected": "rejected answer (required)"
|
||||
}
|
||||
@@ -174,7 +179,7 @@ Note that the human and observation should appear in odd positions, while gpt an
|
||||
"conversations": [
|
||||
{
|
||||
"from": "human",
|
||||
"value": "human instruction"
|
||||
"value": "user instruction"
|
||||
},
|
||||
{
|
||||
"from": "function_call",
|
||||
@@ -225,7 +230,7 @@ Preference datasets in sharegpt format also require a better message in `chosen`
|
||||
"conversations": [
|
||||
{
|
||||
"from": "human",
|
||||
"value": "human instruction"
|
||||
"value": "user instruction"
|
||||
},
|
||||
{
|
||||
"from": "gpt",
|
||||
@@ -233,7 +238,7 @@ Preference datasets in sharegpt format also require a better message in `chosen`
|
||||
},
|
||||
{
|
||||
"from": "human",
|
||||
"value": "human instruction"
|
||||
"value": "user instruction"
|
||||
}
|
||||
],
|
||||
"chosen": {
|
||||
@@ -275,7 +280,7 @@ KTO datasets require an extra `kto_tag` column containing the boolean human feedb
|
||||
"conversations": [
|
||||
{
|
||||
"from": "human",
|
||||
"value": "human instruction"
|
||||
"value": "user instruction"
|
||||
},
|
||||
{
|
||||
"from": "gpt",
|
||||
@@ -314,7 +319,7 @@ The number of images should be identical to the `<image>` tokens in the conversa
|
||||
"conversations": [
|
||||
{
|
||||
"from": "human",
|
||||
"value": "<image>human instruction"
|
||||
"value": "<image>user instruction"
|
||||
},
|
||||
{
|
||||
"from": "gpt",
|
||||
@@ -355,7 +360,7 @@ The number of videos should be identical to the `<video>` tokens in the conversa
|
||||
"conversations": [
|
||||
{
|
||||
"from": "human",
|
||||
"value": "<video>human instruction"
|
||||
"value": "<video>user instruction"
|
||||
},
|
||||
{
|
||||
"from": "gpt",
|
||||
@@ -396,7 +401,7 @@ The number of audios should be identical to the `<audio>` tokens in the conversa
|
||||
"conversations": [
|
||||
{
|
||||
"from": "human",
|
||||
"value": "<audio>human instruction"
|
||||
"value": "<audio>user instruction"
|
||||
},
|
||||
{
|
||||
"from": "gpt",
|
||||
@@ -437,7 +442,7 @@ The openai format is simply a special case of the sharegpt format, where the fir
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": "human instruction"
|
||||
"content": "user instruction"
|
||||
},
|
||||
{
|
||||
"role": "assistant",
|
||||
|
||||
@@ -49,7 +49,9 @@
|
||||
|
||||
- [样例数据集](alpaca_zh_demo.json)
|
||||
|
||||
在指令监督微调时,`instruction` 列对应的内容会与 `input` 列对应的内容拼接后作为人类指令,即人类指令为 `instruction\ninput`。而 `output` 列对应的内容为模型回答。
|
||||
在指令监督微调时,`instruction` 列对应的内容会与 `input` 列对应的内容拼接后作为提示词,即提示词为 `instruction\ninput`。而 `output` 列对应的内容为模型回答。
|
||||
|
||||
对于推理类模型的微调,如果数据集包含思维链,则需要把思维链放在模型回答中,例如 `<think>cot</think>output`。
|
||||
|
||||
如果指定,`system` 列对应的内容将被作为系统提示词。
|
||||
|
||||
@@ -58,8 +60,8 @@
|
||||
```json
|
||||
[
|
||||
{
|
||||
"instruction": "人类指令(必填)",
|
||||
"input": "人类输入(选填)",
|
||||
"instruction": "用户指令(必填)",
|
||||
"input": "用户输入(选填)",
|
||||
"output": "模型回答(必填)",
|
||||
"system": "系统提示词(选填)",
|
||||
"history": [
|
||||
@@ -85,6 +87,9 @@
|
||||
}
|
||||
```
|
||||
|
||||
> [!TIP]
|
||||
> 如果模型本身具备推理能力,而数据集不包含思维链,LLaMA-Factory 会自动为数据添加空思维链。当 `enable_thinking` 为 `True` 时,空思维链会添加到模型回答中并且计算损失,否则会添加到用户指令中并且不计算损失。请在训练和推理时保持 `enable_thinking` 参数一致。
|
||||
|
||||
### 预训练数据集
|
||||
|
||||
- [样例数据集](c4_demo.jsonl)
|
||||
@@ -118,8 +123,8 @@
|
||||
```json
|
||||
[
|
||||
{
|
||||
"instruction": "人类指令(必填)",
|
||||
"input": "人类输入(选填)",
|
||||
"instruction": "用户指令(必填)",
|
||||
"input": "用户输入(选填)",
|
||||
"chosen": "优质回答(必填)",
|
||||
"rejected": "劣质回答(必填)"
|
||||
}
|
||||
@@ -173,7 +178,7 @@ KTO 数据集需要提供额外的 `kto_tag` 列。详情请参阅 [sharegpt](#s
|
||||
"conversations": [
|
||||
{
|
||||
"from": "human",
|
||||
"value": "人类指令"
|
||||
"value": "用户指令"
|
||||
},
|
||||
{
|
||||
"from": "function_call",
|
||||
@@ -224,7 +229,7 @@ Sharegpt 格式的偏好数据集同样需要在 `chosen` 列中提供更优的
|
||||
"conversations": [
|
||||
{
|
||||
"from": "human",
|
||||
"value": "人类指令"
|
||||
"value": "用户指令"
|
||||
},
|
||||
{
|
||||
"from": "gpt",
|
||||
@@ -232,7 +237,7 @@ Sharegpt 格式的偏好数据集同样需要在 `chosen` 列中提供更优的
|
||||
},
|
||||
{
|
||||
"from": "human",
|
||||
"value": "人类指令"
|
||||
"value": "用户指令"
|
||||
}
|
||||
],
|
||||
"chosen": {
|
||||
@@ -274,7 +279,7 @@ KTO 数据集需要额外添加一个 `kto_tag` 列,包含 bool 类型的人
|
||||
"conversations": [
|
||||
{
|
||||
"from": "human",
|
||||
"value": "人类指令"
|
||||
"value": "用户指令"
|
||||
},
|
||||
{
|
||||
"from": "gpt",
|
||||
@@ -313,7 +318,7 @@ KTO 数据集需要额外添加一个 `kto_tag` 列,包含 bool 类型的人
|
||||
"conversations": [
|
||||
{
|
||||
"from": "human",
|
||||
"value": "<image>人类指令"
|
||||
"value": "<image><image>用户指令"
|
||||
},
|
||||
{
|
||||
"from": "gpt",
|
||||
@@ -321,6 +326,7 @@ KTO 数据集需要额外添加一个 `kto_tag` 列,包含 bool 类型的人
|
||||
}
|
||||
],
|
||||
"images": [
|
||||
"图像路径(必填)",
|
||||
"图像路径(必填)"
|
||||
]
|
||||
}
|
||||
@@ -354,7 +360,7 @@ KTO 数据集需要额外添加一个 `kto_tag` 列,包含 bool 类型的人
|
||||
"conversations": [
|
||||
{
|
||||
"from": "human",
|
||||
"value": "<video>人类指令"
|
||||
"value": "<video><video>用户指令"
|
||||
},
|
||||
{
|
||||
"from": "gpt",
|
||||
@@ -362,6 +368,7 @@ KTO 数据集需要额外添加一个 `kto_tag` 列,包含 bool 类型的人
|
||||
}
|
||||
],
|
||||
"videos": [
|
||||
"视频路径(必填)",
|
||||
"视频路径(必填)"
|
||||
]
|
||||
}
|
||||
@@ -395,7 +402,7 @@ KTO 数据集需要额外添加一个 `kto_tag` 列,包含 bool 类型的人
|
||||
"conversations": [
|
||||
{
|
||||
"from": "human",
|
||||
"value": "<audio>人类指令"
|
||||
"value": "<audio><audio>用户指令"
|
||||
},
|
||||
{
|
||||
"from": "gpt",
|
||||
@@ -403,6 +410,7 @@ KTO 数据集需要额外添加一个 `kto_tag` 列,包含 bool 类型的人
|
||||
}
|
||||
],
|
||||
"audios": [
|
||||
"音频路径(必填)",
|
||||
"音频路径(必填)"
|
||||
]
|
||||
}
|
||||
@@ -437,7 +445,7 @@ OpenAI 格式仅仅是 sharegpt 格式的一种特殊情况,其中第一条消
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": "人类指令"
|
||||
"content": "用户指令"
|
||||
},
|
||||
{
|
||||
"role": "assistant",
|
||||
|
||||
Reference in New Issue
Block a user