improve rlhf

Former-commit-id: e441780e3db256ca09a442ea9254e7ce16898a07
This commit is contained in:
hiyouga
2024-07-02 22:23:08 +08:00
parent f0b01803ea
commit e6ba7ef3e6
8 changed files with 55 additions and 114 deletions

View File

@@ -31,31 +31,31 @@ class DatasetAttr:
Dataset attributes.
"""
""" basic configs """
# basic configs
load_from: Literal["hf_hub", "ms_hub", "script", "file"]
dataset_name: str
formatting: Literal["alpaca", "sharegpt"] = "alpaca"
ranking: bool = False
""" extra configs """
# extra configs
subset: Optional[str] = None
folder: Optional[str] = None
num_samples: Optional[int] = None
""" common columns """
# common columns
system: Optional[str] = None
tools: Optional[str] = None
images: Optional[str] = None
""" rlhf columns """
# rlhf columns
chosen: Optional[str] = None
rejected: Optional[str] = None
kto_tag: Optional[str] = None
""" alpaca columns """
# alpaca columns
prompt: Optional[str] = "instruction"
query: Optional[str] = "input"
response: Optional[str] = "output"
history: Optional[str] = None
""" sharegpt columns """
# sharegpt columns
messages: Optional[str] = "conversations"
""" sharegpt tags """
# sharegpt tags
role_tag: Optional[str] = "from"
content_tag: Optional[str] = "value"
user_tag: Optional[str] = "human"