Merge pull request #5323 from naem1023/feat/add-dataset-map-batch-size-argument

Add a batch size argument for the map function used when preprocessing the dataset

Former-commit-id: c3428c5807500d087cdee4386798e10e39c9cf30
This commit is contained in:
hoshi-hiyouga
2024-09-04 22:09:36 +08:00
committed by GitHub
2 changed files with 7 additions and 0 deletions

View File

@@ -113,6 +113,10 @@ class DataArguments:
default=None,
metadata={"help": "Path to save or load the tokenized datasets."},
)
# Optional batch size for dataset mapping; unset (None) by default.
# NOTE(review): presumably forwarded as the batch size of the dataset `map`
# call during preprocessing, with None leaving that call's own default in
# effect — the call site is not visible here, confirm before relying on it.
dataset_map_batch_size: Optional[int] = field(
default=None,
metadata={"help": "Batch size for dataset mapping."},
)
def __post_init__(self):
def split_arg(arg):