Support interleave probabilities for dataset mixing
Former-commit-id: 168d99816f9bdc746c587f7f09753ba7e0a4b19d
This commit is contained in:
@@ -54,6 +54,10 @@ class DataArguments:
         default="concat",
         metadata={"help": "Strategy to use in dataset mixing."}
     )
+    interleave_probs: Optional[str] = field(
+        default=None,
+        metadata={"help": "Probabilities to sample data from datasets. Use commas to separate multiple datasets."}
+    )
     overwrite_cache: Optional[bool] = field(
         default=False,
         metadata={"help": "Overwrite the cached training and evaluation sets."}
@@ -103,6 +107,9 @@ class DataArguments:
         else:
             prefix_list = [None] * len(dataset_names)
 
+        if self.interleave_probs is not None:
+            self.interleave_probs = [float(prob.strip()) for prob in self.interleave_probs.split(",")]
+
         self.dataset_list: List[DatasetAttr] = []
         for i, name in enumerate(dataset_names):
             if name not in dataset_info:
|
||||
Reference in New Issue
Block a user