[data] add eval_on_each_dataset arg (#7912)

This commit is contained in:
hoshi-hiyouga
2025-04-30 06:56:43 +08:00
committed by GitHub
parent 6d2cde43e7
commit d4ee44bdef
3 changed files with 15 additions and 5 deletions

View File

@@ -99,6 +99,10 @@ class DataArguments:
default=0.0,
metadata={"help": "Size of the validation set, should be an integer or a float in range `[0,1)`."},
)
eval_on_each_dataset: bool = field(
default=False,
metadata={"help": "Whether or not to evaluate on each dataset separately."},
)
packing: Optional[bool] = field(
default=None,
metadata={"help": "Enable sequences packing in training. Will automatically enable in pre-training."},

View File

@@ -64,6 +64,7 @@ class RayArguments:
raise ValueError(
f"ray_storage_filesystem must be one of ['s3', 'gs', 'gcs'], got {self.ray_storage_filesystem}"
)
import pyarrow.fs as fs
if self.ray_storage_filesystem == "s3":