Merge branch 'hiyouga:main' into main

Former-commit-id: 131d0bcd554dedd794add7eb3d7b1201cac80e7c
This commit is contained in:
BUAADreamer
2024-04-25 20:02:50 +08:00
committed by GitHub
8 changed files with 58 additions and 23 deletions

View File

@@ -78,9 +78,9 @@ def split_dataset(
if training_args.do_train:
if data_args.val_size > 1e-6: # Split the dataset
if data_args.streaming:
+dataset = dataset.shuffle(buffer_size=data_args.buffer_size, seed=training_args.seed)
val_set = dataset.take(int(data_args.val_size))
train_set = dataset.skip(int(data_args.val_size))
-dataset = dataset.shuffle(buffer_size=data_args.buffer_size, seed=training_args.seed)
return {"train_dataset": train_set, "eval_dataset": val_set}
else:
val_size = int(data_args.val_size) if data_args.val_size > 1 else data_args.val_size