Support interleave probabilities (`interleave_probs`) when interleaving datasets

Former-commit-id: 168d99816f9bdc746c587f7f09753ba7e0a4b19d
This commit is contained in:
hiyouga
2023-08-04 21:27:35 +08:00
parent aaa85190eb
commit 76f3ae7bf3
3 changed files with 17 additions and 7 deletions

View File

@@ -111,6 +111,6 @@ def get_dataset(
if not data_args.streaming:
logger.warning("We recommend using `mix_strategy=concat` in non-streaming mode.")
stopping_strategy = "first_exhausted" if data_args.mix_strategy.endswith("under") else "all_exhausted"
-return interleave_datasets(all_datasets, stopping_strategy=stopping_strategy)
+return interleave_datasets(all_datasets, data_args.interleave_probs, stopping_strategy=stopping_strategy)
else:
raise ValueError("Unknown mixing strategy.")