Adapt for BAdam with DeepSpeed ZeRO-3

Former-commit-id: fff2a020ec8713022bd8145f4a7168168ea07ca4
Jonery
2024-06-17 18:18:10 +08:00
parent 4bd276f58f
commit ba303fd1aa
3 changed files with 28 additions and 6 deletions


@@ -184,12 +184,12 @@ def get_train_args(args: Optional[Dict[str, Any]] = None) -> _TRAIN_CLS:
     ):
         raise ValueError("Distributed training does not support layer-wise GaLore.")
-    if (
-        finetuning_args.use_badam
-        and finetuning_args.badam_mode == "layer"
-        and training_args.parallel_mode.value == "distributed"
-    ):
-        raise ValueError("Layer-wise BAdam does not yet support distributed training, use ratio-wise BAdam.")
+    # if (
+    #     finetuning_args.use_badam
+    #     and finetuning_args.badam_mode == "layer"
+    #     and training_args.parallel_mode.value == "distributed"
+    # ):
+    #     raise ValueError("Layer-wise BAdam does not yet support distributed training, use ratio-wise BAdam.")
     if (finetuning_args.use_galore or finetuning_args.use_badam) and training_args.deepspeed is not None:
         raise ValueError("GaLore and BAdam are incompatible with DeepSpeed yet.")