[optim] clean apollo (#6645)

* clean apollo code

* update readme

Former-commit-id: 38b8ec4a99189483124b54df9d6bc6b0d318855a
This commit is contained in:
hoshi-hiyouga
2025-01-15 01:42:50 +08:00
committed by GitHub
parent c2120432db
commit 7638f1070e
14 changed files with 110 additions and 103 deletions

View File

@@ -286,7 +286,7 @@ class ApolloArguments:
default="random",
metadata={"help": "Type of APOLLO low-rank projection algorithm (svd or random)."},
)
apollo_proj_type: Literal["std", "right", "left",] = field(
apollo_proj_type: Literal["std", "right", "left"] = field(
default="std",
metadata={"help": "Type of APOLLO projection."},
)
@@ -475,17 +475,11 @@ class FinetuningArguments(
if self.use_llama_pro and self.finetuning_type == "full":
raise ValueError("`use_llama_pro` is only valid for Freeze or LoRA training.")
if self.finetuning_type == "lora" and (self.use_galore or self.use_badam or self.use_apollo):
raise ValueError("Cannot use LoRA with GaLore or BAdam together.")
if self.finetuning_type == "lora" and (self.use_galore or self.use_apollo or self.use_badam):
raise ValueError("Cannot use LoRA with GaLore, APOLLO or BAdam together.")
if self.use_galore and self.use_badam:
raise ValueError("Cannot use GaLore with BAdam together.")
if self.use_galore and self.use_apollo:
raise ValueError("Cannot use GaLore with APOLLO together.")
if self.use_badam and self.use_apollo:
raise ValueError("Cannot use BAdam with APOLLO together.")
if int(self.use_galore) + int(self.use_apollo) + (self.use_badam) > 1:
raise ValueError("Cannot use GaLore, APOLLO or BAdam together.")
if self.pissa_init and (self.stage in ["ppo", "kto"] or self.use_ref_model):
raise ValueError("Cannot use PiSSA for current training stage.")