Merge pull request #6364 from hiyouga/hiyouga/control_reenterent_gc

[model] support non-reentrant-gc

Former-commit-id: a8a13cb360980bb4acd493e33ed405e07460fe73
This commit is contained in:
hoshi-hiyouga
2024-12-17 19:58:36 +08:00
committed by GitHub
2 changed files with 7 additions and 1 deletions

View File

@@ -237,6 +237,10 @@ class ModelArguments(QuantizationArguments, ProcessorArguments, ExportArguments,
default=False,
metadata={"help": "Whether or not to disable gradient checkpointing."},
)
# Boolean option selecting reentrant gradient checkpointing; enabled (True) by default.
# NOTE(review): the code that consumes this flag is not visible in this hunk — confirm
# where it is read before relying on its effect.
use_reentrant_gc: bool = field(
default=True,
metadata={"help": "Whether or not to use reentrant gradient checkpointing."},
)
upcast_layernorm: bool = field(
default=False,
metadata={"help": "Whether or not to upcast the layernorm weights in fp32."},