[misc] fix gradient checkpointing function (#6916)

Former-commit-id: 35e069a52b3d7cfd9b0107574b09265eb2290f0b
This commit is contained in:
hoshi-hiyouga
2025-02-13 00:17:18 +08:00
committed by GitHub
parent 0c0cdc26bc
commit 3a3f4072e5
3 changed files with 17 additions and 13 deletions

View File

@@ -89,6 +89,7 @@ def get_custom_gradient_checkpointing_func(gradient_checkpointing_func: Callable
for arg in args:
if torch.is_tensor(arg) and torch.is_floating_point(arg):
arg.requires_grad_(True)
break # assume the first tensor is always the hidden states
return gradient_checkpointing_func(func, *args, **kwargs)