Fix #294

Former-commit-id: 09762d9849655f5e6c71b9472d55b42489dd944b
2023-08-01 18:13:03 +08:00
parent cb4d1d5ebb
commit 250fecfcd4
2 changed files with 24 additions and 10 deletions
--- a/src/llmtuner/extras/misc.py
+++ b/src/llmtuner/extras/misc.py
@@ -117,3 +117,25 @@ def torch_gc() -> None:
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        torch.cuda.ipc_collect()
+
+
+def dispatch_model(model: "PreTrainedModel") -> "PreTrainedModel":
+    r"""
+    Dispatches a pre-trained model to GPUs with balanced memory.
+    Borrowed from: https://github.com/huggingface/transformers/blob/v4.31.0/src/transformers/modeling_utils.py#L2803
+    """
+    if torch.cuda.device_count() > 1:
+        from accelerate import dispatch_model
+        from accelerate.utils import infer_auto_device_map, get_balanced_memory
+
+        if model._no_split_modules is None:
+            raise ValueError("The model class needs to implement the `_no_split_modules` attribute.")
+
+        kwargs = {"dtype": model.dtype, "no_split_module_classes": model._no_split_modules}
+        max_memory = get_balanced_memory(model, **kwargs)
+        # Make sure tied weights are tied before creating the device map.
+        model.tie_weights()
+        device_map = infer_auto_device_map(model, max_memory=max_memory, **kwargs)
+        return dispatch_model(model, device_map)
+    else:
+        return model.cuda()