support rope scaling, fix #475 #476 #478

Former-commit-id: 337d5f68b72230e545e7a94ca789187c7a2b7187
2023-08-12 20:46:27 +08:00
parent cde9f3db57
commit fdfb644f0a
12 changed files with 267 additions and 277 deletions
--- a/src/llmtuner/hparams/model_args.py
+++ b/src/llmtuner/hparams/model_args.py
@@ -43,9 +43,9 @@ class ModelArguments:
        default=True,
        metadata={"help": "Whether to use double quantization in int4 training or not."}
    )
-    compute_dtype: Optional[torch.dtype] = field(
+    rope_scaling: Optional[Literal["linear", "dynamic"]] = field(
        default=None,
-        metadata={"help": "Used in quantization configs. Do not specify this argument manually."}
+        metadata={"help": "Adopt scaled rotary positional embeddings."}
    )
    checkpoint_dir: Optional[str] = field(
        default=None,
@@ -63,8 +63,19 @@ class ModelArguments:
        default=None,
        metadata={"help": "Auth token to log in with Hugging Face Hub."}
    )
+    compute_dtype: Optional[torch.dtype] = field(
+        default=None,
+        metadata={"help": "Used in quantization configs. Do not specify this argument manually."}
+    )
+    model_max_length: Optional[int] = field(
+        default=None,
+        metadata={"help": "Used in rope scaling. Do not specify this argument manually."}
+    )

    def __post_init__(self):
+        if self.compute_dtype is not None or self.model_max_length is not None:
+            raise ValueError("These arguments cannot be specified.")
+
        if self.checkpoint_dir is not None: # support merging multiple lora weights
            self.checkpoint_dir = [cd.strip() for cd in self.checkpoint_dir.split(",")]