[example] add bash usage (#7794)

2025-04-22 00:25:51 +08:00
parent 12ada72ed4
commit b07628dea5
13 changed files with 184 additions and 98 deletions
--- a/src/llamafactory/model/model_utils/quantization.py
+++ b/src/llamafactory/model/model_utils/quantization.py
@@ -124,6 +124,7 @@ def configure_quantization(

        try:
            from optimum.gptq import utils as gq_utils
+
            if "language_model.model.layers" not in gq_utils.BLOCK_PATTERNS:
                gq_utils.BLOCK_PATTERNS.insert(0, "language_model.model.layers")
        except ImportError:
--- a/src/llamafactory/model/patcher.py
+++ b/src/llamafactory/model/patcher.py
@@ -54,26 +54,22 @@ def patch_tokenizer(tokenizer: "PreTrainedTokenizer", model_args: "ModelArgument
    if model_args.model_max_length is not None and tokenizer.model_max_length < model_args.model_max_length:
        tokenizer.model_max_length = model_args.model_max_length  # enlarge the tokenizer max length

-    if model_args.new_special_tokens is not None:
-        num_added_special_tokens = tokenizer.add_special_tokens(
-            dict(additional_special_tokens=model_args.new_special_tokens),
-            replace_additional_special_tokens=False,
+    if model_args.add_tokens is not None:
+        num_added_tokens = tokenizer.add_tokens(new_tokens=model_args.add_tokens, special_tokens=False)
+        logger.info_rank0("Add tokens {} to tokenizer's vocabulary.".format(",".join(model_args.add_tokens)))
+        if num_added_tokens > 0 and not model_args.resize_vocab:
+            model_args.resize_vocab = True
+            logger.warning_rank0("New tokens have been added, changed `resize_vocab` to True.")
+
+    if model_args.add_special_tokens is not None:
+        num_added_special_tokens = tokenizer.add_tokens(new_tokens=model_args.add_special_tokens, special_tokens=True)
+        logger.info_rank0(
+            "Add special tokens {} to tokenizer's vocabulary.".format(",".join(model_args.add_special_tokens))
        )
-        logger.info_rank0("Add special tokens {} to vocab.".format(",".join(model_args.new_special_tokens)))
        if num_added_special_tokens > 0 and not model_args.resize_vocab:
            model_args.resize_vocab = True
            logger.warning_rank0("New special tokens have been added, changed `resize_vocab` to True.")

-    if model_args.new_normal_tokens is not None:
-        num_added_normal_tokens = tokenizer.add_tokens(
-            new_tokens=model_args.new_normal_tokens,
-            special_tokens=False,
-        )
-        logger.info_rank0("Add normal tokens {} to vocab.".format(",".join(model_args.new_normal_tokens)))
-        if num_added_normal_tokens > 0 and not model_args.resize_vocab:
-            model_args.resize_vocab = True
-            logger.warning_rank0("New normal tokens have been added, changed `resize_vocab` to True.")
-

 def patch_processor(
    processor: "ProcessorMixin",