[misc] fix adding new tokens (#7253)

Co-authored-by: hoshi-hiyouga <hiyouga@buaa.edu.cn>
This commit is contained in:
flashJd
2025-04-21 23:19:02 +08:00
committed by GitHub
parent c5ba9106ec
commit 0ac641326b
2 changed files with 21 additions and 4 deletions

View File

@@ -69,6 +69,10 @@ class BaseModelArguments:
default=None,
metadata={"help": "Special tokens to be added into the tokenizer. Use commas to separate multiple tokens."},
)
# Optional comma-separated string of "normal" tokens to add to the tokenizer
# (counterpart of `new_special_tokens`). Defaults to None, meaning the option is unset.
# NOTE(review): the post-init code replaces this string with a list of stripped
# tokens, so after initialization the value is a list rather than a str — confirm
# that downstream consumers expect the list form.
new_normal_tokens: Optional[str] = field(
default=None,
metadata={"help": "Normal tokens to be added into the tokenizer. Use commas to separate multiple tokens."},
)
model_revision: str = field(
default="main",
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
@@ -176,6 +180,9 @@ class BaseModelArguments:
if self.adapter_name_or_path is not None: # support merging multiple lora weights
self.adapter_name_or_path = [path.strip() for path in self.adapter_name_or_path.split(",")]
if self.new_normal_tokens is not None: # support multiple normal tokens
self.new_normal_tokens = [token.strip() for token in self.new_normal_tokens.split(",")]
if self.new_special_tokens is not None: # support multiple special tokens
self.new_special_tokens = [token.strip() for token in self.new_special_tokens.split(",")]