support baichuan model

Former-commit-id: d683042fbcb2ee43b9823262d0a65b64f4cb54cb
Author: hiyouga
Date: 2023-06-15 16:02:01 +08:00
Parent: 496846e819
Commit: 194c5d2bee
3 changed files with 10 additions and 1 deletion


@@ -170,6 +170,8 @@ def load_pretrained(
         **config_kwargs
     )
     tokenizer.pad_token_id = 0 if tokenizer.pad_token_id is None else tokenizer.pad_token_id # set as the <unk> token
+    if tokenizer.pad_token_id == 64000:
+        tokenizer.pad_token_id = 0 # for baichuan model (need fix)
 
     config = AutoConfig.from_pretrained(model_args.model_name_or_path, **config_kwargs)
     is_mergeable = True
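
For context, the two added lines patch the Baichuan tokenizer, which reports a pad_token_id of 64000; Baichuan-7B uses a 64000-entry vocabulary, so that id falls one past the last valid token and is remapped to 0, the <unk> token, just as the preceding line does when no pad token is defined. Below is a minimal sketch of the same workaround as a standalone helper; the checkpoint name and the trust_remote_code flag are illustrative assumptions, not part of this commit:

from transformers import AutoTokenizer

def fix_pad_token(tokenizer):
    # No pad token defined: fall back to 0, the <unk> token.
    if tokenizer.pad_token_id is None:
        tokenizer.pad_token_id = 0
    # Baichuan reports the out-of-range id 64000 (marked "need fix" above),
    # so remap it to 0 as well.
    if tokenizer.pad_token_id == 64000:
        tokenizer.pad_token_id = 0
    return tokenizer

# Assumed checkpoint for illustration; Baichuan loads via custom remote code.
tokenizer = fix_pad_token(
    AutoTokenizer.from_pretrained("baichuan-inc/Baichuan-7B", trust_remote_code=True)
)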


@@ -83,7 +83,13 @@ def prepare_model_for_training(
             param.data = param.data.to(torch.float32)
 
     if use_gradient_checkpointing:
-        model.enable_input_require_grads()
+        if hasattr(model, "enable_input_require_grads"):
+            model.enable_input_require_grads()
+        else:
+            def make_inputs_require_grad(module, input, output):
+                output.requires_grad_(True)
+            model.get_input_embeddings().register_forward_hook(make_inputs_require_grad)
         model.gradient_checkpointing_enable()
         model.config.use_cache = False # turn off when gradient checkpointing is enabled
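
The new hasattr guard exists because enable_input_require_grads is a relatively recent PreTrainedModel method that models built from custom remote code (presumably Baichuan here) may not provide. The fallback hook reproduces its effect by forcing the embedding output to require grad; without it, gradient checkpointing recomputes blocks whose inputs carry no autograd graph, and a frozen base model with trainable adapters receives no gradients. A self-contained sketch of the same branch follows; gpt2 is a stand-in checkpoint chosen only so the snippet runs, not a model touched by this commit:

from transformers import AutoModelForCausalLM

def enable_checkpointing(model):
    if hasattr(model, "enable_input_require_grads"):
        model.enable_input_require_grads()
    else:
        # Fallback for models lacking the method: make the embedding output
        # require grad so checkpointed blocks stay connected to the graph.
        def make_inputs_require_grad(module, input, output):
            output.requires_grad_(True)
        model.get_input_embeddings().register_forward_hook(make_inputs_require_grad)
    model.gradient_checkpointing_enable()
    model.config.use_cache = False  # caching is incompatible with checkpointing
    return model

model = enable_checkpointing(AutoModelForCausalLM.from_pretrained("gpt2"))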