Merge branch 'hiyouga:main' into pixtral-patch

Former-commit-id: 2076d00dfbe1279a91207157fd6d9a118427626a
This commit is contained in:
Kingsley
2024-10-08 21:04:08 +08:00
committed by GitHub
13 changed files with 27 additions and 17 deletions

View File

@@ -82,6 +82,8 @@ def load_tokenizer(model_args: "ModelArguments") -> "TokenizerModule":
padding_side="right",
**init_kwargs,
)
except Exception as e:
raise OSError("Failed to load tokenizer.") from e
if model_args.new_special_tokens is not None:
num_added_tokens = tokenizer.add_special_tokens(
@@ -97,12 +99,13 @@ def load_tokenizer(model_args: "ModelArguments") -> "TokenizerModule":
try:
processor = AutoProcessor.from_pretrained(model_args.model_name_or_path, **init_kwargs)
patch_processor(processor, config, tokenizer, model_args)
except Exception:
except Exception as e:
logger.warning("Processor was not found: {}.".format(e))
processor = None
# Avoid keeping a tokenizer that was loaded in place of a processor, see:
# https://github.com/huggingface/transformers/blob/v4.40.0/src/transformers/models/auto/processing_auto.py#L324
if "Processor" not in processor.__class__.__name__:
if processor is not None and "Processor" not in processor.__class__.__name__:
processor = None
return {"tokenizer": tokenizer, "processor": processor}