[model] support audio (#6701)

* support qwen2_audio

* improve code

* lint

* fix

* fix

* fix

---------

Co-authored-by: hiyouga <hiyouga@buaa.edu.cn>
Former-commit-id: 5eacb5629e4d7733cd992a63747a1335f2c6a929
Zhangchi Feng
2025-02-05 04:59:09 +08:00
committed by GitHub
parent 9feb78e7b4
commit 8f401e37f8
35 changed files with 675 additions and 213 deletions


@@ -26,9 +26,9 @@ from ..extras import logging
 from ..extras.constants import (
     DATA_CONFIG,
     DEFAULT_TEMPLATE,
+    MULTIMODAL_SUPPORTED_MODELS,
     SUPPORTED_MODELS,
     TRAINING_ARGS,
-    VISION_MODELS,
     DownloadSource,
 )
 from ..extras.misc import use_modelscope, use_openmind
@@ -136,13 +136,6 @@ def get_template(model_name: str) -> str:
     return DEFAULT_TEMPLATE.get(model_name, "default")
 
 
-def get_visual(model_name: str) -> bool:
-    r"""
-    Judges if the model is a vision language model.
-    """
-    return model_name in VISION_MODELS
-
-
 def get_time() -> str:
     r"""
     Gets current date and time.
@@ -150,6 +143,13 @@ def get_time() -> str:
     return datetime.now().strftime(r"%Y-%m-%d-%H-%M-%S")
 
 
+def is_multimodal(model_name: str) -> bool:
+    r"""
+    Judges if the model is a vision language model.
+    """
+    return model_name in MULTIMODAL_SUPPORTED_MODELS
+
+
 def load_dataset_info(dataset_dir: str) -> Dict[str, Dict[str, Any]]:
     r"""
     Loads dataset_info.json.
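
For context on the rename above: the snippet below is a minimal, self-contained sketch of the new check. Only the helper's name, body, and the MULTIMODAL_SUPPORTED_MODELS constant come from this diff; the set contents and the example model names are illustrative assumptions, not values taken from the repository. Note that the added docstring still says "vision language model" even though the registry now also covers audio models such as Qwen2-Audio.

# Minimal sketch -- the set contents are placeholder assumptions;
# only the helper's logic mirrors the diff above.
from typing import Set

MULTIMODAL_SUPPORTED_MODELS: Set[str] = {
    "Qwen2-VL-7B-Instruct",      # vision-language example (assumed entry)
    "Qwen2-Audio-7B-Instruct",   # audio example (assumed entry)
}


def is_multimodal(model_name: str) -> bool:
    r"""
    Judges if the model accepts non-text inputs (images, audio, ...).
    """
    return model_name in MULTIMODAL_SUPPORTED_MODELS


if __name__ == "__main__":
    # A caller that previously branched on get_visual() can switch to the broader check.
    for name in ("Qwen2-Audio-7B-Instruct", "Llama-3-8B-Instruct"):
        print(name, is_multimodal(name))

Callers that only need the general "is this model multimodal" question use the single set membership test; any vision-specific behavior would have to be handled separately, since the VISION_MODELS constant is removed by this commit.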