[model] support audio (#6701)

* support qwen2_audio
* improve code
* lint
* fix
* fix
* fix

---------

Co-authored-by: hiyouga <hiyouga@buaa.edu.cn>
Former-commit-id: 5eacb5629e4d7733cd992a63747a1335f2c6a929
2025-02-05 04:59:09 +08:00
parent 9feb78e7b4
commit 8f401e37f8
35 changed files with 675 additions and 213 deletions
--- a/src/llamafactory/hparams/data_args.py
+++ b/src/llamafactory/hparams/data_args.py
@@ -41,9 +41,9 @@ class DataArguments:
        default="data",
        metadata={"help": "Path to the folder containing the datasets."},
    )
-    image_dir: Optional[str] = field(
+    media_dir: Optional[str] = field(
        default=None,
-        metadata={"help": "Path to the folder containing the images or videos. Defaults to `dataset_dir`."},
+        metadata={"help": "Path to the folder containing the images, videos or audios. Defaults to `dataset_dir`."},
    )
    cutoff_len: int = field(
        default=2048,
@@ -133,8 +133,8 @@ class DataArguments:
        self.dataset = split_arg(self.dataset)
        self.eval_dataset = split_arg(self.eval_dataset)

-        if self.image_dir is None:
-            self.image_dir = self.dataset_dir
+        if self.media_dir is None:
+            self.media_dir = self.dataset_dir

        if self.dataset is None and self.val_size > 1e-6:
            raise ValueError("Cannot specify `val_size` if `dataset` is None.")