Support batch inference in vLLM

Former-commit-id: 3ef5ed3b9a44eed2f7e3ff221dfc343d0a97c0b5
2024-12-04 13:50:00 +00:00
parent 53edd62f8b
commit c1768cfb14
29 changed files with 148 additions and 407 deletions
--- a/examples/inference/llama3.yaml
+++ b/examples/inference/llama3.yaml
@@ -1,2 +1,3 @@
 model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 template: llama3
+infer_backend: huggingface  # choices: [huggingface, vllm]
--- a/examples/inference/llama3_lora_sft.yaml
+++ b/examples/inference/llama3_lora_sft.yaml
@@ -2,3 +2,4 @@ model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
 adapter_name_or_path: saves/llama3-8b/lora/sft
 template: llama3
 finetuning_type: lora
+infer_backend: huggingface  # choices: [huggingface, vllm]
--- a/examples/inference/llava1_5.yaml
+++ b/examples/inference/llava1_5.yaml
@@ -1,2 +1,3 @@
 model_name_or_path: llava-hf/llava-1.5-7b-hf
 template: llava
+infer_backend: huggingface  # choices: [huggingface, vllm]
--- a/examples/inference/qwen2_vl.yaml
+++ b/examples/inference/qwen2_vl.yaml
@@ -1,2 +1,3 @@
 model_name_or_path: Qwen/Qwen2-VL-7B-Instruct
 template: qwen2_vl
+infer_backend: huggingface  # choices: [huggingface, vllm]