"add support for vllm api stop parameter"

Former-commit-id: b9f21fa639b66db09c79404d885661c96bdf9395
Author: zhaonx
Date: 2024-04-30 17:17:09 +08:00
parent 3cef844079
commit 2d95127c33
4 changed files with 12 additions and 3 deletions

View File

@@ -141,6 +141,7 @@ def create_app(chat_model: "ChatModel") -> "FastAPI":
top_p=request.top_p,
max_new_tokens=request.max_tokens,
num_return_sequences=request.n,
+stop=request.stop
)
prompt_length, response_length = 0, 0
@@ -193,6 +194,7 @@ def create_app(chat_model: "ChatModel") -> "FastAPI":
temperature=request.temperature,
top_p=request.top_p,
max_new_tokens=request.max_tokens,
+stop=request.stop
):
if len(new_token) == 0:
continue
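
For context, here is a minimal sketch (not part of this commit) of how a generation kwarg like stop typically reaches vLLM's sampling configuration, assuming the serving engine forwards these arguments to vllm.SamplingParams; build_sampling_params is a hypothetical helper name used only for illustration:

from vllm import SamplingParams

# Hypothetical helper: map the API-layer generation kwargs onto vLLM's
# SamplingParams. vLLM accepts `stop` as a single string, a list of
# strings, or None.
def build_sampling_params(temperature, top_p, max_new_tokens, n=1, stop=None):
    return SamplingParams(
        n=n,
        temperature=temperature,
        top_p=top_p,
        max_tokens=max_new_tokens,  # vLLM calls the new-token budget `max_tokens`
        stop=stop,
    )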

View File

@@ -1,6 +1,6 @@
import time
from enum import Enum, unique
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Union
from pydantic import BaseModel, Field
from typing_extensions import Literal
@@ -78,6 +78,7 @@ class ChatCompletionRequest(BaseModel):
n: int = 1
max_tokens: Optional[int] = None
stream: bool = False
+stop: Union[Optional[str], List[str]] = None
class ChatCompletionResponseChoice(BaseModel):
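
As a quick illustration of the new annotation, a standalone pydantic model with the same field (the StopOnly name is only for this example) validates a single stop string, a list of stop strings, or no value at all:

from typing import List, Optional, Union
from pydantic import BaseModel

# Throwaway model that mirrors only the new field.
class StopOnly(BaseModel):
    stop: Union[Optional[str], List[str]] = None

print(StopOnly(stop="</s>").stop)               # </s>
print(StopOnly(stop=["</s>", "\nUser:"]).stop)  # ['</s>', '\nUser:']
print(StopOnly().stop)                          # None

On the client side, an OpenAI-compatible request can then carry the parameter in either form; a sketch using the openai Python client against a locally served endpoint (the base URL and model name below are placeholders):

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="none")
response = client.chat.completions.create(
    model="my-model",
    messages=[{"role": "user", "content": "Write one sentence about llamas."}],
    stop=["</s>", "\n\n"],  # a plain string such as "</s>" also works
)
print(response.choices[0].message.content)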