"add support for vllm api stop parameter"

Former-commit-id: b9f21fa639b66db09c79404d885661c96bdf9395
Author: zhaonx
Date: 2024-04-30 17:17:09 +08:00
parent 3cef844079
commit 2d95127c33
4 changed files with 12 additions and 3 deletions

View File

@@ -141,6 +141,7 @@ def create_app(chat_model: "ChatModel") -> "FastAPI":
top_p=request.top_p,
max_new_tokens=request.max_tokens,
num_return_sequences=request.n,
+stop=request.stop
)
prompt_length, response_length = 0, 0
@@ -193,6 +194,7 @@ def create_app(chat_model: "ChatModel") -> "FastAPI":
temperature=request.temperature,
top_p=request.top_p,
max_new_tokens=request.max_tokens,
+stop=request.stop
):
if len(new_token) == 0:
continue
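
For context, here is a minimal sketch (not part of this commit) of how a generation kwarg like stop typically reaches vLLM's sampling configuration, assuming the serving engine forwards these arguments to vllm.SamplingParams; build_sampling_params is a hypothetical helper name used only for illustration:

from vllm import SamplingParams

# Hypothetical helper: map the API-layer generation kwargs onto vLLM's
# SamplingParams. vLLM accepts `stop` as a single string, a list of
# strings, or None.
def build_sampling_params(temperature, top_p, max_new_tokens, n=1, stop=None):
    return SamplingParams(
        n=n,
        temperature=temperature,
        top_p=top_p,
        max_tokens=max_new_tokens,  # vLLM calls the new-token budget `max_tokens`
        stop=stop,
    )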

View File

@@ -1,6 +1,6 @@
import time
from enum import Enum, unique
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Union
from pydantic import BaseModel, Field
from typing_extensions import Literal
@@ -78,6 +78,7 @@ class ChatCompletionRequest(BaseModel):
n: int = 1
max_tokens: Optional[int] = None
stream: bool = False
+stop: Union[Optional[str], List[str]] = None
class ChatCompletionResponseChoice(BaseModel):
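
As a quick illustration of the new annotation, a standalone pydantic model with the same field (the StopOnly name is only for this example) validates a single stop string, a list of stop strings, or no value at all:

from typing import List, Optional, Union
from pydantic import BaseModel

# Throwaway model that mirrors only the new field.
class StopOnly(BaseModel):
    stop: Union[Optional[str], List[str]] = None

print(StopOnly(stop="</s>").stop)               # </s>
print(StopOnly(stop=["</s>", "\nUser:"]).stop)  # ['</s>', '\nUser:']
print(StopOnly().stop)                          # None

On the client side, an OpenAI-compatible request can then carry the parameter in either form; a sketch using the openai Python client against a locally served endpoint (the base URL and model name below are placeholders):

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="none")
response = client.chat.completions.create(
    model="my-model",
    messages=[{"role": "user", "content": "Write one sentence about llamas."}],
    stop=["</s>", "\n\n"],  # a plain string such as "</s>" also works
)
print(response.choices[0].message.content)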