"add support for vllm api stop parameter"
Former-commit-id: b9f21fa639b66db09c79404d885661c96bdf9395
This commit is contained in:
@@ -141,6 +141,7 @@ def create_app(chat_model: "ChatModel") -> "FastAPI":
             top_p=request.top_p,
             max_new_tokens=request.max_tokens,
             num_return_sequences=request.n,
+            stop=request.stop
         )

         prompt_length, response_length = 0, 0
@@ -193,6 +194,7 @@ def create_app(chat_model: "ChatModel") -> "FastAPI":
             temperature=request.temperature,
             top_p=request.top_p,
             max_new_tokens=request.max_tokens,
+            stop=request.stop
         ):
             if len(new_token) == 0:
                 continue
||||
@@ -1,6 +1,6 @@
 import time
 from enum import Enum, unique
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Union

 from pydantic import BaseModel, Field
 from typing_extensions import Literal
@@ -78,6 +78,7 @@ class ChatCompletionRequest(BaseModel):
     n: int = 1
     max_tokens: Optional[int] = None
     stream: bool = False
+    stop: Union[Optional[str], List[str]] = None


 class ChatCompletionResponseChoice(BaseModel):
||||
Reference in New Issue
Block a user