@@ -68,7 +68,11 @@ def create_app(chat_model: ChatModel) -> FastAPI:
|
||||
return EventSourceResponse(generate, media_type="text/event-stream")
|
||||
|
||||
response, (prompt_length, response_length) = chat_model.chat(
|
||||
query, history, system, temperature=request.temperature, top_p=request.top_p, max_new_tokens=request.max_tokens
|
||||
query, history, system,
|
||||
do_sample=request.do_sample,
|
||||
temperature=request.temperature,
|
||||
top_p=request.top_p,
|
||||
max_new_tokens=request.max_tokens
|
||||
)
|
||||
|
||||
usage = ChatCompletionResponseUsage(
|
||||
@@ -95,7 +99,11 @@ def create_app(chat_model: ChatModel) -> FastAPI:
|
||||
yield chunk.json(exclude_unset=True, ensure_ascii=False)
|
||||
|
||||
for new_text in chat_model.stream_chat(
|
||||
query, history, system, temperature=request.temperature, top_p=request.top_p, max_new_tokens=request.max_tokens
|
||||
query, history, system,
|
||||
do_sample=request.do_sample,
|
||||
temperature=request.temperature,
|
||||
top_p=request.top_p,
|
||||
max_new_tokens=request.max_tokens
|
||||
):
|
||||
if len(new_text) == 0:
|
||||
continue
|
||||
|
||||
Reference in New Issue
Block a user