fix #1032

Former-commit-id: 1235b2da5a79ffefd1342054ea8e7dabf47398c1
2023-09-27 22:42:16 +08:00
parent 386d85ae72
commit e8a375c8f2
2 changed files with 11 additions and 2 deletions
--- a/src/llmtuner/api/app.py
+++ b/src/llmtuner/api/app.py
@@ -68,7 +68,11 @@ def create_app(chat_model: ChatModel) -> FastAPI:
            return EventSourceResponse(generate, media_type="text/event-stream")

        response, (prompt_length, response_length) = chat_model.chat(
-            query, history, system, temperature=request.temperature, top_p=request.top_p, max_new_tokens=request.max_tokens
+            query, history, system,
+            do_sample=request.do_sample,
+            temperature=request.temperature,
+            top_p=request.top_p,
+            max_new_tokens=request.max_tokens
        )

        usage = ChatCompletionResponseUsage(
@@ -95,7 +99,11 @@ def create_app(chat_model: ChatModel) -> FastAPI:
        yield chunk.json(exclude_unset=True, ensure_ascii=False)

        for new_text in chat_model.stream_chat(
-            query, history, system, temperature=request.temperature, top_p=request.top_p, max_new_tokens=request.max_tokens
+            query, history, system,
+            do_sample=request.do_sample,
+            temperature=request.temperature,
+            top_p=request.top_p,
+            max_new_tokens=request.max_tokens
        ):
            if len(new_text) == 0:
                continue