Support multiple tool calls in a single response (e.g. QWEN2): the chat API now returns every parsed tool call instead of only one.

Former-commit-id: 48f870edc96ada40360f7e6e67cbf58805295b33
2024-06-10 02:00:14 +08:00
parent 44829df762
commit bc04ca464a
2 changed files with 28 additions and 19 deletions
--- a/src/llamafactory/api/chat.py
+++ b/src/llamafactory/api/chat.py
@@ -150,11 +150,14 @@ async def create_chat_completion_response(
        else:
            result = response.response_text

-        if isinstance(result, tuple):
-            name, arguments = result
-            function = Function(name=name, arguments=arguments)
-            tool_call = FunctionCall(id="call_{}".format(uuid.uuid4().hex), function=function)
-            response_message = ChatCompletionMessage(role=Role.ASSISTANT, tool_calls=[tool_call])
+        if isinstance(result, list):
+            tool_calls = []
+            for tool in result:
+                name, arguments = tool
+                function = Function(name=name, arguments=arguments)
+                tool_call = FunctionCall(id="call_{}".format(uuid.uuid4().hex), function=function)
+                tool_calls.append(tool_call)
+            response_message = ChatCompletionMessage(role=Role.ASSISTANT, tool_calls=tool_calls)
            finish_reason = Finish.TOOL
        else:
            response_message = ChatCompletionMessage(role=Role.ASSISTANT, content=result)