fix: handle rate_limit_event crash in chat sessions

The Claude CLI sends `rate_limit_event` messages that the SDK's `parse_message()` doesn't recognize, raising `MessageParseError` and crashing all three chat session types (spec, assistant, expand).

Changes:
- Bump claude-agent-sdk minimum from 0.1.0 to 0.1.39
- Add `check_rate_limit_error()` helper in chat_constants.py that detects rate limits from both MessageParseError data payloads and error message text patterns
- Wrap `receive_response()` loops in all three `_query_claude()` methods with retry-on-rate-limit logic (up to 3 retries with backoff)
- Gracefully log and skip non-rate-limit MessageParseError instead of crashing the session
- Add `rate_limited` message type to frontend TypeScript types and handle it in useSpecChat, useAssistantChat, useExpandChat hooks to show "Rate limited. Retrying in Xs..." system messages

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-17 10:53:09 +00:00 · 2026-02-23 12:21:49 +02:00
parent b7aef15c3b
commit dcdd06e02e
11 changed files with 362 additions and 145 deletions
--- a/server/services/assistant_chat_session.py
+++ b/server/services/assistant_chat_session.py
@@ -7,6 +7,7 @@ The assistant can answer questions about the codebase and features
 but cannot modify any files.
 """

+import asyncio
 import json
 import logging
 import os
@@ -25,7 +26,12 @@ from .assistant_database import (
    create_conversation,
    get_messages,
 )
-from .chat_constants import ROOT_DIR
+from .chat_constants import (
+    MAX_CHAT_RATE_LIMIT_RETRIES,
+    ROOT_DIR,
+    calculate_rate_limit_backoff,
+    check_rate_limit_error,
+)

 # Load environment variables from .env file if present
 load_dotenv()
@@ -393,39 +399,66 @@ class AssistantChatSession:

        full_response = ""

-        # Stream the response
-        async for msg in self.client.receive_response():
-            msg_type = type(msg).__name__
+        # Stream the response (with rate-limit retry)
+        for _attempt in range(MAX_CHAT_RATE_LIMIT_RETRIES + 1):
+            try:
+                async for msg in self.client.receive_response():
+                    msg_type = type(msg).__name__

-            if msg_type == "AssistantMessage" and hasattr(msg, "content"):
-                for block in msg.content:
-                    block_type = type(block).__name__
+                    if msg_type == "AssistantMessage" and hasattr(msg, "content"):
+                        for block in msg.content:
+                            block_type = type(block).__name__

-                    if block_type == "TextBlock" and hasattr(block, "text"):
-                        text = block.text
-                        if text:
-                            full_response += text
-                            yield {"type": "text", "content": text}
+                            if block_type == "TextBlock" and hasattr(block, "text"):
+                                text = block.text
+                                if text:
+                                    full_response += text
+                                    yield {"type": "text", "content": text}

-                    elif block_type == "ToolUseBlock" and hasattr(block, "name"):
-                        tool_name = block.name
-                        tool_input = getattr(block, "input", {})
+                            elif block_type == "ToolUseBlock" and hasattr(block, "name"):
+                                tool_name = block.name
+                                tool_input = getattr(block, "input", {})
+
+                                # Intercept ask_user tool calls -> yield as question message
+                                if tool_name == "mcp__features__ask_user":
+                                    questions = tool_input.get("questions", [])
+                                    if questions:
+                                        yield {
+                                            "type": "question",
+                                            "questions": questions,
+                                        }
+                                        continue

-                        # Intercept ask_user tool calls -> yield as question message
-                        if tool_name == "mcp__features__ask_user":
-                            questions = tool_input.get("questions", [])
-                            if questions:
                                yield {
-                                    "type": "question",
-                                    "questions": questions,
+                                    "type": "tool_call",
+                                    "tool": tool_name,
+                                    "input": tool_input,
                                }
-                                continue
-
-                        yield {
-                            "type": "tool_call",
-                            "tool": tool_name,
-                            "input": tool_input,
-                        }
+                # Completed successfully — break out of retry loop
+                break
+            except Exception as exc:
+                is_rate_limit, retry_secs = check_rate_limit_error(exc)
+                if is_rate_limit and _attempt < MAX_CHAT_RATE_LIMIT_RETRIES:
+                    delay = retry_secs if retry_secs else calculate_rate_limit_backoff(_attempt)
+                    logger.warning(f"Rate limited (attempt {_attempt + 1}/{MAX_CHAT_RATE_LIMIT_RETRIES}), retrying in {delay}s")
+                    yield {
+                        "type": "rate_limited",
+                        "retry_in": delay,
+                        "attempt": _attempt + 1,
+                        "max_attempts": MAX_CHAT_RATE_LIMIT_RETRIES,
+                    }
+                    await asyncio.sleep(delay)
+                    await self.client.query(message)
+                    continue
+                if is_rate_limit:
+                    logger.error("Rate limit retries exhausted for assistant chat")
+                    yield {"type": "error", "content": "Rate limited. Please try again later."}
+                    return
+                # Non-rate-limit MessageParseError: log and break (don't crash)
+                if type(exc).__name__ == "MessageParseError":
+                    logger.warning(f"Ignoring unrecognized message from Claude CLI: {exc}")
+                    break
+                raise

        # Store the complete response in the database
        if full_response and self.conversation_id: