fix: handle rate_limit_event crash in chat sessions

The Claude CLI sends `rate_limit_event` messages that the SDK's `parse_message()` doesn't recognize, raising `MessageParseError` and crashing all three chat session types (spec, assistant, expand).

Changes:
- Bump claude-agent-sdk minimum from 0.1.0 to 0.1.39
- Add `check_rate_limit_error()` helper in chat_constants.py that detects rate limits from both MessageParseError data payloads and error message text patterns
- Wrap `receive_response()` loops in all three `_query_claude()` methods with retry-on-rate-limit logic (up to 3 retries with backoff)
- Gracefully log and skip non-rate-limit MessageParseError instead of crashing the session
- Add `rate_limited` message type to frontend TypeScript types and handle it in useSpecChat, useAssistantChat, useExpandChat hooks to show "Rate limited. Retrying in Xs..." system messages

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-17 02:43:09 +00:00 · 2026-02-23 12:21:49 +02:00
parent b7aef15c3b
commit dcdd06e02e
11 changed files with 362 additions and 145 deletions
--- a/server/services/chat_constants.py
+++ b/server/services/chat_constants.py
@@ -9,6 +9,7 @@ project root and is re-exported here for convenience so that existing
 imports (``from .chat_constants import API_ENV_VARS``) continue to work.
 """

+import logging
 import sys
 from pathlib import Path
 from typing import AsyncGenerator
@@ -32,6 +33,45 @@ if _root_str not in sys.path:
 # imports continue to work unchanged.
 # -------------------------------------------------------------------
 from env_constants import API_ENV_VARS  # noqa: E402, F401
+from rate_limit_utils import calculate_rate_limit_backoff, is_rate_limit_error, parse_retry_after  # noqa: E402, F401
+
+logger = logging.getLogger(__name__)
+
+# -------------------------------------------------------------------
+# Rate-limit handling for chat sessions
+# -------------------------------------------------------------------
+MAX_CHAT_RATE_LIMIT_RETRIES = 3  # retry cap for rate-limited chat queries
+
+
+def check_rate_limit_error(exc: Exception) -> tuple[bool, int | None]:
+    """Classify *exc* as a rate-limit signal and extract any retry delay.
+
+    Args:
+        exc: Exception raised while reading a chat response stream.
+
+    Returns:
+        ``(is_rate_limit, retry_seconds)``.  ``retry_seconds`` is the parsed
+        Retry-After value when available, otherwise ``None`` (the caller
+        should then fall back to exponential backoff).
+
+    A ``MessageParseError`` whose raw ``data`` dict has
+    ``type == "rate_limit_event"`` (sent by the Claude CLI) always counts;
+    any other exception counts when its text matches the patterns known to
+    ``rate_limit_utils.is_rate_limit_error``.
+    """
+    # Matched by class name, not isinstance (presumably to avoid an SDK import).
+    if type(exc).__name__ == "MessageParseError":
+        raw_data = getattr(exc, "data", None)
+        if isinstance(raw_data, dict) and raw_data.get("type") == "rate_limit_event":
+            # A dict carrying a "type" key is never empty: no extra guard needed.
+            return True, parse_retry_after(str(raw_data))
+
+    # Fallback: match the error text against known rate-limit patterns.
+    exc_str = str(exc)
+    if is_rate_limit_error(exc_str):
+        return True, parse_retry_after(exc_str)
+
+    return False, None


 async def make_multimodal_message(content_blocks: list[dict]) -> AsyncGenerator[dict, None]: