fix: handle rate_limit_event crash in chat sessions

The Claude CLI sends `rate_limit_event` messages that the SDK's `parse_message()` doesn't recognize, raising `MessageParseError` and crashing all three chat session types (spec, assistant, expand).

Changes:
- Bump claude-agent-sdk minimum from 0.1.0 to 0.1.39
- Add `check_rate_limit_error()` helper in chat_constants.py that detects rate limits from both MessageParseError data payloads and error message text patterns
- Wrap `receive_response()` loops in all three `_query_claude()` methods with retry-on-rate-limit logic (up to 3 retries with backoff)
- Gracefully log and skip non-rate-limit MessageParseError instead of crashing the session
- Add `rate_limited` message type to frontend TypeScript types and handle it in useSpecChat, useAssistantChat, useExpandChat hooks to show "Rate limited. Retrying in Xs..." system messages

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-16 18:33:08 +00:00 · 2026-02-23 12:21:49 +02:00
parent b7aef15c3b
commit dcdd06e02e
11 changed files with 362 additions and 145 deletions
--- a/.claude/launch.json
+++ b/.claude/launch.json
@@ -13,5 +13,6 @@
      "runtimeArgs": ["/c", "cd ui && npx vite"],
      "port": 5173
    }
-  ]
+  ],
+  "autoVerify": true
 }
--- a/requirements-prod.txt
+++ b/requirements-prod.txt
@@ -1,6 +1,6 @@
 # Production runtime dependencies only
 # For development, use requirements.txt (includes ruff, mypy, pytest)
-claude-agent-sdk>=0.1.0,<0.2.0
+claude-agent-sdk>=0.1.39,<0.2.0
 python-dotenv>=1.0.0
 sqlalchemy>=2.0.0
 fastapi>=0.115.0
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-claude-agent-sdk>=0.1.0,<0.2.0
+claude-agent-sdk>=0.1.39,<0.2.0
 python-dotenv>=1.0.0
 sqlalchemy>=2.0.0
 fastapi>=0.115.0
--- a/server/services/assistant_chat_session.py
+++ b/server/services/assistant_chat_session.py
@@ -7,6 +7,7 @@ The assistant can answer questions about the codebase and features
 but cannot modify any files.
 """

+import asyncio
 import json
 import logging
 import os
@@ -25,7 +26,12 @@ from .assistant_database import (
    create_conversation,
    get_messages,
 )
-from .chat_constants import ROOT_DIR
+from .chat_constants import (
+    MAX_CHAT_RATE_LIMIT_RETRIES,
+    ROOT_DIR,
+    calculate_rate_limit_backoff,
+    check_rate_limit_error,
+)

 # Load environment variables from .env file if present
 load_dotenv()
@@ -393,39 +399,66 @@ class AssistantChatSession:

        full_response = ""

-        # Stream the response
-        async for msg in self.client.receive_response():
-            msg_type = type(msg).__name__
+        # Stream the response (with rate-limit retry)
+        for _attempt in range(MAX_CHAT_RATE_LIMIT_RETRIES + 1):
+            try:
+                async for msg in self.client.receive_response():
+                    msg_type = type(msg).__name__

-            if msg_type == "AssistantMessage" and hasattr(msg, "content"):
-                for block in msg.content:
-                    block_type = type(block).__name__
+                    if msg_type == "AssistantMessage" and hasattr(msg, "content"):
+                        for block in msg.content:
+                            block_type = type(block).__name__

-                    if block_type == "TextBlock" and hasattr(block, "text"):
-                        text = block.text
-                        if text:
-                            full_response += text
-                            yield {"type": "text", "content": text}
+                            if block_type == "TextBlock" and hasattr(block, "text"):
+                                text = block.text
+                                if text:
+                                    full_response += text
+                                    yield {"type": "text", "content": text}

-                    elif block_type == "ToolUseBlock" and hasattr(block, "name"):
-                        tool_name = block.name
-                        tool_input = getattr(block, "input", {})
+                            elif block_type == "ToolUseBlock" and hasattr(block, "name"):
+                                tool_name = block.name
+                                tool_input = getattr(block, "input", {})
+
+                                # Intercept ask_user tool calls -> yield as question message
+                                if tool_name == "mcp__features__ask_user":
+                                    questions = tool_input.get("questions", [])
+                                    if questions:
+                                        yield {
+                                            "type": "question",
+                                            "questions": questions,
+                                        }
+                                        continue

-                        # Intercept ask_user tool calls -> yield as question message
-                        if tool_name == "mcp__features__ask_user":
-                            questions = tool_input.get("questions", [])
-                            if questions:
                                yield {
-                                    "type": "question",
-                                    "questions": questions,
+                                    "type": "tool_call",
+                                    "tool": tool_name,
+                                    "input": tool_input,
                                }
-                                continue
-
-                        yield {
-                            "type": "tool_call",
-                            "tool": tool_name,
-                            "input": tool_input,
-                        }
+                # Completed successfully — break out of retry loop
+                break
+            except Exception as exc:
+                is_rate_limit, retry_secs = check_rate_limit_error(exc)
+                if is_rate_limit and _attempt < MAX_CHAT_RATE_LIMIT_RETRIES:
+                    delay = retry_secs if retry_secs else calculate_rate_limit_backoff(_attempt)
+                    logger.warning(f"Rate limited (attempt {_attempt + 1}/{MAX_CHAT_RATE_LIMIT_RETRIES}), retrying in {delay}s")
+                    yield {
+                        "type": "rate_limited",
+                        "retry_in": delay,
+                        "attempt": _attempt + 1,
+                        "max_attempts": MAX_CHAT_RATE_LIMIT_RETRIES,
+                    }
+                    await asyncio.sleep(delay)
+                    await self.client.query(message)
+                    continue
+                if is_rate_limit:
+                    logger.error("Rate limit retries exhausted for assistant chat")
+                    yield {"type": "error", "content": "Rate limited. Please try again later."}
+                    return
+                # Non-rate-limit MessageParseError: log and break (don't crash)
+                if type(exc).__name__ == "MessageParseError":
+                    logger.warning(f"Ignoring unrecognized message from Claude CLI: {exc}")
+                    break
+                raise

        # Store the complete response in the database
        if full_response and self.conversation_id:
--- a/server/services/chat_constants.py
+++ b/server/services/chat_constants.py
@@ -9,6 +9,7 @@ project root and is re-exported here for convenience so that existing
 imports (``from .chat_constants import API_ENV_VARS``) continue to work.
 """

+import logging
 import sys
 from pathlib import Path
 from typing import AsyncGenerator
@@ -32,6 +33,45 @@ if _root_str not in sys.path:
 # imports continue to work unchanged.
 # -------------------------------------------------------------------
 from env_constants import API_ENV_VARS  # noqa: E402, F401
+from rate_limit_utils import calculate_rate_limit_backoff, is_rate_limit_error, parse_retry_after  # noqa: E402, F401
+
+logger = logging.getLogger(__name__)
+
+# -------------------------------------------------------------------
+# Rate-limit handling for chat sessions
+# -------------------------------------------------------------------
+MAX_CHAT_RATE_LIMIT_RETRIES = 3
+
+
+def check_rate_limit_error(exc: Exception) -> tuple[bool, int | None]:
+    """Inspect an exception and determine if it represents a rate-limit.
+
+    Returns ``(is_rate_limit, retry_seconds)``.  ``retry_seconds`` is the
+    parsed Retry-After value when available, otherwise ``None`` (caller
+    should use exponential backoff).
+
+    Handles:
+    - ``MessageParseError`` whose raw *data* dict has
+      ``type == "rate_limit_event"`` (Claude CLI sends this).
+    - Any exception whose string representation matches known rate-limit
+      patterns (via ``rate_limit_utils.is_rate_limit_error``).
+    """
+    exc_str = str(exc)
+
+    # Check for MessageParseError with a rate_limit_event payload
+    cls_name = type(exc).__name__
+    if cls_name == "MessageParseError":
+        raw_data = getattr(exc, "data", None)
+        if isinstance(raw_data, dict) and raw_data.get("type") == "rate_limit_event":
+            retry = parse_retry_after(str(raw_data)) if raw_data else None
+            return True, retry
+
+    # Fallback: match error text against known rate-limit patterns
+    if is_rate_limit_error(exc_str):
+        retry = parse_retry_after(exc_str)
+        return True, retry
+
+    return False, None


 async def make_multimodal_message(content_blocks: list[dict]) -> AsyncGenerator[dict, None]:
--- a/server/services/expand_chat_session.py
+++ b/server/services/expand_chat_session.py
@@ -22,7 +22,13 @@ from claude_agent_sdk import ClaudeAgentOptions, ClaudeSDKClient
 from dotenv import load_dotenv

 from ..schemas import ImageAttachment
-from .chat_constants import ROOT_DIR, make_multimodal_message
+from .chat_constants import (
+    MAX_CHAT_RATE_LIMIT_RETRIES,
+    ROOT_DIR,
+    calculate_rate_limit_backoff,
+    check_rate_limit_error,
+    make_multimodal_message,
+)

 # Load environment variables from .env file if present
 load_dotenv()
@@ -298,24 +304,67 @@ class ExpandChatSession:
        else:
            await self.client.query(message)

-        # Stream the response
-        async for msg in self.client.receive_response():
-            msg_type = type(msg).__name__
+        # Stream the response (with rate-limit retry)
+        for _attempt in range(MAX_CHAT_RATE_LIMIT_RETRIES + 1):
+            try:
+                async for msg in self.client.receive_response():
+                    msg_type = type(msg).__name__

-            if msg_type == "AssistantMessage" and hasattr(msg, "content"):
-                for block in msg.content:
-                    block_type = type(block).__name__
+                    if msg_type == "AssistantMessage" and hasattr(msg, "content"):
+                        for block in msg.content:
+                            block_type = type(block).__name__

-                    if block_type == "TextBlock" and hasattr(block, "text"):
-                        text = block.text
-                        if text:
-                            yield {"type": "text", "content": text}
+                            if block_type == "TextBlock" and hasattr(block, "text"):
+                                text = block.text
+                                if text:
+                                    yield {"type": "text", "content": text}

-                            self.messages.append({
-                                "role": "assistant",
-                                "content": text,
-                                "timestamp": datetime.now().isoformat()
+                                    self.messages.append({
+                                        "role": "assistant",
+                                        "content": text,
+                                        "timestamp": datetime.now().isoformat()
+                                    })
+                # Completed successfully — break out of retry loop
+                break
+            except Exception as exc:
+                is_rate_limit, retry_secs = check_rate_limit_error(exc)
+                if is_rate_limit and _attempt < MAX_CHAT_RATE_LIMIT_RETRIES:
+                    delay = retry_secs if retry_secs else calculate_rate_limit_backoff(_attempt)
+                    logger.warning(f"Rate limited (attempt {_attempt + 1}/{MAX_CHAT_RATE_LIMIT_RETRIES}), retrying in {delay}s")
+                    yield {
+                        "type": "rate_limited",
+                        "retry_in": delay,
+                        "attempt": _attempt + 1,
+                        "max_attempts": MAX_CHAT_RATE_LIMIT_RETRIES,
+                    }
+                    await asyncio.sleep(delay)
+                    # Re-send the query before retrying receive_response
+                    if attachments and len(attachments) > 0:
+                        content_blocks_retry: list[dict[str, Any]] = []
+                        if message:
+                            content_blocks_retry.append({"type": "text", "text": message})
+                        for att in attachments:
+                            content_blocks_retry.append({
+                                "type": "image",
+                                "source": {
+                                    "type": "base64",
+                                    "media_type": att.mimeType,
+                                    "data": att.base64Data,
+                                }
                            })
+                        await self.client.query(make_multimodal_message(content_blocks_retry))
+                    else:
+                        await self.client.query(message)
+                    continue
+                if is_rate_limit:
+                    logger.error("Rate limit retries exhausted for expand chat")
+                    yield {"type": "error", "content": "Rate limited. Please try again later."}
+                    return
+                # Non-rate-limit MessageParseError: log and break (don't crash)
+                if type(exc).__name__ == "MessageParseError":
+                    logger.warning(f"Ignoring unrecognized message from Claude CLI: {exc}")
+                    break
+                raise

    def get_features_created(self) -> int:
        """Get the total number of features created in this session."""
--- a/server/services/spec_chat_session.py
+++ b/server/services/spec_chat_session.py
@@ -6,6 +6,7 @@ Manages interactive spec creation conversation with Claude.
 Uses the create-spec.md skill to guide users through app spec creation.
 """

+import asyncio
 import json
 import logging
 import os
@@ -19,7 +20,13 @@ from claude_agent_sdk import ClaudeAgentOptions, ClaudeSDKClient
 from dotenv import load_dotenv

 from ..schemas import ImageAttachment
-from .chat_constants import ROOT_DIR, make_multimodal_message
+from .chat_constants import (
+    MAX_CHAT_RATE_LIMIT_RETRIES,
+    ROOT_DIR,
+    calculate_rate_limit_backoff,
+    check_rate_limit_error,
+    make_multimodal_message,
+)

 # Load environment variables from .env file if present
 load_dotenv()
@@ -304,117 +311,145 @@ class SpecChatSession:
        # Store paths for the completion message
        spec_path = None

-        # Stream the response using receive_response
-        async for msg in self.client.receive_response():
-            msg_type = type(msg).__name__
+        # Stream the response using receive_response (with rate-limit retry)
+        for _attempt in range(MAX_CHAT_RATE_LIMIT_RETRIES + 1):
+            try:
+                async for msg in self.client.receive_response():
+                    msg_type = type(msg).__name__

-            if msg_type == "AssistantMessage" and hasattr(msg, "content"):
-                # Process content blocks in the assistant message
-                for block in msg.content:
-                    block_type = type(block).__name__
+                    if msg_type == "AssistantMessage" and hasattr(msg, "content"):
+                        # Process content blocks in the assistant message
+                        for block in msg.content:
+                            block_type = type(block).__name__

-                    if block_type == "TextBlock" and hasattr(block, "text"):
-                        # Accumulate text and yield it
-                        text = block.text
-                        if text:
-                            current_text += text
-                            yield {"type": "text", "content": text}
+                            if block_type == "TextBlock" and hasattr(block, "text"):
+                                # Accumulate text and yield it
+                                text = block.text
+                                if text:
+                                    current_text += text
+                                    yield {"type": "text", "content": text}

-                            # Store in message history
-                            self.messages.append({
-                                "role": "assistant",
-                                "content": text,
-                                "timestamp": datetime.now().isoformat()
-                            })
+                                    # Store in message history
+                                    self.messages.append({
+                                        "role": "assistant",
+                                        "content": text,
+                                        "timestamp": datetime.now().isoformat()
+                                    })

-                    elif block_type == "ToolUseBlock" and hasattr(block, "name"):
-                        tool_name = block.name
-                        tool_input = getattr(block, "input", {})
-                        tool_id = getattr(block, "id", "")
+                            elif block_type == "ToolUseBlock" and hasattr(block, "name"):
+                                tool_name = block.name
+                                tool_input = getattr(block, "input", {})
+                                tool_id = getattr(block, "id", "")

-                        if tool_name in ("Write", "Edit"):
-                            # File being written or edited - track for verification
-                            file_path = tool_input.get("file_path", "")
+                                if tool_name in ("Write", "Edit"):
+                                    # File being written or edited - track for verification
+                                    file_path = tool_input.get("file_path", "")

-                            # Track app_spec.txt
-                            if "app_spec.txt" in str(file_path):
-                                pending_writes["app_spec"] = {
-                                    "tool_id": tool_id,
-                                    "path": file_path
-                                }
-                                logger.info(f"{tool_name} tool called for app_spec.txt: {file_path}")
+                                    # Track app_spec.txt
+                                    if "app_spec.txt" in str(file_path):
+                                        pending_writes["app_spec"] = {
+                                            "tool_id": tool_id,
+                                            "path": file_path
+                                        }
+                                        logger.info(f"{tool_name} tool called for app_spec.txt: {file_path}")

-                            # Track initializer_prompt.md
-                            elif "initializer_prompt.md" in str(file_path):
-                                pending_writes["initializer"] = {
-                                    "tool_id": tool_id,
-                                    "path": file_path
-                                }
-                                logger.info(f"{tool_name} tool called for initializer_prompt.md: {file_path}")
+                                    # Track initializer_prompt.md
+                                    elif "initializer_prompt.md" in str(file_path):
+                                        pending_writes["initializer"] = {
+                                            "tool_id": tool_id,
+                                            "path": file_path
+                                        }
+                                        logger.info(f"{tool_name} tool called for initializer_prompt.md: {file_path}")

-            elif msg_type == "UserMessage" and hasattr(msg, "content"):
-                # Tool results - check for write confirmations and errors
-                for block in msg.content:
-                    block_type = type(block).__name__
-                    if block_type == "ToolResultBlock":
-                        is_error = getattr(block, "is_error", False)
-                        tool_use_id = getattr(block, "tool_use_id", "")
+                    elif msg_type == "UserMessage" and hasattr(msg, "content"):
+                        # Tool results - check for write confirmations and errors
+                        for block in msg.content:
+                            block_type = type(block).__name__
+                            if block_type == "ToolResultBlock":
+                                is_error = getattr(block, "is_error", False)
+                                tool_use_id = getattr(block, "tool_use_id", "")

-                        if is_error:
-                            content = getattr(block, "content", "Unknown error")
-                            logger.warning(f"Tool error: {content}")
-                            # Clear any pending writes that failed
-                            for key in pending_writes:
-                                pending_write = pending_writes[key]
-                                if pending_write is not None and tool_use_id == pending_write.get("tool_id"):
-                                    logger.error(f"{key} write failed: {content}")
-                                    pending_writes[key] = None
-                        else:
-                            # Tool succeeded - check which file was written
-
-                            # Check app_spec.txt
-                            if pending_writes["app_spec"] and tool_use_id == pending_writes["app_spec"].get("tool_id"):
-                                file_path = pending_writes["app_spec"]["path"]
-                                full_path = Path(file_path) if Path(file_path).is_absolute() else self.project_dir / file_path
-                                if full_path.exists():
-                                    logger.info(f"app_spec.txt verified at: {full_path}")
-                                    files_written["app_spec"] = True
-                                    spec_path = file_path
-
-                                    # Notify about file write (but NOT completion yet)
-                                    yield {
-                                        "type": "file_written",
-                                        "path": str(file_path)
-                                    }
+                                if is_error:
+                                    content = getattr(block, "content", "Unknown error")
+                                    logger.warning(f"Tool error: {content}")
+                                    # Clear any pending writes that failed
+                                    for key in pending_writes:
+                                        pending_write = pending_writes[key]
+                                        if pending_write is not None and tool_use_id == pending_write.get("tool_id"):
+                                            logger.error(f"{key} write failed: {content}")
+                                            pending_writes[key] = None
                                else:
-                                    logger.error(f"app_spec.txt not found after write: {full_path}")
-                                pending_writes["app_spec"] = None
+                                    # Tool succeeded - check which file was written

-                            # Check initializer_prompt.md
-                            if pending_writes["initializer"] and tool_use_id == pending_writes["initializer"].get("tool_id"):
-                                file_path = pending_writes["initializer"]["path"]
-                                full_path = Path(file_path) if Path(file_path).is_absolute() else self.project_dir / file_path
-                                if full_path.exists():
-                                    logger.info(f"initializer_prompt.md verified at: {full_path}")
-                                    files_written["initializer"] = True
+                                    # Check app_spec.txt
+                                    if pending_writes["app_spec"] and tool_use_id == pending_writes["app_spec"].get("tool_id"):
+                                        file_path = pending_writes["app_spec"]["path"]
+                                        full_path = Path(file_path) if Path(file_path).is_absolute() else self.project_dir / file_path
+                                        if full_path.exists():
+                                            logger.info(f"app_spec.txt verified at: {full_path}")
+                                            files_written["app_spec"] = True
+                                            spec_path = file_path

-                                    # Notify about file write
-                                    yield {
-                                        "type": "file_written",
-                                        "path": str(file_path)
-                                    }
-                                else:
-                                    logger.error(f"initializer_prompt.md not found after write: {full_path}")
-                                pending_writes["initializer"] = None
+                                            # Notify about file write (but NOT completion yet)
+                                            yield {
+                                                "type": "file_written",
+                                                "path": str(file_path)
+                                            }
+                                        else:
+                                            logger.error(f"app_spec.txt not found after write: {full_path}")
+                                        pending_writes["app_spec"] = None

-                            # Check if BOTH files are now written - only then signal completion
-                            if files_written["app_spec"] and files_written["initializer"]:
-                                logger.info("Both app_spec.txt and initializer_prompt.md verified - signaling completion")
-                                self.complete = True
-                                yield {
-                                    "type": "spec_complete",
-                                    "path": str(spec_path)
-                                }
+                                    # Check initializer_prompt.md
+                                    if pending_writes["initializer"] and tool_use_id == pending_writes["initializer"].get("tool_id"):
+                                        file_path = pending_writes["initializer"]["path"]
+                                        full_path = Path(file_path) if Path(file_path).is_absolute() else self.project_dir / file_path
+                                        if full_path.exists():
+                                            logger.info(f"initializer_prompt.md verified at: {full_path}")
+                                            files_written["initializer"] = True
+
+                                            # Notify about file write
+                                            yield {
+                                                "type": "file_written",
+                                                "path": str(file_path)
+                                            }
+                                        else:
+                                            logger.error(f"initializer_prompt.md not found after write: {full_path}")
+                                        pending_writes["initializer"] = None
+
+                                    # Check if BOTH files are now written - only then signal completion
+                                    if files_written["app_spec"] and files_written["initializer"]:
+                                        logger.info("Both app_spec.txt and initializer_prompt.md verified - signaling completion")
+                                        self.complete = True
+                                        yield {
+                                            "type": "spec_complete",
+                                            "path": str(spec_path)
+                                        }
+                # Completed successfully — break out of retry loop
+                break
+            except Exception as exc:
+                is_rate_limit, retry_secs = check_rate_limit_error(exc)
+                if is_rate_limit and _attempt < MAX_CHAT_RATE_LIMIT_RETRIES:
+                    delay = retry_secs if retry_secs else calculate_rate_limit_backoff(_attempt)
+                    logger.warning(f"Rate limited (attempt {_attempt + 1}/{MAX_CHAT_RATE_LIMIT_RETRIES}), retrying in {delay}s")
+                    yield {
+                        "type": "rate_limited",
+                        "retry_in": delay,
+                        "attempt": _attempt + 1,
+                        "max_attempts": MAX_CHAT_RATE_LIMIT_RETRIES,
+                    }
+                    await asyncio.sleep(delay)
+                    # Re-send the query before retrying receive_response
+                    await self.client.query(message)
+                    continue
+                if is_rate_limit:
+                    logger.error("Rate limit retries exhausted for spec chat")
+                    yield {"type": "error", "content": "Rate limited. Please try again later."}
+                    return
+                # Non-rate-limit MessageParseError: log and break (don't crash)
+                if type(exc).__name__ == "MessageParseError":
+                    logger.warning(f"Ignoring unrecognized message from Claude CLI: {exc}")
+                    break
+                raise

    def is_complete(self) -> bool:
        """Check if spec creation is complete."""
--- a/ui/src/hooks/useAssistantChat.ts
+++ b/ui/src/hooks/useAssistantChat.ts
@@ -269,6 +269,20 @@ export function useAssistantChat({
            break;
          }

+          case "rate_limited": {
+            // Show rate limit info as system message
+            setMessages((prev) => [
+              ...prev,
+              {
+                id: generateId(),
+                role: "system",
+                content: `Rate limited. Retrying in ${data.retry_in}s... (attempt ${data.attempt}/${data.max_attempts})`,
+                timestamp: new Date(),
+              },
+            ]);
+            break;
+          }
+
          case "pong": {
            // Keep-alive response, nothing to do
            break;
--- a/ui/src/hooks/useExpandChat.ts
+++ b/ui/src/hooks/useExpandChat.ts
@@ -226,6 +226,20 @@ export function useExpandChat({
            break
          }

+          case 'rate_limited': {
+            // Show rate limit info as system message
+            setMessages((prev) => [
+              ...prev,
+              {
+                id: generateId(),
+                role: 'system',
+                content: `Rate limited. Retrying in ${data.retry_in}s... (attempt ${data.attempt}/${data.max_attempts})`,
+                timestamp: new Date(),
+              },
+            ])
+            break
+          }
+
          case 'pong': {
            // Keep-alive response, nothing to do
            break
--- a/ui/src/hooks/useSpecChat.ts
+++ b/ui/src/hooks/useSpecChat.ts
@@ -322,6 +322,20 @@ export function useSpecChat({
            break
          }

+          case 'rate_limited': {
+            // Show rate limit info as system message
+            setMessages((prev) => [
+              ...prev,
+              {
+                id: generateId(),
+                role: 'system',
+                content: `Rate limited. Retrying in ${data.retry_in}s... (attempt ${data.attempt}/${data.max_attempts})`,
+                timestamp: new Date(),
+              },
+            ])
+            break
+          }
+
          case 'pong': {
            // Keep-alive response, nothing to do
            break
--- a/ui/src/lib/types.ts
+++ b/ui/src/lib/types.ts
@@ -407,6 +407,13 @@ export interface SpecChatResponseDoneMessage {
  type: 'response_done'
 }

+export interface SpecChatRateLimitedMessage {
+  type: 'rate_limited'
+  retry_in: number
+  attempt: number
+  max_attempts: number
+}
+
 export type SpecChatServerMessage =
  | SpecChatTextMessage
  | SpecChatQuestionMessage
@@ -416,6 +423,7 @@ export type SpecChatServerMessage =
  | SpecChatErrorMessage
  | SpecChatPongMessage
  | SpecChatResponseDoneMessage
+  | SpecChatRateLimitedMessage

 // Image attachment for chat messages
 export interface ImageAttachment {
@@ -501,6 +509,13 @@ export interface AssistantChatPongMessage {
  type: 'pong'
 }

+export interface AssistantChatRateLimitedMessage {
+  type: 'rate_limited'
+  retry_in: number
+  attempt: number
+  max_attempts: number
+}
+
 export type AssistantChatServerMessage =
  | AssistantChatTextMessage
  | AssistantChatToolCallMessage
@@ -509,6 +524,7 @@ export type AssistantChatServerMessage =
  | AssistantChatErrorMessage
  | AssistantChatConversationCreatedMessage
  | AssistantChatPongMessage
+  | AssistantChatRateLimitedMessage

 // ============================================================================
 // Expand Chat Types
@@ -532,6 +548,7 @@ export type ExpandChatServerMessage =
  | SpecChatErrorMessage       // Reuse error message type
  | SpecChatPongMessage        // Reuse pong message type
  | SpecChatResponseDoneMessage // Reuse response_done type
+  | SpecChatRateLimitedMessage // Reuse rate_limited message type

 // Bulk feature creation
 export interface FeatureBulkCreate {