fix: resolve false-positive rate limit and one-message-behind in chat sessions

The Claude Code CLI v2.1.45+ emits a `rate_limit_event` message type that the Python SDK v0.1.19 cannot parse, raising MessageParseError. Two bugs resulted: 1. **False-positive rate limit**: check_rate_limit_error() matched "rate_limit" in the exception string "Unknown message type: rate_limit_event" via both an explicit type check and a regex fallback, triggering 15-19s backoff + query re-send on every session. 2. **One-message-behind**: The MessageParseError killed the receive_response() async generator, but the CLI subprocess was still alive with buffered response data. Catching and returning meant the response was never consumed. The next send_message() would read the previous response first, creating a one-behind offset. Changes: - chat_constants.py: check_rate_limit_error() now returns (False, None) for any MessageParseError, blocking both false-positive paths. Added safe_receive_response() helper that retries receive_response() on MessageParseError — the SDK's decoupled producer/consumer architecture (anyio memory channel) allows the new generator to continue reading remaining messages without data loss. Removed calculate_rate_limit_backoff re-export and MAX_CHAT_RATE_LIMIT_RETRIES constant. - spec_chat_session.py, assistant_chat_session.py, expand_chat_session.py: Replaced retry-with-backoff loops with safe_receive_response() wrapper. Removed asyncio.sleep backoff, query re-send, and rate_limited yield. Cleaned up unused imports (asyncio, calculate_rate_limit_backoff, MAX_CHAT_RATE_LIMIT_RETRIES). - agent.py: Added inner retry loop around receive_response() with same MessageParseError skip-and-restart pattern. Removed early-return that truncated responses. - types.ts: Removed SpecChatRateLimitedMessage, AssistantChatRateLimitedMessage, and their union entries. - useSpecChat.ts, useAssistantChat.ts, useExpandChat.ts: Removed dead 'rate_limited' case handlers. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-17 02:43:09 +00:00 · 2026-02-23 13:00:16 +02:00
parent 9af0f309b7
commit 4f102e7bc2
9 changed files with 258 additions and 363 deletions
--- a/agent.py
+++ b/agent.py
@@ -74,7 +74,15 @@ async def run_agent_session(
        await client.query(message)
        # Collect response text and show tool use
        # Retry receive_response() on MessageParseError — the SDK raises this for
        # unknown CLI message types (e.g. "rate_limit_event") which kills the async
        # generator.  The subprocess is still alive so we restart to read remaining
        # messages from the buffered channel.
        response_text = ""
        max_parse_retries = 50
        parse_retries = 0
        while True:
            try:
                async for msg in client.receive_response():
                    msg_type = type(msg).__name__
@@ -115,6 +123,17 @@ async def run_agent_session(
                                    # Tool succeeded - just show brief confirmation
                                    print("   [Done]", flush=True)
                break  # Normal completion
            except Exception as inner_exc:
                if type(inner_exc).__name__ == "MessageParseError":
                    parse_retries += 1
                    if parse_retries > max_parse_retries:
                        print(f"Too many unrecognized CLI messages ({parse_retries}), stopping")
                        break
                    print(f"Ignoring unrecognized message from Claude CLI: {inner_exc}")
                    continue
                raise  # Re-raise to outer except
        print("\n" + "-" * 70 + "\n")
        return "continue", response_text
--- a/server/services/assistant_chat_session.py
+++ b/server/services/assistant_chat_session.py
@@ -7,7 +7,6 @@ The assistant can answer questions about the codebase and features
 but cannot modify any files.
 """
 import asyncio
 import json
 import logging
 import os
@@ -27,10 +26,9 @@ from .assistant_database import (
    get_messages,
 )
 from .chat_constants import (
    MAX_CHAT_RATE_LIMIT_RETRIES,
    ROOT_DIR,
    calculate_rate_limit_backoff,
    check_rate_limit_error,
    safe_receive_response,
 )
 # Load environment variables from .env file if present
@@ -399,10 +397,9 @@ class AssistantChatSession:
        full_response = ""
-        # Stream the response (with rate-limit retry)
+        # Stream the response
        for _attempt in range(MAX_CHAT_RATE_LIMIT_RETRIES + 1):
        try:
-                async for msg in self.client.receive_response():
+            async for msg in safe_receive_response(self.client, logger):
                msg_type = type(msg).__name__
                if msg_type == "AssistantMessage" and hasattr(msg, "content"):
@@ -434,30 +431,12 @@ class AssistantChatSession:
                                "tool": tool_name,
                                "input": tool_input,
                            }
                # Completed successfully — break out of retry loop
                break
        except Exception as exc:
-                is_rate_limit, retry_secs = check_rate_limit_error(exc)
+            is_rate_limit, _ = check_rate_limit_error(exc)
                if is_rate_limit and _attempt < MAX_CHAT_RATE_LIMIT_RETRIES:
                    delay = retry_secs if retry_secs else calculate_rate_limit_backoff(_attempt)
                    logger.warning(f"Rate limited (attempt {_attempt + 1}/{MAX_CHAT_RATE_LIMIT_RETRIES}), retrying in {delay}s")
                    yield {
                        "type": "rate_limited",
                        "retry_in": delay,
                        "attempt": _attempt + 1,
                        "max_attempts": MAX_CHAT_RATE_LIMIT_RETRIES,
                    }
                    await asyncio.sleep(delay)
                    await self.client.query(message)
                    continue
            if is_rate_limit:
-                    logger.error("Rate limit retries exhausted for assistant chat")
+                logger.warning(f"Rate limited: {exc}")
                yield {"type": "error", "content": "Rate limited. Please try again later."}
                return
                # Non-rate-limit MessageParseError: log and break (don't crash)
                if type(exc).__name__ == "MessageParseError":
                    logger.warning(f"Ignoring unrecognized message from Claude CLI: {exc}")
                    break
            raise
        # Store the complete response in the database
--- a/server/services/chat_constants.py
+++ b/server/services/chat_constants.py
@@ -12,7 +12,7 @@ imports (``from .chat_constants import API_ENV_VARS``) continue to work.
 import logging
 import sys
 from pathlib import Path
-from typing import AsyncGenerator
+from typing import Any, AsyncGenerator
 # -------------------------------------------------------------------
 # Root directory of the autoforge project (repository root).
@@ -33,15 +33,10 @@ if _root_str not in sys.path:
 # imports continue to work unchanged.
 # -------------------------------------------------------------------
 from env_constants import API_ENV_VARS  # noqa: E402, F401
-from rate_limit_utils import calculate_rate_limit_backoff, is_rate_limit_error, parse_retry_after  # noqa: E402, F401
+from rate_limit_utils import is_rate_limit_error, parse_retry_after  # noqa: E402, F401
 logger = logging.getLogger(__name__)
 # -------------------------------------------------------------------
 # Rate-limit handling for chat sessions
 # -------------------------------------------------------------------
 MAX_CHAT_RATE_LIMIT_RETRIES = 3
 def check_rate_limit_error(exc: Exception) -> tuple[bool, int | None]:
    """Inspect an exception and determine if it represents a rate-limit.
@@ -49,24 +44,15 @@ def check_rate_limit_error(exc: Exception) -> tuple[bool, int | None]:
    Returns ``(is_rate_limit, retry_seconds)``.  ``retry_seconds`` is the
    parsed Retry-After value when available, otherwise ``None`` (caller
    should use exponential backoff).
    Handles:
    - ``MessageParseError`` whose raw *data* dict has
      ``type == "rate_limit_event"`` (Claude CLI sends this).
    - Any exception whose string representation matches known rate-limit
      patterns (via ``rate_limit_utils.is_rate_limit_error``).
    """
    # MessageParseError = unknown CLI message type (e.g. "rate_limit_event").
    # These are informational events, NOT actual rate limit errors.
    # The word "rate_limit" in the type name would false-positive the regex.
    if type(exc).__name__ == "MessageParseError":
        return False, None
    # For all other exceptions: match error text against known rate-limit patterns
    exc_str = str(exc)
    # Check for MessageParseError with a rate_limit_event payload
    cls_name = type(exc).__name__
    if cls_name == "MessageParseError":
        raw_data = getattr(exc, "data", None)
        if isinstance(raw_data, dict) and raw_data.get("type") == "rate_limit_event":
            retry = parse_retry_after(str(raw_data)) if raw_data else None
            return True, retry
    # Fallback: match error text against known rate-limit patterns
    if is_rate_limit_error(exc_str):
        retry = parse_retry_after(exc_str)
        return True, retry
@@ -74,6 +60,34 @@ def check_rate_limit_error(exc: Exception) -> tuple[bool, int | None]:
    return False, None
 async def safe_receive_response(client: Any, log: logging.Logger) -> AsyncGenerator:
    """Wrap ``client.receive_response()`` to skip ``MessageParseError``.
    The Claude Code CLI may emit message types (e.g. ``rate_limit_event``)
    that the installed Python SDK does not recognise, causing
    ``MessageParseError`` which kills the async generator.  The CLI
    subprocess is still alive and the SDK uses a buffered memory channel,
    so we restart ``receive_response()`` to continue reading remaining
    messages without losing data.
    """
    max_retries = 50
    retries = 0
    while True:
        try:
            async for msg in client.receive_response():
                yield msg
            return  # Normal completion
        except Exception as exc:
            if type(exc).__name__ == "MessageParseError":
                retries += 1
                if retries > max_retries:
                    log.error(f"Too many unrecognized CLI messages ({retries}), stopping")
                    return
                log.warning(f"Ignoring unrecognized message from Claude CLI: {exc}")
                continue
            raise
 async def make_multimodal_message(content_blocks: list[dict]) -> AsyncGenerator[dict, None]:
    """Yield a single multimodal user message in Claude Agent SDK format.
--- a/server/services/expand_chat_session.py
+++ b/server/services/expand_chat_session.py
@@ -23,11 +23,10 @@ from dotenv import load_dotenv
 from ..schemas import ImageAttachment
 from .chat_constants import (
    MAX_CHAT_RATE_LIMIT_RETRIES,
    ROOT_DIR,
    calculate_rate_limit_backoff,
    check_rate_limit_error,
    make_multimodal_message,
    safe_receive_response,
 )
 # Load environment variables from .env file if present
@@ -304,10 +303,9 @@ class ExpandChatSession:
        else:
            await self.client.query(message)
-        # Stream the response (with rate-limit retry)
+        # Stream the response
        for _attempt in range(MAX_CHAT_RATE_LIMIT_RETRIES + 1):
        try:
-                async for msg in self.client.receive_response():
+            async for msg in safe_receive_response(self.client, logger):
                msg_type = type(msg).__name__
                if msg_type == "AssistantMessage" and hasattr(msg, "content"):
@@ -324,46 +322,12 @@ class ExpandChatSession:
                                    "content": text,
                                    "timestamp": datetime.now().isoformat()
                                })
                # Completed successfully — break out of retry loop
                break
        except Exception as exc:
-                is_rate_limit, retry_secs = check_rate_limit_error(exc)
+            is_rate_limit, _ = check_rate_limit_error(exc)
                if is_rate_limit and _attempt < MAX_CHAT_RATE_LIMIT_RETRIES:
                    delay = retry_secs if retry_secs else calculate_rate_limit_backoff(_attempt)
                    logger.warning(f"Rate limited (attempt {_attempt + 1}/{MAX_CHAT_RATE_LIMIT_RETRIES}), retrying in {delay}s")
                    yield {
                        "type": "rate_limited",
                        "retry_in": delay,
                        "attempt": _attempt + 1,
                        "max_attempts": MAX_CHAT_RATE_LIMIT_RETRIES,
                    }
                    await asyncio.sleep(delay)
                    # Re-send the query before retrying receive_response
                    if attachments and len(attachments) > 0:
                        content_blocks_retry: list[dict[str, Any]] = []
                        if message:
                            content_blocks_retry.append({"type": "text", "text": message})
                        for att in attachments:
                            content_blocks_retry.append({
                                "type": "image",
                                "source": {
                                    "type": "base64",
                                    "media_type": att.mimeType,
                                    "data": att.base64Data,
                                }
                            })
                        await self.client.query(make_multimodal_message(content_blocks_retry))
                    else:
                        await self.client.query(message)
                    continue
            if is_rate_limit:
-                    logger.error("Rate limit retries exhausted for expand chat")
+                logger.warning(f"Rate limited: {exc}")
                yield {"type": "error", "content": "Rate limited. Please try again later."}
                return
                # Non-rate-limit MessageParseError: log and break (don't crash)
                if type(exc).__name__ == "MessageParseError":
                    logger.warning(f"Ignoring unrecognized message from Claude CLI: {exc}")
                    break
            raise
    def get_features_created(self) -> int:
--- a/server/services/spec_chat_session.py
+++ b/server/services/spec_chat_session.py
@@ -6,7 +6,6 @@ Manages interactive spec creation conversation with Claude.
 Uses the create-spec.md skill to guide users through app spec creation.
 """
 import asyncio
 import json
 import logging
 import os
@@ -21,11 +20,10 @@ from dotenv import load_dotenv
 from ..schemas import ImageAttachment
 from .chat_constants import (
    MAX_CHAT_RATE_LIMIT_RETRIES,
    ROOT_DIR,
    calculate_rate_limit_backoff,
    check_rate_limit_error,
    make_multimodal_message,
    safe_receive_response,
 )
 # Load environment variables from .env file if present
@@ -311,10 +309,9 @@ class SpecChatSession:
        # Store paths for the completion message
        spec_path = None
-        # Stream the response using receive_response (with rate-limit retry)
+        # Stream the response
        for _attempt in range(MAX_CHAT_RATE_LIMIT_RETRIES + 1):
        try:
-                async for msg in self.client.receive_response():
+            async for msg in safe_receive_response(self.client, logger):
                msg_type = type(msg).__name__
                if msg_type == "AssistantMessage" and hasattr(msg, "content"):
@@ -424,31 +421,12 @@ class SpecChatSession:
                                        "type": "spec_complete",
                                        "path": str(spec_path)
                                    }
                # Completed successfully — break out of retry loop
                break
        except Exception as exc:
-                is_rate_limit, retry_secs = check_rate_limit_error(exc)
+            is_rate_limit, _ = check_rate_limit_error(exc)
                if is_rate_limit and _attempt < MAX_CHAT_RATE_LIMIT_RETRIES:
                    delay = retry_secs if retry_secs else calculate_rate_limit_backoff(_attempt)
                    logger.warning(f"Rate limited (attempt {_attempt + 1}/{MAX_CHAT_RATE_LIMIT_RETRIES}), retrying in {delay}s")
                    yield {
                        "type": "rate_limited",
                        "retry_in": delay,
                        "attempt": _attempt + 1,
                        "max_attempts": MAX_CHAT_RATE_LIMIT_RETRIES,
                    }
                    await asyncio.sleep(delay)
                    # Re-send the query before retrying receive_response
                    await self.client.query(message)
                    continue
            if is_rate_limit:
-                    logger.error("Rate limit retries exhausted for spec chat")
+                logger.warning(f"Rate limited: {exc}")
                yield {"type": "error", "content": "Rate limited. Please try again later."}
                return
                # Non-rate-limit MessageParseError: log and break (don't crash)
                if type(exc).__name__ == "MessageParseError":
                    logger.warning(f"Ignoring unrecognized message from Claude CLI: {exc}")
                    break
            raise
    def is_complete(self) -> bool:
--- a/ui/src/hooks/useAssistantChat.ts
+++ b/ui/src/hooks/useAssistantChat.ts
@@ -269,20 +269,6 @@ export function useAssistantChat({
            break;
          }
          case "rate_limited": {
            // Show rate limit info as system message
            setMessages((prev) => [
              ...prev,
              {
                id: generateId(),
                role: "system",
                content: `Rate limited. Retrying in ${data.retry_in}s... (attempt ${data.attempt}/${data.max_attempts})`,
                timestamp: new Date(),
              },
            ]);
            break;
          }
          case "pong": {
            // Keep-alive response, nothing to do
            break;
--- a/ui/src/hooks/useExpandChat.ts
+++ b/ui/src/hooks/useExpandChat.ts
@@ -226,20 +226,6 @@ export function useExpandChat({
            break
          }
          case 'rate_limited': {
            // Show rate limit info as system message
            setMessages((prev) => [
              ...prev,
              {
                id: generateId(),
                role: 'system',
                content: `Rate limited. Retrying in ${data.retry_in}s... (attempt ${data.attempt}/${data.max_attempts})`,
                timestamp: new Date(),
              },
            ])
            break
          }
          case 'pong': {
            // Keep-alive response, nothing to do
            break
--- a/ui/src/hooks/useSpecChat.ts
+++ b/ui/src/hooks/useSpecChat.ts
@@ -322,20 +322,6 @@ export function useSpecChat({
            break
          }
          case 'rate_limited': {
            // Show rate limit info as system message
            setMessages((prev) => [
              ...prev,
              {
                id: generateId(),
                role: 'system',
                content: `Rate limited. Retrying in ${data.retry_in}s... (attempt ${data.attempt}/${data.max_attempts})`,
                timestamp: new Date(),
              },
            ])
            break
          }
          case 'pong': {
            // Keep-alive response, nothing to do
            break
--- a/ui/src/lib/types.ts
+++ b/ui/src/lib/types.ts
@@ -407,13 +407,6 @@ export interface SpecChatResponseDoneMessage {
  type: 'response_done'
 }
 export interface SpecChatRateLimitedMessage {
  type: 'rate_limited'
  retry_in: number
  attempt: number
  max_attempts: number
 }
 export type SpecChatServerMessage =
  | SpecChatTextMessage
  | SpecChatQuestionMessage
@@ -423,7 +416,6 @@ export type SpecChatServerMessage =
  | SpecChatErrorMessage
  | SpecChatPongMessage
  | SpecChatResponseDoneMessage
  | SpecChatRateLimitedMessage
 // Image attachment for chat messages
 export interface ImageAttachment {
@@ -509,13 +501,6 @@ export interface AssistantChatPongMessage {
  type: 'pong'
 }
 export interface AssistantChatRateLimitedMessage {
  type: 'rate_limited'
  retry_in: number
  attempt: number
  max_attempts: number
 }
 export type AssistantChatServerMessage =
  | AssistantChatTextMessage
  | AssistantChatToolCallMessage
@@ -524,7 +509,6 @@ export type AssistantChatServerMessage =
  | AssistantChatErrorMessage
  | AssistantChatConversationCreatedMessage
  | AssistantChatPongMessage
  | AssistantChatRateLimitedMessage
 // ============================================================================
 // Expand Chat Types
@@ -548,7 +532,6 @@ export type ExpandChatServerMessage =
  | SpecChatErrorMessage       // Reuse error message type
  | SpecChatPongMessage        // Reuse pong message type
  | SpecChatResponseDoneMessage // Reuse response_done type
  | SpecChatRateLimitedMessage // Reuse rate_limited message type
 // Bulk feature creation
 export interface FeatureBulkCreate {