fix: handle rate_limit_event crash in chat sessions

The Claude CLI sends `rate_limit_event` messages that the SDK's
`parse_message()` doesn't recognize, raising `MessageParseError` and
crashing all three chat session types (spec, assistant, expand).

Changes:
- Bump claude-agent-sdk minimum from 0.1.0 to 0.1.39
- Add `check_rate_limit_error()` helper in chat_constants.py that
  detects rate limits from both MessageParseError data payloads and
  error message text patterns
- Wrap `receive_response()` loops in all three `_query_claude()` methods
  with retry-on-rate-limit logic (up to 3 retries with backoff)
- Log non-rate-limit MessageParseError and end the current response
  stream gracefully instead of crashing the session
- Add `rate_limited` message type to frontend TypeScript types and
  handle it in useSpecChat, useAssistantChat, useExpandChat hooks to
  show "Rate limited. Retrying in Xs..." system messages

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Auto
2026-02-23 12:21:49 +02:00
parent b7aef15c3b
commit dcdd06e02e
11 changed files with 362 additions and 145 deletions

View File

@@ -13,5 +13,6 @@
"runtimeArgs": ["/c", "cd ui && npx vite"],
"port": 5173
}
]
],
"autoVerify": true
}

View File

@@ -1,6 +1,6 @@
# Production runtime dependencies only
# For development, use requirements.txt (includes ruff, mypy, pytest)
claude-agent-sdk>=0.1.0,<0.2.0
claude-agent-sdk>=0.1.39,<0.2.0
python-dotenv>=1.0.0
sqlalchemy>=2.0.0
fastapi>=0.115.0

View File

@@ -1,4 +1,4 @@
claude-agent-sdk>=0.1.0,<0.2.0
claude-agent-sdk>=0.1.39,<0.2.0
python-dotenv>=1.0.0
sqlalchemy>=2.0.0
fastapi>=0.115.0

View File

@@ -7,6 +7,7 @@ The assistant can answer questions about the codebase and features
but cannot modify any files.
"""
import asyncio
import json
import logging
import os
@@ -25,7 +26,12 @@ from .assistant_database import (
create_conversation,
get_messages,
)
from .chat_constants import ROOT_DIR
from .chat_constants import (
MAX_CHAT_RATE_LIMIT_RETRIES,
ROOT_DIR,
calculate_rate_limit_backoff,
check_rate_limit_error,
)
# Load environment variables from .env file if present
load_dotenv()
@@ -393,7 +399,9 @@ class AssistantChatSession:
full_response = ""
# Stream the response
# Stream the response (with rate-limit retry)
for _attempt in range(MAX_CHAT_RATE_LIMIT_RETRIES + 1):
try:
async for msg in self.client.receive_response():
msg_type = type(msg).__name__
@@ -426,6 +434,31 @@ class AssistantChatSession:
"tool": tool_name,
"input": tool_input,
}
# Completed successfully — break out of retry loop
break
except Exception as exc:
is_rate_limit, retry_secs = check_rate_limit_error(exc)
if is_rate_limit and _attempt < MAX_CHAT_RATE_LIMIT_RETRIES:
delay = retry_secs if retry_secs else calculate_rate_limit_backoff(_attempt)
logger.warning(f"Rate limited (attempt {_attempt + 1}/{MAX_CHAT_RATE_LIMIT_RETRIES}), retrying in {delay}s")
yield {
"type": "rate_limited",
"retry_in": delay,
"attempt": _attempt + 1,
"max_attempts": MAX_CHAT_RATE_LIMIT_RETRIES,
}
await asyncio.sleep(delay)
await self.client.query(message)
continue
if is_rate_limit:
logger.error("Rate limit retries exhausted for assistant chat")
yield {"type": "error", "content": "Rate limited. Please try again later."}
return
# Non-rate-limit MessageParseError: log and break (don't crash)
if type(exc).__name__ == "MessageParseError":
logger.warning(f"Ignoring unrecognized message from Claude CLI: {exc}")
break
raise
# Store the complete response in the database
if full_response and self.conversation_id:

View File

@@ -9,6 +9,7 @@ project root and is re-exported here for convenience so that existing
imports (``from .chat_constants import API_ENV_VARS``) continue to work.
"""
import logging
import sys
from pathlib import Path
from typing import AsyncGenerator
@@ -32,6 +33,45 @@ if _root_str not in sys.path:
# imports continue to work unchanged.
# -------------------------------------------------------------------
from env_constants import API_ENV_VARS # noqa: E402, F401
from rate_limit_utils import calculate_rate_limit_backoff, is_rate_limit_error, parse_retry_after # noqa: E402, F401
logger = logging.getLogger(__name__)
# -------------------------------------------------------------------
# Rate-limit handling for chat sessions
# -------------------------------------------------------------------
# Number of retries permitted after the initial attempt. The chat sessions
# iterate ``range(MAX_CHAT_RATE_LIMIT_RETRIES + 1)``, so a response is tried
# at most this many times plus one before the rate limit is reported as an
# error.
MAX_CHAT_RATE_LIMIT_RETRIES = 3
def check_rate_limit_error(exc: Exception) -> tuple[bool, int | None]:
    """Classify an exception as a rate limit and extract a retry delay.

    Two signals are recognized, checked in this order:

    1. A ``MessageParseError`` (matched by class name) whose ``data``
       attribute is a dict with ``type == "rate_limit_event"``.
    2. Any exception whose ``str()`` is accepted by
       ``rate_limit_utils.is_rate_limit_error``.

    Args:
        exc: The exception to inspect.

    Returns:
        ``(is_rate_limit, retry_seconds)``. ``retry_seconds`` is the value
        ``parse_retry_after`` extracts from the payload or error text, or
        ``None`` when no delay is available (the caller should then fall
        back to exponential backoff). ``(False, None)`` when the exception
        is not a rate limit.

    NOTE(review): every ``rate_limit_event`` payload is treated as an active
    rate limit; confirm the CLI never emits this event purely as an
    informational status update.
    """
    # The class is matched by name rather than with isinstance -- presumably
    # so this module does not have to import the SDK's error type; verify
    # before changing.
    if type(exc).__name__ == "MessageParseError":
        payload = getattr(exc, "data", None)
        if isinstance(payload, dict) and payload.get("type") == "rate_limit_event":
            # A dict that carries a "type" key is never empty, so no extra
            # truthiness guard is needed before parsing it.
            return True, parse_retry_after(str(payload))

    # Fallback: match the error text against known rate-limit patterns.
    message = str(exc)
    if is_rate_limit_error(message):
        return True, parse_retry_after(message)

    return False, None
async def make_multimodal_message(content_blocks: list[dict]) -> AsyncGenerator[dict, None]:

View File

@@ -22,7 +22,13 @@ from claude_agent_sdk import ClaudeAgentOptions, ClaudeSDKClient
from dotenv import load_dotenv
from ..schemas import ImageAttachment
from .chat_constants import ROOT_DIR, make_multimodal_message
from .chat_constants import (
MAX_CHAT_RATE_LIMIT_RETRIES,
ROOT_DIR,
calculate_rate_limit_backoff,
check_rate_limit_error,
make_multimodal_message,
)
# Load environment variables from .env file if present
load_dotenv()
@@ -298,7 +304,9 @@ class ExpandChatSession:
else:
await self.client.query(message)
# Stream the response
# Stream the response (with rate-limit retry)
for _attempt in range(MAX_CHAT_RATE_LIMIT_RETRIES + 1):
try:
async for msg in self.client.receive_response():
msg_type = type(msg).__name__
@@ -316,6 +324,47 @@ class ExpandChatSession:
"content": text,
"timestamp": datetime.now().isoformat()
})
# Completed successfully — break out of retry loop
break
except Exception as exc:
is_rate_limit, retry_secs = check_rate_limit_error(exc)
if is_rate_limit and _attempt < MAX_CHAT_RATE_LIMIT_RETRIES:
delay = retry_secs if retry_secs else calculate_rate_limit_backoff(_attempt)
logger.warning(f"Rate limited (attempt {_attempt + 1}/{MAX_CHAT_RATE_LIMIT_RETRIES}), retrying in {delay}s")
yield {
"type": "rate_limited",
"retry_in": delay,
"attempt": _attempt + 1,
"max_attempts": MAX_CHAT_RATE_LIMIT_RETRIES,
}
await asyncio.sleep(delay)
# Re-send the query before retrying receive_response
if attachments and len(attachments) > 0:
content_blocks_retry: list[dict[str, Any]] = []
if message:
content_blocks_retry.append({"type": "text", "text": message})
for att in attachments:
content_blocks_retry.append({
"type": "image",
"source": {
"type": "base64",
"media_type": att.mimeType,
"data": att.base64Data,
}
})
await self.client.query(make_multimodal_message(content_blocks_retry))
else:
await self.client.query(message)
continue
if is_rate_limit:
logger.error("Rate limit retries exhausted for expand chat")
yield {"type": "error", "content": "Rate limited. Please try again later."}
return
# Non-rate-limit MessageParseError: log and break (don't crash)
if type(exc).__name__ == "MessageParseError":
logger.warning(f"Ignoring unrecognized message from Claude CLI: {exc}")
break
raise
def get_features_created(self) -> int:
"""Get the total number of features created in this session."""

View File

@@ -6,6 +6,7 @@ Manages interactive spec creation conversation with Claude.
Uses the create-spec.md skill to guide users through app spec creation.
"""
import asyncio
import json
import logging
import os
@@ -19,7 +20,13 @@ from claude_agent_sdk import ClaudeAgentOptions, ClaudeSDKClient
from dotenv import load_dotenv
from ..schemas import ImageAttachment
from .chat_constants import ROOT_DIR, make_multimodal_message
from .chat_constants import (
MAX_CHAT_RATE_LIMIT_RETRIES,
ROOT_DIR,
calculate_rate_limit_backoff,
check_rate_limit_error,
make_multimodal_message,
)
# Load environment variables from .env file if present
load_dotenv()
@@ -304,7 +311,9 @@ class SpecChatSession:
# Store paths for the completion message
spec_path = None
# Stream the response using receive_response
# Stream the response using receive_response (with rate-limit retry)
for _attempt in range(MAX_CHAT_RATE_LIMIT_RETRIES + 1):
try:
async for msg in self.client.receive_response():
msg_type = type(msg).__name__
@@ -415,6 +424,32 @@ class SpecChatSession:
"type": "spec_complete",
"path": str(spec_path)
}
# Completed successfully — break out of retry loop
break
except Exception as exc:
is_rate_limit, retry_secs = check_rate_limit_error(exc)
if is_rate_limit and _attempt < MAX_CHAT_RATE_LIMIT_RETRIES:
delay = retry_secs if retry_secs else calculate_rate_limit_backoff(_attempt)
logger.warning(f"Rate limited (attempt {_attempt + 1}/{MAX_CHAT_RATE_LIMIT_RETRIES}), retrying in {delay}s")
yield {
"type": "rate_limited",
"retry_in": delay,
"attempt": _attempt + 1,
"max_attempts": MAX_CHAT_RATE_LIMIT_RETRIES,
}
await asyncio.sleep(delay)
# Re-send the query before retrying receive_response
await self.client.query(message)
continue
if is_rate_limit:
logger.error("Rate limit retries exhausted for spec chat")
yield {"type": "error", "content": "Rate limited. Please try again later."}
return
# Non-rate-limit MessageParseError: log and break (don't crash)
if type(exc).__name__ == "MessageParseError":
logger.warning(f"Ignoring unrecognized message from Claude CLI: {exc}")
break
raise
def is_complete(self) -> bool:
"""Check if spec creation is complete."""

View File

@@ -269,6 +269,20 @@ export function useAssistantChat({
break;
}
case "rate_limited": {
// Show rate limit info as system message
setMessages((prev) => [
...prev,
{
id: generateId(),
role: "system",
content: `Rate limited. Retrying in ${data.retry_in}s... (attempt ${data.attempt}/${data.max_attempts})`,
timestamp: new Date(),
},
]);
break;
}
case "pong": {
// Keep-alive response, nothing to do
break;

View File

@@ -226,6 +226,20 @@ export function useExpandChat({
break
}
case 'rate_limited': {
// Show rate limit info as system message
setMessages((prev) => [
...prev,
{
id: generateId(),
role: 'system',
content: `Rate limited. Retrying in ${data.retry_in}s... (attempt ${data.attempt}/${data.max_attempts})`,
timestamp: new Date(),
},
])
break
}
case 'pong': {
// Keep-alive response, nothing to do
break

View File

@@ -322,6 +322,20 @@ export function useSpecChat({
break
}
case 'rate_limited': {
// Show rate limit info as system message
setMessages((prev) => [
...prev,
{
id: generateId(),
role: 'system',
content: `Rate limited. Retrying in ${data.retry_in}s... (attempt ${data.attempt}/${data.max_attempts})`,
timestamp: new Date(),
},
])
break
}
case 'pong': {
// Keep-alive response, nothing to do
break

View File

@@ -407,6 +407,13 @@ export interface SpecChatResponseDoneMessage {
type: 'response_done'
}
// Server message sent when the backend hit a rate limit and is about to
// retry the current query. The hooks render it as a system chat message.
export interface SpecChatRateLimitedMessage {
type: 'rate_limited'
// Delay in seconds before the backend retries
retry_in: number
// 1-based number of the retry that is about to happen
attempt: number
// Maximum number of retries the backend will make
max_attempts: number
}
export type SpecChatServerMessage =
| SpecChatTextMessage
| SpecChatQuestionMessage
@@ -416,6 +423,7 @@ export type SpecChatServerMessage =
| SpecChatErrorMessage
| SpecChatPongMessage
| SpecChatResponseDoneMessage
| SpecChatRateLimitedMessage
// Image attachment for chat messages
export interface ImageAttachment {
@@ -501,6 +509,13 @@ export interface AssistantChatPongMessage {
type: 'pong'
}
// Server message sent when the assistant chat backend hit a rate limit and
// is about to retry the current query.
export interface AssistantChatRateLimitedMessage {
type: 'rate_limited'
// Delay in seconds before the backend retries
retry_in: number
// 1-based number of the retry that is about to happen
attempt: number
// Maximum number of retries the backend will make
max_attempts: number
}
export type AssistantChatServerMessage =
| AssistantChatTextMessage
| AssistantChatToolCallMessage
@@ -509,6 +524,7 @@ export type AssistantChatServerMessage =
| AssistantChatErrorMessage
| AssistantChatConversationCreatedMessage
| AssistantChatPongMessage
| AssistantChatRateLimitedMessage
// ============================================================================
// Expand Chat Types
@@ -532,6 +548,7 @@ export type ExpandChatServerMessage =
| SpecChatErrorMessage // Reuse error message type
| SpecChatPongMessage // Reuse pong message type
| SpecChatResponseDoneMessage // Reuse response_done type
| SpecChatRateLimitedMessage // Reuse rate_limited message type
// Bulk feature creation
export interface FeatureBulkCreate {