mirror of
https://github.com/leonvanzyl/autocoder.git
synced 2026-03-16 18:33:08 +00:00
fix: handle rate_limit_event crash in chat sessions
The Claude CLI sends `rate_limit_event` messages that the SDK's `parse_message()` doesn't recognize, raising `MessageParseError` and crashing all three chat session types (spec, assistant, expand).

Changes:
- Bump claude-agent-sdk minimum from 0.1.0 to 0.1.39
- Add `check_rate_limit_error()` helper in chat_constants.py that detects rate limits from both MessageParseError data payloads and error message text patterns
- Wrap `receive_response()` loops in all three `_query_claude()` methods with retry-on-rate-limit logic (up to 3 retries with backoff)
- Gracefully log and skip non-rate-limit MessageParseError instead of crashing the session
- Add `rate_limited` message type to frontend TypeScript types and handle it in useSpecChat, useAssistantChat, useExpandChat hooks to show "Rate limited. Retrying in Xs..." system messages

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -13,5 +13,6 @@
|
||||
"runtimeArgs": ["/c", "cd ui && npx vite"],
|
||||
"port": 5173
|
||||
}
|
||||
]
|
||||
],
|
||||
"autoVerify": true
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# Production runtime dependencies only
|
||||
# For development, use requirements.txt (includes ruff, mypy, pytest)
|
||||
claude-agent-sdk>=0.1.0,<0.2.0
|
||||
claude-agent-sdk>=0.1.39,<0.2.0
|
||||
python-dotenv>=1.0.0
|
||||
sqlalchemy>=2.0.0
|
||||
fastapi>=0.115.0
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
claude-agent-sdk>=0.1.0,<0.2.0
|
||||
claude-agent-sdk>=0.1.39,<0.2.0
|
||||
python-dotenv>=1.0.0
|
||||
sqlalchemy>=2.0.0
|
||||
fastapi>=0.115.0
|
||||
|
||||
@@ -7,6 +7,7 @@ The assistant can answer questions about the codebase and features
|
||||
but cannot modify any files.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
@@ -25,7 +26,12 @@ from .assistant_database import (
|
||||
create_conversation,
|
||||
get_messages,
|
||||
)
|
||||
from .chat_constants import ROOT_DIR
|
||||
from .chat_constants import (
|
||||
MAX_CHAT_RATE_LIMIT_RETRIES,
|
||||
ROOT_DIR,
|
||||
calculate_rate_limit_backoff,
|
||||
check_rate_limit_error,
|
||||
)
|
||||
|
||||
# Load environment variables from .env file if present
|
||||
load_dotenv()
|
||||
@@ -393,39 +399,66 @@ class AssistantChatSession:
|
||||
|
||||
full_response = ""
|
||||
|
||||
# Stream the response
|
||||
async for msg in self.client.receive_response():
|
||||
msg_type = type(msg).__name__
|
||||
# Stream the response (with rate-limit retry)
|
||||
for _attempt in range(MAX_CHAT_RATE_LIMIT_RETRIES + 1):
|
||||
try:
|
||||
async for msg in self.client.receive_response():
|
||||
msg_type = type(msg).__name__
|
||||
|
||||
if msg_type == "AssistantMessage" and hasattr(msg, "content"):
|
||||
for block in msg.content:
|
||||
block_type = type(block).__name__
|
||||
if msg_type == "AssistantMessage" and hasattr(msg, "content"):
|
||||
for block in msg.content:
|
||||
block_type = type(block).__name__
|
||||
|
||||
if block_type == "TextBlock" and hasattr(block, "text"):
|
||||
text = block.text
|
||||
if text:
|
||||
full_response += text
|
||||
yield {"type": "text", "content": text}
|
||||
if block_type == "TextBlock" and hasattr(block, "text"):
|
||||
text = block.text
|
||||
if text:
|
||||
full_response += text
|
||||
yield {"type": "text", "content": text}
|
||||
|
||||
elif block_type == "ToolUseBlock" and hasattr(block, "name"):
|
||||
tool_name = block.name
|
||||
tool_input = getattr(block, "input", {})
|
||||
elif block_type == "ToolUseBlock" and hasattr(block, "name"):
|
||||
tool_name = block.name
|
||||
tool_input = getattr(block, "input", {})
|
||||
|
||||
# Intercept ask_user tool calls -> yield as question message
|
||||
if tool_name == "mcp__features__ask_user":
|
||||
questions = tool_input.get("questions", [])
|
||||
if questions:
|
||||
yield {
|
||||
"type": "question",
|
||||
"questions": questions,
|
||||
}
|
||||
continue
|
||||
|
||||
# Intercept ask_user tool calls -> yield as question message
|
||||
if tool_name == "mcp__features__ask_user":
|
||||
questions = tool_input.get("questions", [])
|
||||
if questions:
|
||||
yield {
|
||||
"type": "question",
|
||||
"questions": questions,
|
||||
"type": "tool_call",
|
||||
"tool": tool_name,
|
||||
"input": tool_input,
|
||||
}
|
||||
continue
|
||||
|
||||
yield {
|
||||
"type": "tool_call",
|
||||
"tool": tool_name,
|
||||
"input": tool_input,
|
||||
}
|
||||
# Completed successfully — break out of retry loop
|
||||
break
|
||||
except Exception as exc:
|
||||
is_rate_limit, retry_secs = check_rate_limit_error(exc)
|
||||
if is_rate_limit and _attempt < MAX_CHAT_RATE_LIMIT_RETRIES:
|
||||
delay = retry_secs if retry_secs else calculate_rate_limit_backoff(_attempt)
|
||||
logger.warning(f"Rate limited (attempt {_attempt + 1}/{MAX_CHAT_RATE_LIMIT_RETRIES}), retrying in {delay}s")
|
||||
yield {
|
||||
"type": "rate_limited",
|
||||
"retry_in": delay,
|
||||
"attempt": _attempt + 1,
|
||||
"max_attempts": MAX_CHAT_RATE_LIMIT_RETRIES,
|
||||
}
|
||||
await asyncio.sleep(delay)
|
||||
await self.client.query(message)
|
||||
continue
|
||||
if is_rate_limit:
|
||||
logger.error("Rate limit retries exhausted for assistant chat")
|
||||
yield {"type": "error", "content": "Rate limited. Please try again later."}
|
||||
return
|
||||
# Non-rate-limit MessageParseError: log and break (don't crash)
|
||||
if type(exc).__name__ == "MessageParseError":
|
||||
logger.warning(f"Ignoring unrecognized message from Claude CLI: {exc}")
|
||||
break
|
||||
raise
|
||||
|
||||
# Store the complete response in the database
|
||||
if full_response and self.conversation_id:
|
||||
|
||||
@@ -9,6 +9,7 @@ project root and is re-exported here for convenience so that existing
|
||||
imports (``from .chat_constants import API_ENV_VARS``) continue to work.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import AsyncGenerator
|
||||
@@ -32,6 +33,45 @@ if _root_str not in sys.path:
|
||||
# imports continue to work unchanged.
|
||||
# -------------------------------------------------------------------
|
||||
from env_constants import API_ENV_VARS # noqa: E402, F401
|
||||
from rate_limit_utils import calculate_rate_limit_backoff, is_rate_limit_error, parse_retry_after # noqa: E402, F401
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# -------------------------------------------------------------------
|
||||
# Rate-limit handling for chat sessions
|
||||
# -------------------------------------------------------------------
|
||||
MAX_CHAT_RATE_LIMIT_RETRIES = 3
|
||||
|
||||
|
||||
def check_rate_limit_error(exc: Exception) -> tuple[bool, int | None]:
    """Inspect an exception and determine if it represents a rate-limit.

    Two detection paths are tried, in this order:

    1. The exception's class is named ``MessageParseError`` and its ``data``
       attribute is a dict with ``type == "rate_limit_event"`` (the payload
       the Claude CLI sends).
    2. The exception's string representation is accepted by
       ``rate_limit_utils.is_rate_limit_error``.

    Args:
        exc: The exception caught while streaming a chat response.

    Returns:
        ``(is_rate_limit, retry_seconds)``. ``retry_seconds`` is whatever
        ``parse_retry_after`` extracts from the payload / error text, or
        ``None`` when nothing is available (caller should then fall back to
        exponential backoff). ``(False, None)`` when neither path matches.
    """
    # The class is matched by name, so this helper does not need the SDK's
    # exception type in scope to recognise it.
    if type(exc).__name__ == "MessageParseError":
        payload = getattr(exc, "data", None)
        if isinstance(payload, dict) and payload.get("type") == "rate_limit_event":
            # A dict that contains a "type" key can never be empty, so no
            # extra emptiness guard is needed before parsing it.
            return True, parse_retry_after(str(payload))

    # Fallback: match the error text against known rate-limit patterns.
    # The string is only built here because the payload path does not use it.
    exc_text = str(exc)
    if is_rate_limit_error(exc_text):
        return True, parse_retry_after(exc_text)

    return False, None
|
||||
|
||||
|
||||
async def make_multimodal_message(content_blocks: list[dict]) -> AsyncGenerator[dict, None]:
|
||||
|
||||
@@ -22,7 +22,13 @@ from claude_agent_sdk import ClaudeAgentOptions, ClaudeSDKClient
|
||||
from dotenv import load_dotenv
|
||||
|
||||
from ..schemas import ImageAttachment
|
||||
from .chat_constants import ROOT_DIR, make_multimodal_message
|
||||
from .chat_constants import (
|
||||
MAX_CHAT_RATE_LIMIT_RETRIES,
|
||||
ROOT_DIR,
|
||||
calculate_rate_limit_backoff,
|
||||
check_rate_limit_error,
|
||||
make_multimodal_message,
|
||||
)
|
||||
|
||||
# Load environment variables from .env file if present
|
||||
load_dotenv()
|
||||
@@ -298,24 +304,67 @@ class ExpandChatSession:
|
||||
else:
|
||||
await self.client.query(message)
|
||||
|
||||
# Stream the response
|
||||
async for msg in self.client.receive_response():
|
||||
msg_type = type(msg).__name__
|
||||
# Stream the response (with rate-limit retry)
|
||||
for _attempt in range(MAX_CHAT_RATE_LIMIT_RETRIES + 1):
|
||||
try:
|
||||
async for msg in self.client.receive_response():
|
||||
msg_type = type(msg).__name__
|
||||
|
||||
if msg_type == "AssistantMessage" and hasattr(msg, "content"):
|
||||
for block in msg.content:
|
||||
block_type = type(block).__name__
|
||||
if msg_type == "AssistantMessage" and hasattr(msg, "content"):
|
||||
for block in msg.content:
|
||||
block_type = type(block).__name__
|
||||
|
||||
if block_type == "TextBlock" and hasattr(block, "text"):
|
||||
text = block.text
|
||||
if text:
|
||||
yield {"type": "text", "content": text}
|
||||
if block_type == "TextBlock" and hasattr(block, "text"):
|
||||
text = block.text
|
||||
if text:
|
||||
yield {"type": "text", "content": text}
|
||||
|
||||
self.messages.append({
|
||||
"role": "assistant",
|
||||
"content": text,
|
||||
"timestamp": datetime.now().isoformat()
|
||||
self.messages.append({
|
||||
"role": "assistant",
|
||||
"content": text,
|
||||
"timestamp": datetime.now().isoformat()
|
||||
})
|
||||
# Completed successfully — break out of retry loop
|
||||
break
|
||||
except Exception as exc:
|
||||
is_rate_limit, retry_secs = check_rate_limit_error(exc)
|
||||
if is_rate_limit and _attempt < MAX_CHAT_RATE_LIMIT_RETRIES:
|
||||
delay = retry_secs if retry_secs else calculate_rate_limit_backoff(_attempt)
|
||||
logger.warning(f"Rate limited (attempt {_attempt + 1}/{MAX_CHAT_RATE_LIMIT_RETRIES}), retrying in {delay}s")
|
||||
yield {
|
||||
"type": "rate_limited",
|
||||
"retry_in": delay,
|
||||
"attempt": _attempt + 1,
|
||||
"max_attempts": MAX_CHAT_RATE_LIMIT_RETRIES,
|
||||
}
|
||||
await asyncio.sleep(delay)
|
||||
# Re-send the query before retrying receive_response
|
||||
if attachments and len(attachments) > 0:
|
||||
content_blocks_retry: list[dict[str, Any]] = []
|
||||
if message:
|
||||
content_blocks_retry.append({"type": "text", "text": message})
|
||||
for att in attachments:
|
||||
content_blocks_retry.append({
|
||||
"type": "image",
|
||||
"source": {
|
||||
"type": "base64",
|
||||
"media_type": att.mimeType,
|
||||
"data": att.base64Data,
|
||||
}
|
||||
})
|
||||
await self.client.query(make_multimodal_message(content_blocks_retry))
|
||||
else:
|
||||
await self.client.query(message)
|
||||
continue
|
||||
if is_rate_limit:
|
||||
logger.error("Rate limit retries exhausted for expand chat")
|
||||
yield {"type": "error", "content": "Rate limited. Please try again later."}
|
||||
return
|
||||
# Non-rate-limit MessageParseError: log and break (don't crash)
|
||||
if type(exc).__name__ == "MessageParseError":
|
||||
logger.warning(f"Ignoring unrecognized message from Claude CLI: {exc}")
|
||||
break
|
||||
raise
|
||||
|
||||
def get_features_created(self) -> int:
|
||||
"""Get the total number of features created in this session."""
|
||||
|
||||
@@ -6,6 +6,7 @@ Manages interactive spec creation conversation with Claude.
|
||||
Uses the create-spec.md skill to guide users through app spec creation.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
@@ -19,7 +20,13 @@ from claude_agent_sdk import ClaudeAgentOptions, ClaudeSDKClient
|
||||
from dotenv import load_dotenv
|
||||
|
||||
from ..schemas import ImageAttachment
|
||||
from .chat_constants import ROOT_DIR, make_multimodal_message
|
||||
from .chat_constants import (
|
||||
MAX_CHAT_RATE_LIMIT_RETRIES,
|
||||
ROOT_DIR,
|
||||
calculate_rate_limit_backoff,
|
||||
check_rate_limit_error,
|
||||
make_multimodal_message,
|
||||
)
|
||||
|
||||
# Load environment variables from .env file if present
|
||||
load_dotenv()
|
||||
@@ -304,117 +311,145 @@ class SpecChatSession:
|
||||
# Store paths for the completion message
|
||||
spec_path = None
|
||||
|
||||
# Stream the response using receive_response
|
||||
async for msg in self.client.receive_response():
|
||||
msg_type = type(msg).__name__
|
||||
# Stream the response using receive_response (with rate-limit retry)
|
||||
for _attempt in range(MAX_CHAT_RATE_LIMIT_RETRIES + 1):
|
||||
try:
|
||||
async for msg in self.client.receive_response():
|
||||
msg_type = type(msg).__name__
|
||||
|
||||
if msg_type == "AssistantMessage" and hasattr(msg, "content"):
|
||||
# Process content blocks in the assistant message
|
||||
for block in msg.content:
|
||||
block_type = type(block).__name__
|
||||
if msg_type == "AssistantMessage" and hasattr(msg, "content"):
|
||||
# Process content blocks in the assistant message
|
||||
for block in msg.content:
|
||||
block_type = type(block).__name__
|
||||
|
||||
if block_type == "TextBlock" and hasattr(block, "text"):
|
||||
# Accumulate text and yield it
|
||||
text = block.text
|
||||
if text:
|
||||
current_text += text
|
||||
yield {"type": "text", "content": text}
|
||||
if block_type == "TextBlock" and hasattr(block, "text"):
|
||||
# Accumulate text and yield it
|
||||
text = block.text
|
||||
if text:
|
||||
current_text += text
|
||||
yield {"type": "text", "content": text}
|
||||
|
||||
# Store in message history
|
||||
self.messages.append({
|
||||
"role": "assistant",
|
||||
"content": text,
|
||||
"timestamp": datetime.now().isoformat()
|
||||
})
|
||||
# Store in message history
|
||||
self.messages.append({
|
||||
"role": "assistant",
|
||||
"content": text,
|
||||
"timestamp": datetime.now().isoformat()
|
||||
})
|
||||
|
||||
elif block_type == "ToolUseBlock" and hasattr(block, "name"):
|
||||
tool_name = block.name
|
||||
tool_input = getattr(block, "input", {})
|
||||
tool_id = getattr(block, "id", "")
|
||||
elif block_type == "ToolUseBlock" and hasattr(block, "name"):
|
||||
tool_name = block.name
|
||||
tool_input = getattr(block, "input", {})
|
||||
tool_id = getattr(block, "id", "")
|
||||
|
||||
if tool_name in ("Write", "Edit"):
|
||||
# File being written or edited - track for verification
|
||||
file_path = tool_input.get("file_path", "")
|
||||
if tool_name in ("Write", "Edit"):
|
||||
# File being written or edited - track for verification
|
||||
file_path = tool_input.get("file_path", "")
|
||||
|
||||
# Track app_spec.txt
|
||||
if "app_spec.txt" in str(file_path):
|
||||
pending_writes["app_spec"] = {
|
||||
"tool_id": tool_id,
|
||||
"path": file_path
|
||||
}
|
||||
logger.info(f"{tool_name} tool called for app_spec.txt: {file_path}")
|
||||
# Track app_spec.txt
|
||||
if "app_spec.txt" in str(file_path):
|
||||
pending_writes["app_spec"] = {
|
||||
"tool_id": tool_id,
|
||||
"path": file_path
|
||||
}
|
||||
logger.info(f"{tool_name} tool called for app_spec.txt: {file_path}")
|
||||
|
||||
# Track initializer_prompt.md
|
||||
elif "initializer_prompt.md" in str(file_path):
|
||||
pending_writes["initializer"] = {
|
||||
"tool_id": tool_id,
|
||||
"path": file_path
|
||||
}
|
||||
logger.info(f"{tool_name} tool called for initializer_prompt.md: {file_path}")
|
||||
# Track initializer_prompt.md
|
||||
elif "initializer_prompt.md" in str(file_path):
|
||||
pending_writes["initializer"] = {
|
||||
"tool_id": tool_id,
|
||||
"path": file_path
|
||||
}
|
||||
logger.info(f"{tool_name} tool called for initializer_prompt.md: {file_path}")
|
||||
|
||||
elif msg_type == "UserMessage" and hasattr(msg, "content"):
|
||||
# Tool results - check for write confirmations and errors
|
||||
for block in msg.content:
|
||||
block_type = type(block).__name__
|
||||
if block_type == "ToolResultBlock":
|
||||
is_error = getattr(block, "is_error", False)
|
||||
tool_use_id = getattr(block, "tool_use_id", "")
|
||||
elif msg_type == "UserMessage" and hasattr(msg, "content"):
|
||||
# Tool results - check for write confirmations and errors
|
||||
for block in msg.content:
|
||||
block_type = type(block).__name__
|
||||
if block_type == "ToolResultBlock":
|
||||
is_error = getattr(block, "is_error", False)
|
||||
tool_use_id = getattr(block, "tool_use_id", "")
|
||||
|
||||
if is_error:
|
||||
content = getattr(block, "content", "Unknown error")
|
||||
logger.warning(f"Tool error: {content}")
|
||||
# Clear any pending writes that failed
|
||||
for key in pending_writes:
|
||||
pending_write = pending_writes[key]
|
||||
if pending_write is not None and tool_use_id == pending_write.get("tool_id"):
|
||||
logger.error(f"{key} write failed: {content}")
|
||||
pending_writes[key] = None
|
||||
else:
|
||||
# Tool succeeded - check which file was written
|
||||
|
||||
# Check app_spec.txt
|
||||
if pending_writes["app_spec"] and tool_use_id == pending_writes["app_spec"].get("tool_id"):
|
||||
file_path = pending_writes["app_spec"]["path"]
|
||||
full_path = Path(file_path) if Path(file_path).is_absolute() else self.project_dir / file_path
|
||||
if full_path.exists():
|
||||
logger.info(f"app_spec.txt verified at: {full_path}")
|
||||
files_written["app_spec"] = True
|
||||
spec_path = file_path
|
||||
|
||||
# Notify about file write (but NOT completion yet)
|
||||
yield {
|
||||
"type": "file_written",
|
||||
"path": str(file_path)
|
||||
}
|
||||
if is_error:
|
||||
content = getattr(block, "content", "Unknown error")
|
||||
logger.warning(f"Tool error: {content}")
|
||||
# Clear any pending writes that failed
|
||||
for key in pending_writes:
|
||||
pending_write = pending_writes[key]
|
||||
if pending_write is not None and tool_use_id == pending_write.get("tool_id"):
|
||||
logger.error(f"{key} write failed: {content}")
|
||||
pending_writes[key] = None
|
||||
else:
|
||||
logger.error(f"app_spec.txt not found after write: {full_path}")
|
||||
pending_writes["app_spec"] = None
|
||||
# Tool succeeded - check which file was written
|
||||
|
||||
# Check initializer_prompt.md
|
||||
if pending_writes["initializer"] and tool_use_id == pending_writes["initializer"].get("tool_id"):
|
||||
file_path = pending_writes["initializer"]["path"]
|
||||
full_path = Path(file_path) if Path(file_path).is_absolute() else self.project_dir / file_path
|
||||
if full_path.exists():
|
||||
logger.info(f"initializer_prompt.md verified at: {full_path}")
|
||||
files_written["initializer"] = True
|
||||
# Check app_spec.txt
|
||||
if pending_writes["app_spec"] and tool_use_id == pending_writes["app_spec"].get("tool_id"):
|
||||
file_path = pending_writes["app_spec"]["path"]
|
||||
full_path = Path(file_path) if Path(file_path).is_absolute() else self.project_dir / file_path
|
||||
if full_path.exists():
|
||||
logger.info(f"app_spec.txt verified at: {full_path}")
|
||||
files_written["app_spec"] = True
|
||||
spec_path = file_path
|
||||
|
||||
# Notify about file write
|
||||
yield {
|
||||
"type": "file_written",
|
||||
"path": str(file_path)
|
||||
}
|
||||
else:
|
||||
logger.error(f"initializer_prompt.md not found after write: {full_path}")
|
||||
pending_writes["initializer"] = None
|
||||
# Notify about file write (but NOT completion yet)
|
||||
yield {
|
||||
"type": "file_written",
|
||||
"path": str(file_path)
|
||||
}
|
||||
else:
|
||||
logger.error(f"app_spec.txt not found after write: {full_path}")
|
||||
pending_writes["app_spec"] = None
|
||||
|
||||
# Check if BOTH files are now written - only then signal completion
|
||||
if files_written["app_spec"] and files_written["initializer"]:
|
||||
logger.info("Both app_spec.txt and initializer_prompt.md verified - signaling completion")
|
||||
self.complete = True
|
||||
yield {
|
||||
"type": "spec_complete",
|
||||
"path": str(spec_path)
|
||||
}
|
||||
# Check initializer_prompt.md
|
||||
if pending_writes["initializer"] and tool_use_id == pending_writes["initializer"].get("tool_id"):
|
||||
file_path = pending_writes["initializer"]["path"]
|
||||
full_path = Path(file_path) if Path(file_path).is_absolute() else self.project_dir / file_path
|
||||
if full_path.exists():
|
||||
logger.info(f"initializer_prompt.md verified at: {full_path}")
|
||||
files_written["initializer"] = True
|
||||
|
||||
# Notify about file write
|
||||
yield {
|
||||
"type": "file_written",
|
||||
"path": str(file_path)
|
||||
}
|
||||
else:
|
||||
logger.error(f"initializer_prompt.md not found after write: {full_path}")
|
||||
pending_writes["initializer"] = None
|
||||
|
||||
# Check if BOTH files are now written - only then signal completion
|
||||
if files_written["app_spec"] and files_written["initializer"]:
|
||||
logger.info("Both app_spec.txt and initializer_prompt.md verified - signaling completion")
|
||||
self.complete = True
|
||||
yield {
|
||||
"type": "spec_complete",
|
||||
"path": str(spec_path)
|
||||
}
|
||||
# Completed successfully — break out of retry loop
|
||||
break
|
||||
except Exception as exc:
|
||||
is_rate_limit, retry_secs = check_rate_limit_error(exc)
|
||||
if is_rate_limit and _attempt < MAX_CHAT_RATE_LIMIT_RETRIES:
|
||||
delay = retry_secs if retry_secs else calculate_rate_limit_backoff(_attempt)
|
||||
logger.warning(f"Rate limited (attempt {_attempt + 1}/{MAX_CHAT_RATE_LIMIT_RETRIES}), retrying in {delay}s")
|
||||
yield {
|
||||
"type": "rate_limited",
|
||||
"retry_in": delay,
|
||||
"attempt": _attempt + 1,
|
||||
"max_attempts": MAX_CHAT_RATE_LIMIT_RETRIES,
|
||||
}
|
||||
await asyncio.sleep(delay)
|
||||
# Re-send the query before retrying receive_response
|
||||
await self.client.query(message)
|
||||
continue
|
||||
if is_rate_limit:
|
||||
logger.error("Rate limit retries exhausted for spec chat")
|
||||
yield {"type": "error", "content": "Rate limited. Please try again later."}
|
||||
return
|
||||
# Non-rate-limit MessageParseError: log and break (don't crash)
|
||||
if type(exc).__name__ == "MessageParseError":
|
||||
logger.warning(f"Ignoring unrecognized message from Claude CLI: {exc}")
|
||||
break
|
||||
raise
|
||||
|
||||
def is_complete(self) -> bool:
|
||||
"""Check if spec creation is complete."""
|
||||
|
||||
@@ -269,6 +269,20 @@ export function useAssistantChat({
|
||||
break;
|
||||
}
|
||||
|
||||
case "rate_limited": {
|
||||
// Show rate limit info as system message
|
||||
setMessages((prev) => [
|
||||
...prev,
|
||||
{
|
||||
id: generateId(),
|
||||
role: "system",
|
||||
content: `Rate limited. Retrying in ${data.retry_in}s... (attempt ${data.attempt}/${data.max_attempts})`,
|
||||
timestamp: new Date(),
|
||||
},
|
||||
]);
|
||||
break;
|
||||
}
|
||||
|
||||
case "pong": {
|
||||
// Keep-alive response, nothing to do
|
||||
break;
|
||||
|
||||
@@ -226,6 +226,20 @@ export function useExpandChat({
|
||||
break
|
||||
}
|
||||
|
||||
case 'rate_limited': {
|
||||
// Show rate limit info as system message
|
||||
setMessages((prev) => [
|
||||
...prev,
|
||||
{
|
||||
id: generateId(),
|
||||
role: 'system',
|
||||
content: `Rate limited. Retrying in ${data.retry_in}s... (attempt ${data.attempt}/${data.max_attempts})`,
|
||||
timestamp: new Date(),
|
||||
},
|
||||
])
|
||||
break
|
||||
}
|
||||
|
||||
case 'pong': {
|
||||
// Keep-alive response, nothing to do
|
||||
break
|
||||
|
||||
@@ -322,6 +322,20 @@ export function useSpecChat({
|
||||
break
|
||||
}
|
||||
|
||||
case 'rate_limited': {
|
||||
// Show rate limit info as system message
|
||||
setMessages((prev) => [
|
||||
...prev,
|
||||
{
|
||||
id: generateId(),
|
||||
role: 'system',
|
||||
content: `Rate limited. Retrying in ${data.retry_in}s... (attempt ${data.attempt}/${data.max_attempts})`,
|
||||
timestamp: new Date(),
|
||||
},
|
||||
])
|
||||
break
|
||||
}
|
||||
|
||||
case 'pong': {
|
||||
// Keep-alive response, nothing to do
|
||||
break
|
||||
|
||||
@@ -407,6 +407,13 @@ export interface SpecChatResponseDoneMessage {
|
||||
type: 'response_done'
|
||||
}
|
||||
|
||||
export interface SpecChatRateLimitedMessage {
|
||||
type: 'rate_limited'
|
||||
retry_in: number
|
||||
attempt: number
|
||||
max_attempts: number
|
||||
}
|
||||
|
||||
export type SpecChatServerMessage =
|
||||
| SpecChatTextMessage
|
||||
| SpecChatQuestionMessage
|
||||
@@ -416,6 +423,7 @@ export type SpecChatServerMessage =
|
||||
| SpecChatErrorMessage
|
||||
| SpecChatPongMessage
|
||||
| SpecChatResponseDoneMessage
|
||||
| SpecChatRateLimitedMessage
|
||||
|
||||
// Image attachment for chat messages
|
||||
export interface ImageAttachment {
|
||||
@@ -501,6 +509,13 @@ export interface AssistantChatPongMessage {
|
||||
type: 'pong'
|
||||
}
|
||||
|
||||
export interface AssistantChatRateLimitedMessage {
|
||||
type: 'rate_limited'
|
||||
retry_in: number
|
||||
attempt: number
|
||||
max_attempts: number
|
||||
}
|
||||
|
||||
export type AssistantChatServerMessage =
|
||||
| AssistantChatTextMessage
|
||||
| AssistantChatToolCallMessage
|
||||
@@ -509,6 +524,7 @@ export type AssistantChatServerMessage =
|
||||
| AssistantChatErrorMessage
|
||||
| AssistantChatConversationCreatedMessage
|
||||
| AssistantChatPongMessage
|
||||
| AssistantChatRateLimitedMessage
|
||||
|
||||
// ============================================================================
|
||||
// Expand Chat Types
|
||||
@@ -532,6 +548,7 @@ export type ExpandChatServerMessage =
|
||||
| SpecChatErrorMessage // Reuse error message type
|
||||
| SpecChatPongMessage // Reuse pong message type
|
||||
| SpecChatResponseDoneMessage // Reuse response_done type
|
||||
| SpecChatRateLimitedMessage // Reuse rate_limited message type
|
||||
|
||||
// Bulk feature creation
|
||||
export interface FeatureBulkCreate {
|
||||
|
||||
Reference in New Issue
Block a user