mirror of
https://github.com/leonvanzyl/autocoder.git
synced 2026-03-17 02:43:09 +00:00
fix: resolve false-positive rate limit and one-message-behind in chat sessions
The Claude Code CLI v2.1.45+ emits a `rate_limit_event` message type that the Python SDK v0.1.19 cannot parse, raising MessageParseError. Two bugs resulted:

1. **False-positive rate limit**: check_rate_limit_error() matched "rate_limit" in the exception string "Unknown message type: rate_limit_event" via both an explicit type check and a regex fallback, triggering 15-19s backoff + query re-send on every session.
2. **One-message-behind**: The MessageParseError killed the receive_response() async generator, but the CLI subprocess was still alive with buffered response data. Catching and returning meant the response was never consumed. The next send_message() would read the previous response first, creating a one-behind offset.

Changes:

- chat_constants.py: check_rate_limit_error() now returns (False, None) for any MessageParseError, blocking both false-positive paths. Added safe_receive_response() helper that retries receive_response() on MessageParseError — the SDK's decoupled producer/consumer architecture (anyio memory channel) allows the new generator to continue reading remaining messages without data loss. Removed calculate_rate_limit_backoff re-export and MAX_CHAT_RATE_LIMIT_RETRIES constant.
- spec_chat_session.py, assistant_chat_session.py, expand_chat_session.py: Replaced retry-with-backoff loops with safe_receive_response() wrapper. Removed asyncio.sleep backoff, query re-send, and rate_limited yield. Cleaned up unused imports (asyncio, calculate_rate_limit_backoff, MAX_CHAT_RATE_LIMIT_RETRIES).
- agent.py: Added inner retry loop around receive_response() with same MessageParseError skip-and-restart pattern. Removed early-return that truncated responses.
- types.ts: Removed SpecChatRateLimitedMessage, AssistantChatRateLimitedMessage, and their union entries.
- useSpecChat.ts, useAssistantChat.ts, useExpandChat.ts: Removed dead 'rate_limited' case handlers.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -7,7 +7,6 @@ The assistant can answer questions about the codebase and features
|
||||
but cannot modify any files.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
@@ -27,10 +26,9 @@ from .assistant_database import (
|
||||
get_messages,
|
||||
)
|
||||
from .chat_constants import (
|
||||
MAX_CHAT_RATE_LIMIT_RETRIES,
|
||||
ROOT_DIR,
|
||||
calculate_rate_limit_backoff,
|
||||
check_rate_limit_error,
|
||||
safe_receive_response,
|
||||
)
|
||||
|
||||
# Load environment variables from .env file if present
|
||||
@@ -399,66 +397,47 @@ class AssistantChatSession:
|
||||
|
||||
full_response = ""
|
||||
|
||||
# Stream the response (with rate-limit retry)
|
||||
for _attempt in range(MAX_CHAT_RATE_LIMIT_RETRIES + 1):
|
||||
try:
|
||||
async for msg in self.client.receive_response():
|
||||
msg_type = type(msg).__name__
|
||||
# Stream the response
|
||||
try:
|
||||
async for msg in safe_receive_response(self.client, logger):
|
||||
msg_type = type(msg).__name__
|
||||
|
||||
if msg_type == "AssistantMessage" and hasattr(msg, "content"):
|
||||
for block in msg.content:
|
||||
block_type = type(block).__name__
|
||||
if msg_type == "AssistantMessage" and hasattr(msg, "content"):
|
||||
for block in msg.content:
|
||||
block_type = type(block).__name__
|
||||
|
||||
if block_type == "TextBlock" and hasattr(block, "text"):
|
||||
text = block.text
|
||||
if text:
|
||||
full_response += text
|
||||
yield {"type": "text", "content": text}
|
||||
if block_type == "TextBlock" and hasattr(block, "text"):
|
||||
text = block.text
|
||||
if text:
|
||||
full_response += text
|
||||
yield {"type": "text", "content": text}
|
||||
|
||||
elif block_type == "ToolUseBlock" and hasattr(block, "name"):
|
||||
tool_name = block.name
|
||||
tool_input = getattr(block, "input", {})
|
||||
elif block_type == "ToolUseBlock" and hasattr(block, "name"):
|
||||
tool_name = block.name
|
||||
tool_input = getattr(block, "input", {})
|
||||
|
||||
# Intercept ask_user tool calls -> yield as question message
|
||||
if tool_name == "mcp__features__ask_user":
|
||||
questions = tool_input.get("questions", [])
|
||||
if questions:
|
||||
yield {
|
||||
"type": "question",
|
||||
"questions": questions,
|
||||
}
|
||||
continue
|
||||
# Intercept ask_user tool calls -> yield as question message
|
||||
if tool_name == "mcp__features__ask_user":
|
||||
questions = tool_input.get("questions", [])
|
||||
if questions:
|
||||
yield {
|
||||
"type": "question",
|
||||
"questions": questions,
|
||||
}
|
||||
continue
|
||||
|
||||
yield {
|
||||
"type": "tool_call",
|
||||
"tool": tool_name,
|
||||
"input": tool_input,
|
||||
}
|
||||
# Completed successfully — break out of retry loop
|
||||
break
|
||||
except Exception as exc:
|
||||
is_rate_limit, retry_secs = check_rate_limit_error(exc)
|
||||
if is_rate_limit and _attempt < MAX_CHAT_RATE_LIMIT_RETRIES:
|
||||
delay = retry_secs if retry_secs else calculate_rate_limit_backoff(_attempt)
|
||||
logger.warning(f"Rate limited (attempt {_attempt + 1}/{MAX_CHAT_RATE_LIMIT_RETRIES}), retrying in {delay}s")
|
||||
yield {
|
||||
"type": "rate_limited",
|
||||
"retry_in": delay,
|
||||
"attempt": _attempt + 1,
|
||||
"max_attempts": MAX_CHAT_RATE_LIMIT_RETRIES,
|
||||
}
|
||||
await asyncio.sleep(delay)
|
||||
await self.client.query(message)
|
||||
continue
|
||||
if is_rate_limit:
|
||||
logger.error("Rate limit retries exhausted for assistant chat")
|
||||
yield {"type": "error", "content": "Rate limited. Please try again later."}
|
||||
return
|
||||
# Non-rate-limit MessageParseError: log and break (don't crash)
|
||||
if type(exc).__name__ == "MessageParseError":
|
||||
logger.warning(f"Ignoring unrecognized message from Claude CLI: {exc}")
|
||||
break
|
||||
raise
|
||||
yield {
|
||||
"type": "tool_call",
|
||||
"tool": tool_name,
|
||||
"input": tool_input,
|
||||
}
|
||||
except Exception as exc:
|
||||
is_rate_limit, _ = check_rate_limit_error(exc)
|
||||
if is_rate_limit:
|
||||
logger.warning(f"Rate limited: {exc}")
|
||||
yield {"type": "error", "content": "Rate limited. Please try again later."}
|
||||
return
|
||||
raise
|
||||
|
||||
# Store the complete response in the database
|
||||
if full_response and self.conversation_id:
|
||||
|
||||
Reference in New Issue
Block a user