mirror of
https://github.com/leonvanzyl/autocoder.git
synced 2026-02-01 15:03:36 +00:00
fix: improve rate limit handling with exponential backoff
When the Claude API signals a rate limit via an HTTP 429 exception (rather than in the response text), the agent now detects and handles it properly:

- Add RATE_LIMIT_PATTERNS constant for comprehensive detection
- Add parse_retry_after() to extract wait times from error messages
- Add is_rate_limit_error() helper for pattern matching
- Return new "rate_limit" status from exception handler
- Implement exponential backoff: 60s → 120s → 240s... (max 1 hour)
- Improve generic error backoff: 30s → 60s → 90s... (max 5 minutes)
- Expand text-based detection patterns in response handling
- Add unit tests for new functions

Fixes #41

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
100
agent.py
100
agent.py
@@ -35,6 +35,59 @@ from prompts import (
|
||||
# Configuration
|
||||
AUTO_CONTINUE_DELAY_SECONDS = 3
|
||||
|
||||
# Rate limit detection patterns (used in both exception messages and response text)
|
||||
# Lowercase substrings that mark a message as rate-limit related.
# Callers lowercase the text before testing for membership.
RATE_LIMIT_PATTERNS = [
    "limit reached",
    "rate limit",
    "rate_limit",
    "too many requests",
    "quota exceeded",
    "please wait",
    "try again later",
    "429",  # HTTP status code for "Too Many Requests"
    "overloaded",
]
|
||||
|
||||
|
||||
def parse_retry_after(error_message: str) -> Optional[int]:
    """
    Extract retry-after seconds from various error message formats.

    Recognised forms (case-insensitive):
        "retry after 60 seconds"
        "Retry-After: 120"
        "try again in 5 seconds" / "try again in 5s"
        "30 seconds remaining" / "... left" / "... until ..."

    Only second-based values are supported. A number followed by a
    minute/hour unit (e.g. "retry after 5 minutes") is rejected instead of
    being misread as a count of seconds.

    Args:
        error_message: The error text to search.

    Returns:
        Seconds to wait, or None if not parseable.
    """
    if not error_message:
        return None

    # The unit after "retry after N" is optional, so explicitly refuse
    # minute/hour units. (?!\d) prevents the regex engine from backtracking
    # to a shorter number just to slip past the unit check.
    not_minutes_or_hours = r"(?!\d)(?!\s*(?:m|mins?|minutes?|h|hrs?|hours?)\b)"

    patterns = [
        r"retry.?after[:\s]+(\d+)" + not_minutes_or_hours + r"\s*(?:seconds?)?",
        r"try again in\s+(\d+)\s*(?:seconds?|s\b)",
        r"(\d+)\s*seconds?\s*(?:remaining|left|until)",
    ]

    for pattern in patterns:
        match = re.search(pattern, error_message, re.IGNORECASE)
        if match:
            return int(match.group(1))

    return None
|
||||
|
||||
|
||||
def is_rate_limit_error(error_message: str) -> bool:
    """
    Detect if an error message indicates a rate limit.

    The message is lowercased and checked against RATE_LIMIT_PATTERNS.
    Text patterns match as plain substrings. Purely numeric patterns (the
    "429" status code) must appear as a standalone number, so unrelated
    values such as "14290 tokens" are not mistaken for a rate limit.

    Args:
        error_message: The error message to check

    Returns:
        True if the error appears to be rate-limit related
    """
    if not error_message:
        return False

    error_lower = error_message.lower()
    for pattern in RATE_LIMIT_PATTERNS:
        if pattern.isdigit():
            # Require non-digit neighbours so the code is not matched
            # inside a longer number.
            if re.search(rf"(?<!\d){pattern}(?!\d)", error_lower):
                return True
        elif pattern in error_lower:
            return True
    return False
|
||||
|
||||
|
||||
async def run_agent_session(
|
||||
client: ClaudeSDKClient,
|
||||
@@ -106,8 +159,19 @@ async def run_agent_session(
|
||||
return "continue", response_text
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error during agent session: {e}")
|
||||
return "error", str(e)
|
||||
error_str = str(e)
|
||||
print(f"Error during agent session: {error_str}")
|
||||
|
||||
# Detect rate limit errors from exception message
|
||||
if is_rate_limit_error(error_str):
|
||||
# Try to extract retry-after time from error
|
||||
retry_seconds = parse_retry_after(error_str)
|
||||
if retry_seconds:
|
||||
return "rate_limit", str(retry_seconds)
|
||||
else:
|
||||
return "rate_limit", "unknown"
|
||||
|
||||
return "error", error_str
|
||||
|
||||
|
||||
async def run_autonomous_agent(
|
||||
@@ -183,6 +247,8 @@ async def run_autonomous_agent(
|
||||
|
||||
# Main loop
|
||||
iteration = 0
|
||||
rate_limit_retries = 0 # Track consecutive rate limit errors for exponential backoff
|
||||
error_retries = 0 # Track consecutive non-rate-limit errors
|
||||
|
||||
while True:
|
||||
iteration += 1
|
||||
@@ -250,11 +316,17 @@ async def run_autonomous_agent(
|
||||
|
||||
# Handle status
|
||||
if status == "continue":
|
||||
# Reset retry counters on success
|
||||
rate_limit_retries = 0
|
||||
error_retries = 0
|
||||
|
||||
delay_seconds = AUTO_CONTINUE_DELAY_SECONDS
|
||||
target_time_str = None
|
||||
|
||||
if "limit reached" in response.lower():
|
||||
print("Claude Agent SDK indicated limit reached.")
|
||||
# Check for rate limit indicators in response text
|
||||
response_lower = response.lower()
|
||||
if any(pattern in response_lower for pattern in RATE_LIMIT_PATTERNS):
|
||||
print("Claude Agent SDK indicated rate limit reached.")
|
||||
|
||||
# Try to parse reset time from response
|
||||
match = re.search(
|
||||
@@ -326,10 +398,26 @@ async def run_autonomous_agent(
|
||||
|
||||
await asyncio.sleep(delay_seconds)
|
||||
|
||||
elif status == "rate_limit":
|
||||
# Smart rate limit handling with exponential backoff
|
||||
if response != "unknown":
|
||||
delay_seconds = int(response)
|
||||
print(f"\nRate limit hit. Waiting {delay_seconds} seconds before retry...")
|
||||
else:
|
||||
# Use exponential backoff when retry-after unknown
|
||||
delay_seconds = min(60 * (2 ** rate_limit_retries), 3600) # Max 1 hour
|
||||
rate_limit_retries += 1
|
||||
print(f"\nRate limit hit. Backoff wait: {delay_seconds} seconds (attempt #{rate_limit_retries})...")
|
||||
|
||||
await asyncio.sleep(delay_seconds)
|
||||
|
||||
elif status == "error":
|
||||
# Non-rate-limit errors: shorter backoff that grows linearly (30s, 60s, 90s, ... capped at 300s)
|
||||
error_retries += 1
|
||||
delay_seconds = min(30 * error_retries, 300) # Max 5 minutes
|
||||
print("\nSession encountered an error")
|
||||
print("Will retry with a fresh session...")
|
||||
await asyncio.sleep(AUTO_CONTINUE_DELAY_SECONDS)
|
||||
print(f"Will retry in {delay_seconds}s (attempt #{error_retries})...")
|
||||
await asyncio.sleep(delay_seconds)
|
||||
|
||||
# Small delay between sessions
|
||||
if max_iterations is None or iteration < max_iterations:
|
||||
|
||||
151
test_agent.py
Normal file
151
test_agent.py
Normal file
@@ -0,0 +1,151 @@
|
||||
"""
|
||||
Unit tests for agent.py rate limit handling functions.
|
||||
|
||||
Tests the parse_retry_after() and is_rate_limit_error() functions
|
||||
added for improved rate limit handling (Issue #41).
|
||||
"""
|
||||
|
||||
import re
|
||||
import unittest
|
||||
from typing import Optional
|
||||
|
||||
# Copy the constants and functions from agent.py for isolated testing
|
||||
# (Avoids dependency on claude_agent_sdk which may not be installed)
|
||||
|
||||
# Lowercase substrings that mark a message as rate-limit related.
# Callers lowercase the text before testing for membership.
RATE_LIMIT_PATTERNS = [
    "limit reached",
    "rate limit",
    "rate_limit",
    "too many requests",
    "quota exceeded",
    "please wait",
    "try again later",
    "429",  # HTTP status code for "Too Many Requests"
    "overloaded",
]
|
||||
|
||||
|
||||
def parse_retry_after(error_message: str) -> Optional[int]:
    """
    Extract retry-after seconds from various error message formats.

    Only second-based values are supported. A number followed by a
    minute/hour unit (e.g. "retry after 5 minutes") is rejected instead of
    being misread as a count of seconds.

    Args:
        error_message: The error text to search.

    Returns:
        Seconds to wait, or None if not parseable.
    """
    if not error_message:
        return None

    # The unit after "retry after N" is optional, so explicitly refuse
    # minute/hour units. (?!\d) prevents the regex engine from backtracking
    # to a shorter number just to slip past the unit check.
    not_minutes_or_hours = r"(?!\d)(?!\s*(?:m|mins?|minutes?|h|hrs?|hours?)\b)"

    patterns = [
        r"retry.?after[:\s]+(\d+)" + not_minutes_or_hours + r"\s*(?:seconds?)?",
        r"try again in\s+(\d+)\s*(?:seconds?|s\b)",
        r"(\d+)\s*seconds?\s*(?:remaining|left|until)",
    ]

    for pattern in patterns:
        match = re.search(pattern, error_message, re.IGNORECASE)
        if match:
            return int(match.group(1))

    return None
|
||||
|
||||
|
||||
def is_rate_limit_error(error_message: str) -> bool:
    """
    Detect if an error message indicates a rate limit.

    Text patterns from RATE_LIMIT_PATTERNS match as plain substrings of the
    lowercased message. Purely numeric patterns (the "429" status code) must
    appear as a standalone number, so values such as "14290 tokens" do not
    count as a rate limit.
    """
    if not error_message:
        return False

    error_lower = error_message.lower()
    for pattern in RATE_LIMIT_PATTERNS:
        if pattern.isdigit():
            # Require non-digit neighbours so the code is not matched
            # inside a longer number.
            if re.search(rf"(?<!\d){pattern}(?!\d)", error_lower):
                return True
        elif pattern in error_lower:
            return True
    return False
|
||||
|
||||
|
||||
class TestParseRetryAfter(unittest.TestCase):
    """Tests for parse_retry_after() function.

    Uses unittest assertion methods rather than bare ``assert`` so the checks
    still run under ``python -O`` and failures report both values.
    """

    def test_retry_after_colon_format(self):
        """Test 'Retry-After: 60' format."""
        self.assertEqual(parse_retry_after("Retry-After: 60"), 60)
        self.assertEqual(parse_retry_after("retry-after: 120"), 120)
        self.assertEqual(parse_retry_after("retry after: 30 seconds"), 30)

    def test_retry_after_space_format(self):
        """Test 'retry after 60 seconds' format."""
        self.assertEqual(parse_retry_after("retry after 60 seconds"), 60)
        self.assertEqual(parse_retry_after("Please retry after 120 seconds"), 120)
        self.assertEqual(parse_retry_after("Retry after 30"), 30)

    def test_try_again_in_format(self):
        """Test 'try again in X seconds' format."""
        self.assertEqual(parse_retry_after("try again in 120 seconds"), 120)
        self.assertEqual(parse_retry_after("Please try again in 60s"), 60)
        self.assertEqual(parse_retry_after("Try again in 30 seconds"), 30)

    def test_seconds_remaining_format(self):
        """Test 'X seconds remaining' format."""
        self.assertEqual(parse_retry_after("30 seconds remaining"), 30)
        self.assertEqual(parse_retry_after("60 seconds left"), 60)
        self.assertEqual(parse_retry_after("120 seconds until reset"), 120)

    def test_no_match(self):
        """Test messages that don't contain retry-after info."""
        self.assertIsNone(parse_retry_after("no match here"))
        self.assertIsNone(parse_retry_after("Connection refused"))
        self.assertIsNone(parse_retry_after("Internal server error"))
        self.assertIsNone(parse_retry_after(""))

    def test_minutes_not_supported(self):
        """Test that minutes are not parsed (by design)."""
        # We only support seconds to avoid complexity
        self.assertIsNone(parse_retry_after("wait 5 minutes"))
        self.assertIsNone(parse_retry_after("try again in 2 minutes"))
|
||||
|
||||
|
||||
class TestIsRateLimitError(unittest.TestCase):
    """Tests for is_rate_limit_error() function.

    ``assertIs`` keeps the strict ``is True`` / ``is False`` identity checks
    while still running under ``python -O`` and reporting the actual value
    on failure.
    """

    def test_rate_limit_patterns(self):
        """Test various rate limit error messages."""
        messages = [
            "Rate limit exceeded",
            "rate_limit_exceeded",
            "Too many requests",
            "HTTP 429 Too Many Requests",
            "API quota exceeded",
            "Please wait before retrying",
            "Try again later",
            "Server is overloaded",
            "Usage limit reached",
        ]
        for message in messages:
            with self.subTest(message=message):
                self.assertIs(is_rate_limit_error(message), True)

    def test_case_insensitive(self):
        """Test that detection is case-insensitive."""
        for message in ("RATE LIMIT", "Rate Limit", "rate limit", "RaTe LiMiT"):
            with self.subTest(message=message):
                self.assertIs(is_rate_limit_error(message), True)

    def test_non_rate_limit_errors(self):
        """Test non-rate-limit error messages."""
        messages = [
            "Connection refused",
            "Authentication failed",
            "Invalid API key",
            "Internal server error",
            "Network timeout",
            "",
        ]
        for message in messages:
            with self.subTest(message=message):
                self.assertIs(is_rate_limit_error(message), False)
|
||||
|
||||
|
||||
class TestExponentialBackoff(unittest.TestCase):
    """Test the backoff delay formulas.

    NOTE: these tests re-state the formulas instead of calling the agent
    loop, so they pin the intended delay sequences rather than the agent's
    actual implementation.
    """

    def test_backoff_sequence(self):
        """Test that rate-limit backoff doubles from 60s and caps at 3600s."""
        # Simulating: min(60 * (2 ** retries), 3600)
        expected = [60, 120, 240, 480, 960, 1920, 3600, 3600]  # Caps at 3600
        for retries, expected_delay in enumerate(expected):
            with self.subTest(retries=retries):
                delay = min(60 * (2 ** retries), 3600)
                self.assertEqual(delay, expected_delay)

    def test_error_backoff_sequence(self):
        """Test that error backoff grows by 30s per retry and caps at 300s."""
        # Simulating: min(30 * retries, 300); the retry counter starts at 1
        expected = [30, 60, 90, 120, 150, 180, 210, 240, 270, 300, 300]  # Caps at 300
        for retries, expected_delay in enumerate(expected, start=1):
            with self.subTest(retries=retries):
                delay = min(30 * retries, 300)
                self.assertEqual(delay, expected_delay)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
Reference in New Issue
Block a user