fix: improve rate limit handling with exponential backoff

When the Claude API signals a rate limit by raising an exception (e.g. an
HTTP 429 error) rather than through response text, the agent now detects and
handles it:

- Add RATE_LIMIT_PATTERNS constant for comprehensive detection
- Add parse_retry_after() to extract wait times from error messages
- Add is_rate_limit_error() helper for pattern matching
- Return new "rate_limit" status from exception handler
- Implement exponential backoff: 60s → 120s → 240s... (max 1 hour)
- Improve generic error backoff: 30s → 60s → 90s... (max 5 minutes)
- Expand text-based detection patterns in response handling
- Add unit tests for new functions

Fixes #41

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
cabana8471
2026-01-26 22:56:57 +01:00
parent 910ca34eac
commit bf194ad72f
2 changed files with 245 additions and 6 deletions

100
agent.py
View File

@@ -35,6 +35,59 @@ from prompts import (
# Configuration
AUTO_CONTINUE_DELAY_SECONDS = 3

# Substrings that mark a message as rate-limit related. They are compared
# against lowercased text (exception messages and response text alike), so
# every entry here must itself be lowercase.
RATE_LIMIT_PATTERNS = [
    # Explicit limit / quota wording
    "limit reached",
    "rate limit",
    "rate_limit",
    "too many requests",
    "quota exceeded",
    # Generic "back off" wording
    "please wait",
    "try again later",
    # HTTP status code and capacity wording
    "429",
    "overloaded",
]
# Negative lookahead placed right after a captured number. It rejects the
# match when the number is followed by a unit that is not seconds, so that
# "retry after 5 minutes" is never reported as 5 seconds. The leading `\d`
# alternative stops the regex engine from backtracking to a shorter digit run
# (e.g. matching "5" out of "50 minutes").
_NOT_SECONDS = (
    r"(?!\d|\s*(?:ms|msecs?|milliseconds?|m|mins?|minutes?|h|hrs?|hours?)\b)"
)

# Compiled once at import time; tried in order, first match wins.
_RETRY_AFTER_PATTERNS = tuple(
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        # "retry after 60 seconds", "Retry-After: 120"
        r"retry.?after[:\s]+(\d+)" + _NOT_SECONDS,
        # "try again in 5 seconds", "try again in 60s"
        r"try again in\s+(\d+)\s*(?:seconds?|s\b)",
        # "30 seconds remaining", "60 seconds left", "120 seconds until reset"
        r"(\d+)\s*seconds?\s*(?:remaining|left|until)",
    )
)


def parse_retry_after(error_message: str) -> Optional[int]:
    """
    Extract a retry-after delay, in seconds, from an error message.

    Only second-based delays are recognised. A number that is explicitly
    followed by another unit (milliseconds, minutes, hours) is ignored
    rather than being misread as a number of seconds.

    Args:
        error_message: The error text to scan (matched case-insensitively).

    Returns:
        The number of seconds to wait, or None if no delay could be parsed.
    """
    for pattern in _RETRY_AFTER_PATTERNS:
        match = pattern.search(error_message)
        if match:
            return int(match.group(1))
    return None
def is_rate_limit_error(error_message: str) -> bool:
    """
    Report whether an error message looks like a rate-limit failure.

    The check is a case-insensitive substring search for each entry of
    RATE_LIMIT_PATTERNS.

    Args:
        error_message: The error text to inspect.

    Returns:
        True when at least one pattern occurs in the message, else False.
    """
    lowered = error_message.lower()
    for needle in RATE_LIMIT_PATTERNS:
        if needle in lowered:
            return True
    return False
async def run_agent_session(
client: ClaudeSDKClient,
@@ -106,8 +159,19 @@ async def run_agent_session(
return "continue", response_text
except Exception as e:
print(f"Error during agent session: {e}")
return "error", str(e)
error_str = str(e)
print(f"Error during agent session: {error_str}")
# Detect rate limit errors from exception message
if is_rate_limit_error(error_str):
# Try to extract retry-after time from error
retry_seconds = parse_retry_after(error_str)
if retry_seconds:
return "rate_limit", str(retry_seconds)
else:
return "rate_limit", "unknown"
return "error", error_str
async def run_autonomous_agent(
@@ -183,6 +247,8 @@ async def run_autonomous_agent(
# Main loop
iteration = 0
rate_limit_retries = 0 # Track consecutive rate limit errors for exponential backoff
error_retries = 0 # Track consecutive non-rate-limit errors
while True:
iteration += 1
@@ -250,11 +316,17 @@ async def run_autonomous_agent(
# Handle status
if status == "continue":
# Reset retry counters on success
rate_limit_retries = 0
error_retries = 0
delay_seconds = AUTO_CONTINUE_DELAY_SECONDS
target_time_str = None
if "limit reached" in response.lower():
print("Claude Agent SDK indicated limit reached.")
# Check for rate limit indicators in response text
response_lower = response.lower()
if any(pattern in response_lower for pattern in RATE_LIMIT_PATTERNS):
print("Claude Agent SDK indicated rate limit reached.")
# Try to parse reset time from response
match = re.search(
@@ -326,10 +398,26 @@ async def run_autonomous_agent(
await asyncio.sleep(delay_seconds)
elif status == "rate_limit":
# Smart rate limit handling with exponential backoff
if response != "unknown":
delay_seconds = int(response)
print(f"\nRate limit hit. Waiting {delay_seconds} seconds before retry...")
else:
# Use exponential backoff when retry-after unknown
delay_seconds = min(60 * (2 ** rate_limit_retries), 3600) # Max 1 hour
rate_limit_retries += 1
print(f"\nRate limit hit. Backoff wait: {delay_seconds} seconds (attempt #{rate_limit_retries})...")
await asyncio.sleep(delay_seconds)
elif status == "error":
# Non-rate-limit errors: shorter, linearly increasing backoff (30s per consecutive error)
error_retries += 1
delay_seconds = min(30 * error_retries, 300) # Max 5 minutes
print("\nSession encountered an error")
print("Will retry with a fresh session...")
await asyncio.sleep(AUTO_CONTINUE_DELAY_SECONDS)
print(f"Will retry in {delay_seconds}s (attempt #{error_retries})...")
await asyncio.sleep(delay_seconds)
# Small delay between sessions
if max_iterations is None or iteration < max_iterations:

151
test_agent.py Normal file
View File

@@ -0,0 +1,151 @@
"""
Unit tests for agent.py rate limit handling functions.
Tests the parse_retry_after() and is_rate_limit_error() functions
added for improved rate limit handling (Issue #41).
"""
import re
import unittest
from typing import Optional
# The constant and helpers below are duplicated from agent.py so that these
# tests run in isolation, without importing agent.py (which depends on
# claude_agent_sdk, a package that may not be installed).

# Lowercase substrings that mark a message as rate-limit related.
RATE_LIMIT_PATTERNS = [
    # Explicit limit / quota wording
    "limit reached",
    "rate limit",
    "rate_limit",
    "too many requests",
    "quota exceeded",
    # Generic "back off" wording
    "please wait",
    "try again later",
    # HTTP status code and capacity wording
    "429",
    "overloaded",
]
# Negative lookahead placed right after a captured number. It rejects the
# match when the number is followed by a unit that is not seconds, so that
# "retry after 5 minutes" is never reported as 5 seconds. The leading `\d`
# alternative stops the regex engine from backtracking to a shorter digit run
# (e.g. matching "5" out of "50 minutes").
_NOT_SECONDS = (
    r"(?!\d|\s*(?:ms|msecs?|milliseconds?|m|mins?|minutes?|h|hrs?|hours?)\b)"
)

# Compiled once at import time; tried in order, first match wins.
_RETRY_AFTER_PATTERNS = tuple(
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        # "retry after 60 seconds", "Retry-After: 120"
        r"retry.?after[:\s]+(\d+)" + _NOT_SECONDS,
        # "try again in 5 seconds", "try again in 60s"
        r"try again in\s+(\d+)\s*(?:seconds?|s\b)",
        # "30 seconds remaining", "60 seconds left", "120 seconds until reset"
        r"(\d+)\s*seconds?\s*(?:remaining|left|until)",
    )
)


def parse_retry_after(error_message: str) -> Optional[int]:
    """
    Extract a retry-after delay, in seconds, from an error message.

    Only second-based delays are recognised. A number that is explicitly
    followed by another unit (milliseconds, minutes, hours) is ignored
    rather than being misread as a number of seconds.

    Args:
        error_message: The error text to scan (matched case-insensitively).

    Returns:
        The number of seconds to wait, or None if no delay could be parsed.
    """
    for pattern in _RETRY_AFTER_PATTERNS:
        match = pattern.search(error_message)
        if match:
            return int(match.group(1))
    return None
def is_rate_limit_error(error_message: str) -> bool:
    """
    Report whether an error message looks like a rate-limit failure.

    Performs a case-insensitive substring search for each entry of
    RATE_LIMIT_PATTERNS and returns True on the first hit.
    """
    lowered = error_message.lower()
    for needle in RATE_LIMIT_PATTERNS:
        if needle in lowered:
            return True
    return False
class TestParseRetryAfter(unittest.TestCase):
    """Tests for parse_retry_after() function.

    Uses unittest assertion methods rather than bare ``assert`` so the checks
    still run under ``python -O`` and failures report the offending message.
    """

    def _assert_parses(self, cases):
        """Check that each (message, expected_seconds) pair parses as expected."""
        for message, expected in cases:
            self.assertEqual(parse_retry_after(message), expected, msg=message)

    def _assert_unparsed(self, messages):
        """Check that every message yields None (no delay found)."""
        for message in messages:
            self.assertIsNone(parse_retry_after(message), msg=message)

    def test_retry_after_colon_format(self):
        """Test 'Retry-After: 60' format."""
        self._assert_parses([
            ("Retry-After: 60", 60),
            ("retry-after: 120", 120),
            ("retry after: 30 seconds", 30),
        ])

    def test_retry_after_space_format(self):
        """Test 'retry after 60 seconds' format."""
        self._assert_parses([
            ("retry after 60 seconds", 60),
            ("Please retry after 120 seconds", 120),
            ("Retry after 30", 30),
        ])

    def test_try_again_in_format(self):
        """Test 'try again in X seconds' format."""
        self._assert_parses([
            ("try again in 120 seconds", 120),
            ("Please try again in 60s", 60),
            ("Try again in 30 seconds", 30),
        ])

    def test_seconds_remaining_format(self):
        """Test 'X seconds remaining' format."""
        self._assert_parses([
            ("30 seconds remaining", 30),
            ("60 seconds left", 60),
            ("120 seconds until reset", 120),
        ])

    def test_no_match(self):
        """Test messages that don't contain retry-after info."""
        self._assert_unparsed([
            "no match here",
            "Connection refused",
            "Internal server error",
            "",
        ])

    def test_minutes_not_supported(self):
        """Test that minutes are not parsed (by design)."""
        # We only support seconds to avoid complexity
        self._assert_unparsed([
            "wait 5 minutes",
            "try again in 2 minutes",
        ])
class TestIsRateLimitError(unittest.TestCase):
    """Tests for is_rate_limit_error() function.

    Uses unittest assertion methods rather than bare ``assert`` so the checks
    still run under ``python -O`` and failures report the offending message.
    ``assertIs`` keeps the original strictness: the result must be the bool
    ``True``/``False``, not merely truthy/falsy.
    """

    def _assert_all(self, messages, expected):
        """Check that is_rate_limit_error() returns `expected` for every message."""
        for message in messages:
            self.assertIs(is_rate_limit_error(message), expected, msg=repr(message))

    def test_rate_limit_patterns(self):
        """Test various rate limit error messages."""
        self._assert_all(
            [
                "Rate limit exceeded",
                "rate_limit_exceeded",
                "Too many requests",
                "HTTP 429 Too Many Requests",
                "API quota exceeded",
                "Please wait before retrying",
                "Try again later",
                "Server is overloaded",
                "Usage limit reached",
            ],
            True,
        )

    def test_case_insensitive(self):
        """Test that detection is case-insensitive."""
        self._assert_all(
            ["RATE LIMIT", "Rate Limit", "rate limit", "RaTe LiMiT"],
            True,
        )

    def test_non_rate_limit_errors(self):
        """Test non-rate-limit error messages."""
        self._assert_all(
            [
                "Connection refused",
                "Authentication failed",
                "Invalid API key",
                "Internal server error",
                "Network timeout",
                "",
            ],
            False,
        )
class TestExponentialBackoff(unittest.TestCase):
    """Test exponential backoff calculations.

    NOTE: these tests re-evaluate the backoff formulas inline instead of
    calling into agent.py, so they pin the intended delay sequences but will
    not notice if the formulas used by the agent itself are changed.
    """

    def test_backoff_sequence(self):
        """Test that backoff follows expected sequence."""
        # Simulating: min(60 * (2 ** retries), 3600)
        expected = [60, 120, 240, 480, 960, 1920, 3600, 3600]  # Caps at 3600
        for retries, expected_delay in enumerate(expected):
            delay = min(60 * (2 ** retries), 3600)
            self.assertEqual(
                delay,
                expected_delay,
                f"Retry {retries}: expected {expected_delay}, got {delay}",
            )

    def test_error_backoff_sequence(self):
        """Test error backoff follows expected sequence."""
        # Simulating: min(30 * retries, 300); the retry counter starts at 1.
        expected = [30, 60, 90, 120, 150, 180, 210, 240, 270, 300, 300]  # Caps at 300
        for retries, expected_delay in enumerate(expected, start=1):
            delay = min(30 * retries, 300)
            self.assertEqual(
                delay,
                expected_delay,
                f"Retry {retries}: expected {expected_delay}, got {delay}",
            )
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()