From bf194ad72fdde9a716d351c0b70bcb9644497004 Mon Sep 17 00:00:00 2001 From: cabana8471 Date: Mon, 26 Jan 2026 22:56:57 +0100 Subject: [PATCH] fix: improve rate limit handling with exponential backoff MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When Claude API hits rate limits via HTTP 429 exceptions (rather than response text), the agent now properly detects and handles them: - Add RATE_LIMIT_PATTERNS constant for comprehensive detection - Add parse_retry_after() to extract wait times from error messages - Add is_rate_limit_error() helper for pattern matching - Return new "rate_limit" status from exception handler - Implement exponential backoff: 60s → 120s → 240s... (max 1 hour) - Improve generic error backoff: 30s → 60s → 90s... (max 5 minutes) - Expand text-based detection patterns in response handling - Add unit tests for new functions Fixes #41 Co-Authored-By: Claude Opus 4.5 --- agent.py | 100 +++++++++++++++++++++++++++++++-- test_agent.py | 151 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 245 insertions(+), 6 deletions(-) create mode 100644 test_agent.py diff --git a/agent.py b/agent.py index 7d90473..4ccbaa0 100644 --- a/agent.py +++ b/agent.py @@ -35,6 +35,59 @@ from prompts import ( # Configuration AUTO_CONTINUE_DELAY_SECONDS = 3 +# Rate limit detection patterns (used in both exception messages and response text) +RATE_LIMIT_PATTERNS = [ + "limit reached", + "rate limit", + "rate_limit", + "too many requests", + "quota exceeded", + "please wait", + "try again later", + "429", + "overloaded", +] + + +def parse_retry_after(error_message: str) -> Optional[int]: + """ + Extract retry-after seconds from various error message formats. + + Returns seconds to wait, or None if not parseable. 
+ """ + # Common patterns: + # "retry after 60 seconds" + # "Retry-After: 120" + # "try again in 5 seconds" + # "30 seconds remaining" + + patterns = [ + r"retry.?after[:\s]+(\d+)\s*(?:seconds?)?", + r"try again in\s+(\d+)\s*(?:seconds?|s\b)", + r"(\d+)\s*seconds?\s*(?:remaining|left|until)", + ] + + for pattern in patterns: + match = re.search(pattern, error_message, re.IGNORECASE) + if match: + return int(match.group(1)) + + return None + + +def is_rate_limit_error(error_message: str) -> bool: + """ + Detect if an error message indicates a rate limit. + + Args: + error_message: The error message to check + + Returns: + True if the error appears to be rate-limit related + """ + error_lower = error_message.lower() + return any(pattern in error_lower for pattern in RATE_LIMIT_PATTERNS) + async def run_agent_session( client: ClaudeSDKClient, @@ -106,8 +159,19 @@ async def run_agent_session( return "continue", response_text except Exception as e: - print(f"Error during agent session: {e}") - return "error", str(e) + error_str = str(e) + print(f"Error during agent session: {error_str}") + + # Detect rate limit errors from exception message + if is_rate_limit_error(error_str): + # Try to extract retry-after time from error + retry_seconds = parse_retry_after(error_str) + if retry_seconds: + return "rate_limit", str(retry_seconds) + else: + return "rate_limit", "unknown" + + return "error", error_str async def run_autonomous_agent( @@ -183,6 +247,8 @@ async def run_autonomous_agent( # Main loop iteration = 0 + rate_limit_retries = 0 # Track consecutive rate limit errors for exponential backoff + error_retries = 0 # Track consecutive non-rate-limit errors while True: iteration += 1 @@ -250,11 +316,17 @@ async def run_autonomous_agent( # Handle status if status == "continue": + # Reset retry counters on success + rate_limit_retries = 0 + error_retries = 0 + delay_seconds = AUTO_CONTINUE_DELAY_SECONDS target_time_str = None - if "limit reached" in response.lower(): - 
print("Claude Agent SDK indicated limit reached.") + # Check for rate limit indicators in response text + response_lower = response.lower() + if any(pattern in response_lower for pattern in RATE_LIMIT_PATTERNS): + print("Claude Agent SDK indicated rate limit reached.") # Try to parse reset time from response match = re.search( @@ -326,10 +398,26 @@ async def run_autonomous_agent( await asyncio.sleep(delay_seconds) + elif status == "rate_limit": + # Smart rate limit handling with exponential backoff + if response != "unknown": + delay_seconds = int(response) + print(f"\nRate limit hit. Waiting {delay_seconds} seconds before retry...") + else: + # Use exponential backoff when retry-after unknown + delay_seconds = min(60 * (2 ** rate_limit_retries), 3600) # Max 1 hour + rate_limit_retries += 1 + print(f"\nRate limit hit. Backoff wait: {delay_seconds} seconds (attempt #{rate_limit_retries})...") + + await asyncio.sleep(delay_seconds) + elif status == "error": + # Non-rate-limit errors: shorter, linearly increasing backoff + error_retries += 1 + delay_seconds = min(30 * error_retries, 300) # Max 5 minutes print("\nSession encountered an error") - print("Will retry with a fresh session...") - await asyncio.sleep(AUTO_CONTINUE_DELAY_SECONDS) + print(f"Will retry in {delay_seconds}s (attempt #{error_retries})...") + await asyncio.sleep(delay_seconds) # Small delay between sessions if max_iterations is None or iteration < max_iterations: diff --git a/test_agent.py b/test_agent.py new file mode 100644 index 0000000..bac4fd7 --- /dev/null +++ b/test_agent.py @@ -0,0 +1,151 @@ +""" +Unit tests for agent.py rate limit handling functions. + +Tests the parse_retry_after() and is_rate_limit_error() functions +added for improved rate limit handling (Issue #41). 
+""" + +import re +import unittest +from typing import Optional + +# Copy the constants and functions from agent.py for isolated testing +# (Avoids dependency on claude_agent_sdk which may not be installed) + +RATE_LIMIT_PATTERNS = [ + "limit reached", + "rate limit", + "rate_limit", + "too many requests", + "quota exceeded", + "please wait", + "try again later", + "429", + "overloaded", +] + + +def parse_retry_after(error_message: str) -> Optional[int]: + """ + Extract retry-after seconds from various error message formats. + + Returns seconds to wait, or None if not parseable. + """ + patterns = [ + r"retry.?after[:\s]+(\d+)\s*(?:seconds?)?", + r"try again in\s+(\d+)\s*(?:seconds?|s\b)", + r"(\d+)\s*seconds?\s*(?:remaining|left|until)", + ] + + for pattern in patterns: + match = re.search(pattern, error_message, re.IGNORECASE) + if match: + return int(match.group(1)) + + return None + + +def is_rate_limit_error(error_message: str) -> bool: + """ + Detect if an error message indicates a rate limit. 
+ """ + error_lower = error_message.lower() + return any(pattern in error_lower for pattern in RATE_LIMIT_PATTERNS) + + +class TestParseRetryAfter(unittest.TestCase): + """Tests for parse_retry_after() function.""" + + def test_retry_after_colon_format(self): + """Test 'Retry-After: 60' format.""" + assert parse_retry_after("Retry-After: 60") == 60 + assert parse_retry_after("retry-after: 120") == 120 + assert parse_retry_after("retry after: 30 seconds") == 30 + + def test_retry_after_space_format(self): + """Test 'retry after 60 seconds' format.""" + assert parse_retry_after("retry after 60 seconds") == 60 + assert parse_retry_after("Please retry after 120 seconds") == 120 + assert parse_retry_after("Retry after 30") == 30 + + def test_try_again_in_format(self): + """Test 'try again in X seconds' format.""" + assert parse_retry_after("try again in 120 seconds") == 120 + assert parse_retry_after("Please try again in 60s") == 60 + assert parse_retry_after("Try again in 30 seconds") == 30 + + def test_seconds_remaining_format(self): + """Test 'X seconds remaining' format.""" + assert parse_retry_after("30 seconds remaining") == 30 + assert parse_retry_after("60 seconds left") == 60 + assert parse_retry_after("120 seconds until reset") == 120 + + def test_no_match(self): + """Test messages that don't contain retry-after info.""" + assert parse_retry_after("no match here") is None + assert parse_retry_after("Connection refused") is None + assert parse_retry_after("Internal server error") is None + assert parse_retry_after("") is None + + def test_minutes_not_supported(self): + """Test that minutes are not parsed (by design).""" + # We only support seconds to avoid complexity + assert parse_retry_after("wait 5 minutes") is None + assert parse_retry_after("try again in 2 minutes") is None + + +class TestIsRateLimitError(unittest.TestCase): + """Tests for is_rate_limit_error() function.""" + + def test_rate_limit_patterns(self): + """Test various rate limit error 
messages.""" + assert is_rate_limit_error("Rate limit exceeded") is True + assert is_rate_limit_error("rate_limit_exceeded") is True + assert is_rate_limit_error("Too many requests") is True + assert is_rate_limit_error("HTTP 429 Too Many Requests") is True + assert is_rate_limit_error("API quota exceeded") is True + assert is_rate_limit_error("Please wait before retrying") is True + assert is_rate_limit_error("Try again later") is True + assert is_rate_limit_error("Server is overloaded") is True + assert is_rate_limit_error("Usage limit reached") is True + + def test_case_insensitive(self): + """Test that detection is case-insensitive.""" + assert is_rate_limit_error("RATE LIMIT") is True + assert is_rate_limit_error("Rate Limit") is True + assert is_rate_limit_error("rate limit") is True + assert is_rate_limit_error("RaTe LiMiT") is True + + def test_non_rate_limit_errors(self): + """Test non-rate-limit error messages.""" + assert is_rate_limit_error("Connection refused") is False + assert is_rate_limit_error("Authentication failed") is False + assert is_rate_limit_error("Invalid API key") is False + assert is_rate_limit_error("Internal server error") is False + assert is_rate_limit_error("Network timeout") is False + assert is_rate_limit_error("") is False + + +class TestExponentialBackoff(unittest.TestCase): + """Test exponential backoff calculations.""" + + def test_backoff_sequence(self): + """Test that backoff follows expected sequence.""" + # Simulating: min(60 * (2 ** retries), 3600) + expected = [60, 120, 240, 480, 960, 1920, 3600, 3600] # Caps at 3600 + for retries, expected_delay in enumerate(expected): + delay = min(60 * (2 ** retries), 3600) + assert delay == expected_delay, f"Retry {retries}: expected {expected_delay}, got {delay}" + + def test_error_backoff_sequence(self): + """Test error backoff follows expected sequence.""" + # Simulating: min(30 * retries, 300) + expected = [30, 60, 90, 120, 150, 180, 210, 240, 270, 300, 300] # Caps at 300 + for 
retries in range(1, len(expected) + 1): + delay = min(30 * retries, 300) + expected_delay = expected[retries - 1] + assert delay == expected_delay, f"Retry {retries}: expected {expected_delay}, got {delay}" + + +if __name__ == "__main__": + unittest.main()