fix: improve rate limit handling with exponential backoff

When Claude API hits rate limits via HTTP 429 exceptions (rather than response text), the agent now properly detects and handles them: - Add RATE_LIMIT_PATTERNS constant for comprehensive detection - Add parse_retry_after() to extract wait times from error messages - Add is_rate_limit_error() helper for pattern matching - Return new "rate_limit" status from exception handler - Implement exponential backoff: 60s → 120s → 240s... (max 1 hour) - Improve generic error backoff: 30s → 60s → 90s... (max 5 minutes) - Expand text-based detection patterns in response handling - Add unit tests for new functions Fixes #41 Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-03-19 11:53:09 +00:00 · 2026-01-26 22:56:57 +01:00
parent 910ca34eac
commit bf194ad72f
2 changed files with 245 additions and 6 deletions
--- a/agent.py
+++ b/agent.py
@@ -35,6 +35,59 @@ from prompts import (
 # Configuration
 AUTO_CONTINUE_DELAY_SECONDS = 3

+# Rate limit detection patterns (used in both exception messages and response text)
+RATE_LIMIT_PATTERNS = [
+    "limit reached",
+    "rate limit",
+    "rate_limit",
+    "too many requests",
+    "quota exceeded",
+    "please wait",
+    "try again later",
+    "429",
+    "overloaded",
+]
+
+
+def parse_retry_after(error_message: str) -> Optional[int]:
+    """
+    Extract retry-after seconds from various error message formats.
+
+    Returns seconds to wait, or None if not parseable.
+    """
+    # Common patterns:
+    # "retry after 60 seconds"
+    # "Retry-After: 120"
+    # "try again in 5 seconds"
+    # "30 seconds remaining"
+
+    patterns = [
+        r"retry.?after[:\s]+(\d+)\s*(?:seconds?)?",
+        r"try again in\s+(\d+)\s*(?:seconds?|s\b)",
+        r"(\d+)\s*seconds?\s*(?:remaining|left|until)",
+    ]
+
+    for pattern in patterns:
+        match = re.search(pattern, error_message, re.IGNORECASE)
+        if match:
+            return int(match.group(1))
+
+    return None
+
+
+def is_rate_limit_error(error_message: str) -> bool:
+    """
+    Detect if an error message indicates a rate limit.
+
+    Args:
+        error_message: The error message to check
+
+    Returns:
+        True if the error appears to be rate-limit related
+    """
+    error_lower = error_message.lower()
+    return any(pattern in error_lower for pattern in RATE_LIMIT_PATTERNS)
+

 async def run_agent_session(
    client: ClaudeSDKClient,
@@ -106,8 +159,19 @@ async def run_agent_session(
        return "continue", response_text

    except Exception as e:
-        print(f"Error during agent session: {e}")
-        return "error", str(e)
+        error_str = str(e)
+        print(f"Error during agent session: {error_str}")
+
+        # Detect rate limit errors from exception message
+        if is_rate_limit_error(error_str):
+            # Try to extract retry-after time from error
+            retry_seconds = parse_retry_after(error_str)
+            if retry_seconds:
+                return "rate_limit", str(retry_seconds)
+            else:
+                return "rate_limit", "unknown"
+
+        return "error", error_str


 async def run_autonomous_agent(
@@ -183,6 +247,8 @@ async def run_autonomous_agent(

    # Main loop
    iteration = 0
+    rate_limit_retries = 0  # Track consecutive rate limit errors for exponential backoff
+    error_retries = 0  # Track consecutive non-rate-limit errors

    while True:
        iteration += 1
@@ -250,11 +316,17 @@ async def run_autonomous_agent(

        # Handle status
        if status == "continue":
+            # Reset retry counters on success
+            rate_limit_retries = 0
+            error_retries = 0
+
            delay_seconds = AUTO_CONTINUE_DELAY_SECONDS
            target_time_str = None

-            if "limit reached" in response.lower():
-                print("Claude Agent SDK indicated limit reached.")
+            # Check for rate limit indicators in response text
+            response_lower = response.lower()
+            if any(pattern in response_lower for pattern in RATE_LIMIT_PATTERNS):
+                print("Claude Agent SDK indicated rate limit reached.")

                # Try to parse reset time from response
                match = re.search(
@@ -326,10 +398,26 @@ async def run_autonomous_agent(

            await asyncio.sleep(delay_seconds)

+        elif status == "rate_limit":
+            # Smart rate limit handling with exponential backoff
+            if response != "unknown":
+                delay_seconds = int(response)
+                print(f"\nRate limit hit. Waiting {delay_seconds} seconds before retry...")
+            else:
+                # Use exponential backoff when retry-after unknown
+                delay_seconds = min(60 * (2 ** rate_limit_retries), 3600)  # Max 1 hour
+                rate_limit_retries += 1
+                print(f"\nRate limit hit. Backoff wait: {delay_seconds} seconds (attempt #{rate_limit_retries})...")
+
+            await asyncio.sleep(delay_seconds)
+
        elif status == "error":
+            # Non-rate-limit errors: shorter backoff but still exponential
+            error_retries += 1
+            delay_seconds = min(30 * error_retries, 300)  # Max 5 minutes
            print("\nSession encountered an error")
-            print("Will retry with a fresh session...")
-            await asyncio.sleep(AUTO_CONTINUE_DELAY_SECONDS)
+            print(f"Will retry in {delay_seconds}s (attempt #{error_retries})...")
+            await asyncio.sleep(delay_seconds)

        # Small delay between sessions
        if max_iterations is None or iteration < max_iterations: