Merge pull request #109 from cabana8471-arch/fix/rate-limit-handling

fix: improve rate limit handling with exponential backoff
This commit is contained in:
Leon van Zyl
2026-02-01 10:35:48 +02:00
committed by GitHub
3 changed files with 382 additions and 11 deletions

View File

@@ -23,7 +23,12 @@ if sys.platform == "win32":
sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding="utf-8", errors="replace", line_buffering=True)
from client import create_client
from progress import count_passing_tests, has_features, print_progress_summary, print_session_header
from progress import (
count_passing_tests,
has_features,
print_progress_summary,
print_session_header,
)
from prompts import (
copy_spec_to_project,
get_coding_prompt,
@@ -31,6 +36,13 @@ from prompts import (
get_single_feature_prompt,
get_testing_prompt,
)
from rate_limit_utils import (
calculate_error_backoff,
calculate_rate_limit_backoff,
clamp_retry_delay,
is_rate_limit_error,
parse_retry_after,
)
# Configuration
AUTO_CONTINUE_DELAY_SECONDS = 3
@@ -106,8 +118,19 @@ async def run_agent_session(
return "continue", response_text
except Exception as e:
print(f"Error during agent session: {e}")
return "error", str(e)
error_str = str(e)
print(f"Error during agent session: {error_str}")
# Detect rate limit errors from exception message
if is_rate_limit_error(error_str):
# Try to extract retry-after time from error
retry_seconds = parse_retry_after(error_str)
if retry_seconds is not None:
return "rate_limit", str(retry_seconds)
else:
return "rate_limit", "unknown"
return "error", error_str
async def run_autonomous_agent(
@@ -183,6 +206,8 @@ async def run_autonomous_agent(
# Main loop
iteration = 0
rate_limit_retries = 0 # Track consecutive rate limit errors for exponential backoff
error_retries = 0 # Track consecutive non-rate-limit errors
while True:
iteration += 1
@@ -250,13 +275,28 @@ async def run_autonomous_agent(
# Handle status
if status == "continue":
# Reset error retries on success; rate-limit retries reset only if no signal
error_retries = 0
reset_rate_limit_retries = True
delay_seconds = AUTO_CONTINUE_DELAY_SECONDS
target_time_str = None
if "limit reached" in response.lower():
print("Claude Agent SDK indicated limit reached.")
# Check for rate limit indicators in response text
if is_rate_limit_error(response):
print("Claude Agent SDK indicated rate limit reached.")
reset_rate_limit_retries = False
# Try to parse reset time from response
# Try to extract retry-after from response text first
retry_seconds = parse_retry_after(response)
if retry_seconds is not None:
delay_seconds = clamp_retry_delay(retry_seconds)
else:
# Use exponential backoff when retry-after unknown
delay_seconds = calculate_rate_limit_backoff(rate_limit_retries)
rate_limit_retries += 1
# Try to parse reset time from response (more specific format)
match = re.search(
r"(?i)\bresets(?:\s+at)?\s+(\d+)(?::(\d+))?\s*(am|pm)\s*\(([^)]+)\)",
response,
@@ -285,9 +325,7 @@ async def run_autonomous_agent(
target += timedelta(days=1)
delta = target - now
delay_seconds = min(
delta.total_seconds(), 24 * 60 * 60
) # Clamp to 24 hours max
delay_seconds = clamp_retry_delay(int(delta.total_seconds()))
target_time_str = target.strftime("%B %d, %Y at %I:%M %p %Z")
except Exception as e:
@@ -324,12 +362,41 @@ async def run_autonomous_agent(
print(f"\nSingle-feature mode: Feature #{feature_id} session complete.")
break
# Reset rate limit retries only if no rate limit signal was detected
if reset_rate_limit_retries:
rate_limit_retries = 0
await asyncio.sleep(delay_seconds)
elif status == "rate_limit":
# Smart rate limit handling with exponential backoff
# Reset error counter so mixed events don't inflate delays
error_retries = 0
if response != "unknown":
try:
delay_seconds = clamp_retry_delay(int(response))
except (ValueError, TypeError):
# Malformed value - fall through to exponential backoff
response = "unknown"
if response == "unknown":
# Use exponential backoff when retry-after unknown or malformed
delay_seconds = calculate_rate_limit_backoff(rate_limit_retries)
rate_limit_retries += 1
print(f"\nRate limit hit. Backoff wait: {delay_seconds} seconds (attempt #{rate_limit_retries})...")
else:
print(f"\nRate limit hit. Waiting {delay_seconds} seconds before retry...")
await asyncio.sleep(delay_seconds)
elif status == "error":
# Non-rate-limit errors: linear backoff capped at 5 minutes
# Reset rate limit counter so mixed events don't inflate delays
rate_limit_retries = 0
error_retries += 1
delay_seconds = calculate_error_backoff(error_retries)
print("\nSession encountered an error")
print("Will retry with a fresh session...")
await asyncio.sleep(AUTO_CONTINUE_DELAY_SECONDS)
print(f"Will retry in {delay_seconds}s (attempt #{error_retries})...")
await asyncio.sleep(delay_seconds)
# Small delay between sessions
if max_iterations is None or iteration < max_iterations:

124
rate_limit_utils.py Normal file
View File

@@ -0,0 +1,124 @@
"""
Rate Limit Utilities
====================
Shared utilities for detecting and handling API rate limits.
Used by both agent.py (production) and test_rate_limit_utils.py (tests).
"""
import re
from typing import Optional
# Detection patterns applied to both exception messages and response text.
# Every alternative is anchored on a word boundary so that incidental text
# such as "PR #429" or "please wait while I..." is not treated as a rate limit.
RATE_LIMIT_REGEX_PATTERNS = [
    # "rate limit", "rate_limit", "ratelimit"
    r"\brate[_\s]?limit",
    # "too many requests"
    r"\btoo\s+many\s+requests",
    # "http 429", "http429"
    r"\bhttp\s*429\b",
    # "status 429", "status429"
    r"\bstatus\s*429\b",
    # "error 429", "error429"
    r"\berror\s*429\b",
    # "429 too many"
    r"\b429\s+too\s+many",
    # "overloaded"
    r"\boverloaded\b",
    # "quota exceeded"
    r"\bquota\s*exceeded\b",
]

# Single case-insensitive alternation of all patterns, compiled once at import.
_RATE_LIMIT_REGEX = re.compile("|".join(RATE_LIMIT_REGEX_PATTERNS), flags=re.IGNORECASE)
# A seconds value: an integer with an optional fractional part ("60", "1.5").
_RETRY_SECONDS_VALUE = r"(\d+(?:\.\d+)?)"

# Terminator for a value written without a unit: end of string, or sentence
# punctuation. The (?!\d) lookahead keeps a decimal point or thousands
# separator ("2.5 minutes", "1,000 seconds", "10.0.0.1") from being mistaken
# for the end of a sentence, which would truncate the number and skip the
# unit check entirely.
_RETRY_BARE_VALUE_END = r"(?:\s*$|\s*[,.](?!\d))"

# Explicit seconds unit: "second", "seconds", or a standalone "s".
_RETRY_SECONDS_UNIT = r"\s*(?:seconds?|s\b)"

# Compiled once at import; tried in order, first match wins.
_RETRY_AFTER_PATTERNS = tuple(
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        r"retry.?after[:\s]+" + _RETRY_SECONDS_VALUE + _RETRY_SECONDS_UNIT,
        r"retry.?after[:\s]+" + _RETRY_SECONDS_VALUE + _RETRY_BARE_VALUE_END,
        r"try again in\s+" + _RETRY_SECONDS_VALUE + _RETRY_SECONDS_UNIT,
        r"try again in\s+" + _RETRY_SECONDS_VALUE + _RETRY_BARE_VALUE_END,
        _RETRY_SECONDS_VALUE + r"\s*seconds?\s*(?:remaining|left|until)",
    )
)


def parse_retry_after(error_message: str) -> Optional[int]:
    """
    Extract retry-after seconds from various error message formats.

    Handles common formats:
    - "Retry-After: 60"
    - "retry after 60 seconds"
    - "try again in 5 seconds"
    - "30 seconds remaining"

    A value must carry an explicit seconds unit ("seconds", "second", "s") or
    no unit at all (followed by end of string or sentence punctuation), so
    "5 minutes" or "1 hour" are not parsed. Fractional values such as
    "1.5 seconds" are rounded up to the next whole second so the caller never
    retries earlier than requested.

    Args:
        error_message: The error message to parse

    Returns:
        Seconds to wait, or None if the message is empty or not parseable.
    """
    if not error_message:
        return None

    for pattern in _RETRY_AFTER_PATTERNS:
        match = pattern.search(error_message)
        if match:
            whole, _, fraction = match.group(1).partition(".")
            # Round up when any non-zero fractional digit is present.
            return int(whole) + (1 if fraction.strip("0") else 0)

    return None
def is_rate_limit_error(error_message: str) -> bool:
    """
    Report whether a message looks like a rate-limit signal.

    The message is searched against the module's combined, case-insensitive
    rate-limit pattern; a hit anywhere in the text counts.

    Args:
        error_message: The error message to check

    Returns:
        True if any rate-limit pattern occurs in the message, False otherwise.
    """
    found = _RATE_LIMIT_REGEX.search(error_message)
    return found is not None
def calculate_rate_limit_backoff(retries: int) -> int:
    """
    Compute the exponential wait used after consecutive rate-limit hits.

    The delay starts at 60 seconds and doubles with each retry, bounded to
    the range 1-3600 seconds: 60, 120, 240, 480, 960, 1920, 3600, 3600, ...

    Args:
        retries: Number of consecutive rate limit retries (0-indexed)

    Returns:
        Delay in whole seconds, between 1 and 3600 inclusive.
    """
    raw_delay = 60 * (2 ** retries)
    if raw_delay < 1:
        raw_delay = 1
    elif raw_delay > 3600:
        raw_delay = 3600
    # A negative exponent yields a float; truncate to whole seconds.
    return int(raw_delay)
def calculate_error_backoff(retries: int) -> int:
    """
    Compute the linear wait used after consecutive non-rate-limit errors.

    The delay grows by 30 seconds per retry and is bounded to the range
    1-300 seconds: 30, 60, 90, 120, ... 300.

    Args:
        retries: Number of consecutive error retries (1-indexed)

    Returns:
        Delay in seconds, between 1 and 300 inclusive.
    """
    scaled = 30 * retries
    if scaled < 1:
        return 1
    if scaled > 300:
        return 300
    return scaled
def clamp_retry_delay(delay_seconds: int) -> int:
    """
    Bound a retry delay to the range 1-3600 seconds.

    Args:
        delay_seconds: The raw delay value

    Returns:
        1 if the value is below 1, 3600 if it is above 3600, otherwise the
        value unchanged.
    """
    lower_bound, upper_bound = 1, 3600
    if delay_seconds < lower_bound:
        return lower_bound
    if delay_seconds > upper_bound:
        return upper_bound
    return delay_seconds

180
test_rate_limit_utils.py Normal file
View File

@@ -0,0 +1,180 @@
"""
Unit tests for rate limit handling functions.
Tests the parse_retry_after(), is_rate_limit_error(), and backoff calculation
functions from rate_limit_utils.py (shared module).
"""
import unittest
from rate_limit_utils import (
calculate_error_backoff,
calculate_rate_limit_backoff,
clamp_retry_delay,
is_rate_limit_error,
parse_retry_after,
)
class TestParseRetryAfter(unittest.TestCase):
    """Checks that parse_retry_after() extracts second counts from messages."""

    def test_retry_after_colon_format(self):
        """Colon-separated forms such as 'Retry-After: 60'."""
        cases = {
            "Retry-After: 60": 60,
            "retry-after: 120": 120,
            "retry after: 30 seconds": 30,
        }
        for message, seconds in cases.items():
            assert parse_retry_after(message) == seconds

    def test_retry_after_space_format(self):
        """Space-separated forms such as 'retry after 60 seconds'."""
        cases = {
            "retry after 60 seconds": 60,
            "Please retry after 120 seconds": 120,
            "Retry after 30": 30,
        }
        for message, seconds in cases.items():
            assert parse_retry_after(message) == seconds

    def test_try_again_in_format(self):
        """The 'try again in X seconds' family."""
        cases = {
            "try again in 120 seconds": 120,
            "Please try again in 60s": 60,
            "Try again in 30 seconds": 30,
        }
        for message, seconds in cases.items():
            assert parse_retry_after(message) == seconds

    def test_seconds_remaining_format(self):
        """The 'X seconds remaining / left / until' family."""
        cases = {
            "30 seconds remaining": 30,
            "60 seconds left": 60,
            "120 seconds until reset": 120,
        }
        for message, seconds in cases.items():
            assert parse_retry_after(message) == seconds

    def test_no_match(self):
        """Messages carrying no retry-after information yield None."""
        unrelated = (
            "no match here",
            "Connection refused",
            "Internal server error",
            "",
        )
        for message in unrelated:
            assert parse_retry_after(message) is None

    def test_minutes_not_supported(self):
        """Minute/hour units are deliberately not parsed."""
        # Only seconds are supported, to keep parsing simple; a number
        # followed by a minute or hour unit must not match.
        non_second_units = (
            "wait 5 minutes",
            "try again in 2 minutes",
            "retry after 5 minutes",
            "retry after 1 hour",
            "try again in 30 min",
        )
        for message in non_second_units:
            assert parse_retry_after(message) is None
class TestIsRateLimitError(unittest.TestCase):
    """Checks is_rate_limit_error() on positive and negative messages."""

    def test_rate_limit_patterns(self):
        """Typical rate-limit wording is detected."""
        rate_limited = (
            "Rate limit exceeded",
            "rate_limit_exceeded",
            "Too many requests",
            "HTTP 429 Too Many Requests",
            "API quota exceeded",
            "Server is overloaded",
        )
        for message in rate_limited:
            assert is_rate_limit_error(message) is True

    def test_specific_429_patterns(self):
        """A 429 accompanied by HTTP/status/error context is detected."""
        with_context = (
            "http 429",
            "HTTP429",
            "status 429",
            "error 429",
            "429 too many requests",
        )
        for message in with_context:
            assert is_rate_limit_error(message) is True

    def test_case_insensitive(self):
        """Letter case does not affect detection."""
        spellings = ("RATE LIMIT", "Rate Limit", "rate limit", "RaTe LiMiT")
        for message in spellings:
            assert is_rate_limit_error(message) is True

    def test_non_rate_limit_errors(self):
        """Other kinds of errors are not reported as rate limits."""
        other_errors = (
            "Connection refused",
            "Authentication failed",
            "Invalid API key",
            "Internal server error",
            "Network timeout",
            "",
        )
        for message in other_errors:
            assert is_rate_limit_error(message) is False
class TestFalsePositives(unittest.TestCase):
    """Ensures ordinary messages are not misread as rate limits."""

    def _assert_not_rate_limited(self, messages):
        """Assert that none of the given messages is flagged."""
        for message in messages:
            assert is_rate_limit_error(message) is False

    def test_version_numbers_with_429(self):
        """Version strings containing 429-like digits are ignored."""
        self._assert_not_rate_limited(
            ["Node v14.29.0", "Python 3.12.429", "Version 2.429 released"]
        )

    def test_issue_and_pr_numbers(self):
        """Issue and PR references are ignored."""
        self._assert_not_rate_limited(
            ["See PR #429", "Fixed in issue 429", "Closes #429"]
        )

    def test_line_numbers(self):
        """Line numbers inside error text are ignored."""
        self._assert_not_rate_limited(["Error at line 429", "See file.py:429"])

    def test_port_numbers(self):
        """Port numbers are ignored."""
        self._assert_not_rate_limited(["port 4293", "localhost:4290"])

    def test_legitimate_wait_messages(self):
        """Plain wait instructions are ignored."""
        # Would fail if a "please wait" pattern were present.
        self._assert_not_rate_limited(
            [
                "Please wait for the build to complete",
                "Please wait while I analyze this",
            ]
        )

    def test_retry_discussion_messages(self):
        """Text that merely discusses retrying is ignored."""
        # Would fail if a "try again later" pattern were present.
        self._assert_not_rate_limited(
            [
                "Try again later after maintenance",
                "The user should try again later",
            ]
        )

    def test_limit_discussion_messages(self):
        """Text about other kinds of limits is ignored (removed pattern)."""
        # Would fail if a "limit reached" pattern were present.
        self._assert_not_rate_limited(
            [
                "File size limit reached",
                "Memory limit reached, consider optimization",
            ]
        )
class TestBackoffFunctions(unittest.TestCase):
    """Checks the backoff and clamping helpers from rate_limit_utils."""

    def test_rate_limit_backoff_sequence(self):
        """Rate-limit backoff doubles from 60s and stops growing at 3600s."""
        expected = [60, 120, 240, 480, 960, 1920] + [3600] * 2  # Caps at 3600
        for retries in range(len(expected)):
            expected_delay = expected[retries]
            delay = calculate_rate_limit_backoff(retries)
            assert delay == expected_delay, f"Retry {retries}: expected {expected_delay}, got {delay}"

    def test_error_backoff_sequence(self):
        """Error backoff grows by 30s per retry and stops growing at 300s."""
        expected = [30, 60, 90, 120, 150, 180, 210, 240, 270, 300, 300]  # Caps at 300
        # Retries are 1-indexed for the error backoff.
        for retries, expected_delay in enumerate(expected, start=1):
            delay = calculate_error_backoff(retries)
            assert delay == expected_delay, f"Retry {retries}: expected {expected_delay}, got {delay}"

    def test_clamp_retry_delay(self):
        """Delays are forced into the 1-3600 second range."""
        cases = [
            # In-range values pass through untouched
            (60, 60),
            (1800, 1800),
            (3600, 3600),
            # Anything below the minimum becomes 1
            (0, 1),
            (-10, 1),
            # Anything above the maximum becomes 3600
            (7200, 3600),
            (86400, 3600),
        ]
        for raw_value, clamped in cases:
            assert clamp_retry_delay(raw_value) == clamped
# Run the suite with unittest's runner when this file is executed directly.
if __name__ == "__main__":
    unittest.main()