diff --git a/agent.py b/agent.py index 7d90473..9bd2c3b 100644 --- a/agent.py +++ b/agent.py @@ -23,7 +23,12 @@ if sys.platform == "win32": sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding="utf-8", errors="replace", line_buffering=True) from client import create_client -from progress import count_passing_tests, has_features, print_progress_summary, print_session_header +from progress import ( + count_passing_tests, + has_features, + print_progress_summary, + print_session_header, +) from prompts import ( copy_spec_to_project, get_coding_prompt, @@ -31,6 +36,13 @@ from prompts import ( get_single_feature_prompt, get_testing_prompt, ) +from rate_limit_utils import ( + calculate_error_backoff, + calculate_rate_limit_backoff, + clamp_retry_delay, + is_rate_limit_error, + parse_retry_after, +) # Configuration AUTO_CONTINUE_DELAY_SECONDS = 3 @@ -106,8 +118,19 @@ async def run_agent_session( return "continue", response_text except Exception as e: - print(f"Error during agent session: {e}") - return "error", str(e) + error_str = str(e) + print(f"Error during agent session: {error_str}") + + # Detect rate limit errors from exception message + if is_rate_limit_error(error_str): + # Try to extract retry-after time from error + retry_seconds = parse_retry_after(error_str) + if retry_seconds is not None: + return "rate_limit", str(retry_seconds) + else: + return "rate_limit", "unknown" + + return "error", error_str async def run_autonomous_agent( @@ -183,6 +206,8 @@ async def run_autonomous_agent( # Main loop iteration = 0 + rate_limit_retries = 0 # Track consecutive rate limit errors for exponential backoff + error_retries = 0 # Track consecutive non-rate-limit errors while True: iteration += 1 @@ -250,13 +275,28 @@ async def run_autonomous_agent( # Handle status if status == "continue": + # Reset error retries on success; rate-limit retries reset only if no signal + error_retries = 0 + reset_rate_limit_retries = True + delay_seconds = AUTO_CONTINUE_DELAY_SECONDS 
target_time_str = None - if "limit reached" in response.lower(): - print("Claude Agent SDK indicated limit reached.") + # Check for rate limit indicators in response text + if is_rate_limit_error(response): + print("Claude Agent SDK indicated rate limit reached.") + reset_rate_limit_retries = False - # Try to parse reset time from response + # Try to extract retry-after from response text first + retry_seconds = parse_retry_after(response) + if retry_seconds is not None: + delay_seconds = clamp_retry_delay(retry_seconds) + else: + # Use exponential backoff when retry-after unknown + delay_seconds = calculate_rate_limit_backoff(rate_limit_retries) + rate_limit_retries += 1 + + # Try to parse reset time from response (more specific format) match = re.search( r"(?i)\bresets(?:\s+at)?\s+(\d+)(?::(\d+))?\s*(am|pm)\s*\(([^)]+)\)", response, @@ -285,9 +325,7 @@ async def run_autonomous_agent( target += timedelta(days=1) delta = target - now - delay_seconds = min( - delta.total_seconds(), 24 * 60 * 60 - ) # Clamp to 24 hours max + delay_seconds = clamp_retry_delay(int(delta.total_seconds())) target_time_str = target.strftime("%B %d, %Y at %I:%M %p %Z") except Exception as e: @@ -324,12 +362,41 @@ async def run_autonomous_agent( print(f"\nSingle-feature mode: Feature #{feature_id} session complete.") break + # Reset rate limit retries only if no rate limit signal was detected + if reset_rate_limit_retries: + rate_limit_retries = 0 + + await asyncio.sleep(delay_seconds) + + elif status == "rate_limit": + # Smart rate limit handling with exponential backoff + # Reset error counter so mixed events don't inflate delays + error_retries = 0 + if response != "unknown": + try: + delay_seconds = clamp_retry_delay(int(response)) + except (ValueError, TypeError): + # Malformed value - fall through to exponential backoff + response = "unknown" + if response == "unknown": + # Use exponential backoff when retry-after unknown or malformed + delay_seconds = 
calculate_rate_limit_backoff(rate_limit_retries) + rate_limit_retries += 1 + print(f"\nRate limit hit. Backoff wait: {delay_seconds} seconds (attempt #{rate_limit_retries})...") + else: + print(f"\nRate limit hit. Waiting {delay_seconds} seconds before retry...") + await asyncio.sleep(delay_seconds) elif status == "error": + # Non-rate-limit errors: linear backoff capped at 5 minutes + # Reset rate limit counter so mixed events don't inflate delays + rate_limit_retries = 0 + error_retries += 1 + delay_seconds = calculate_error_backoff(error_retries) print("\nSession encountered an error") - print("Will retry with a fresh session...") - await asyncio.sleep(AUTO_CONTINUE_DELAY_SECONDS) + print(f"Will retry in {delay_seconds}s (attempt #{error_retries})...") + await asyncio.sleep(delay_seconds) # Small delay between sessions if max_iterations is None or iteration < max_iterations: diff --git a/rate_limit_utils.py b/rate_limit_utils.py new file mode 100644 index 0000000..9bdbb22 --- /dev/null +++ b/rate_limit_utils.py @@ -0,0 +1,124 @@ +""" +Rate Limit Utilities +==================== + +Shared utilities for detecting and handling API rate limits. +Used by both agent.py (production) and test_rate_limit_utils.py (tests). +""" + +import re +from typing import Optional + +# Regex patterns for rate limit detection (used in both exception messages and response text) +# These patterns use word boundaries to avoid false positives like "PR #429" or "please wait while I..." 
# Regex fragments that signal an API rate limit. They are OR-ed together into
# _RATE_LIMIT_REGEX and matched case-insensitively against exception messages
# and response text. A bare "429" is deliberately not a pattern: it must appear
# with http/status/error context (or be followed by "too many") so that PR
# numbers, line numbers, versions and ports do not trigger detection.
RATE_LIMIT_REGEX_PATTERNS = [
    r"\brate[-_\s]?limit",        # "rate limit", "rate_limit", "rate-limit", "ratelimit"
    r"\btoo\s+many\s+requests",   # "too many requests"
    r"\bhttp\s*429\b",            # "http 429", "http429"
    r"\bstatus\s*429\b",          # "status 429", "status429"
    r"\berror\s*429\b",           # "error 429", "error429"
    r"\b429\s+too\s+many",        # "429 too many"
    r"\boverloaded\b",            # "overloaded"
    r"\bquota\s*exceeded\b",      # "quota exceeded"
]

# Single compiled alternation so detection is one regex scan per message.
_RATE_LIMIT_REGEX = re.compile(
    "|".join(RATE_LIMIT_REGEX_PATTERNS),
    re.IGNORECASE,
)

# Backoff tuning knobs (all values in seconds).
_MIN_RETRY_DELAY = 1
_MAX_RETRY_DELAY = 3600
_RATE_LIMIT_BASE_DELAY = 60
_ERROR_BACKOFF_STEP = 30
_ERROR_BACKOFF_CAP = 300
# Smallest exponent for which 60 * 2**n already exceeds the 3600s cap
# (60 * 2**6 == 3840); anything at or above it short-circuits to the cap.
_RATE_LIMIT_CAP_EXPONENT = 6

# Building blocks for the retry-after patterns.
# Group 1 is the whole-second part, optional group 2 the fractional digits.
_SECONDS_VALUE = r"(\d+)(?:\.(\d+))?"
# An explicit seconds unit; the trailing \b rejects words that merely start
# with "s"/"sec"/"second".
_SECONDS_UNIT = r"\s*(?:seconds?|secs?|s)\b"
# A unit-less number must be followed by end of string or by sentence
# punctuation. The (?!\d) keeps the "." in "2.5 minutes" from being mistaken
# for a sentence terminator.
_BARE_TERMINATOR = r"(?=\s*(?:$|[,.](?!\d)))"

# Compiled once at import time; tried in order, first match wins.
_RETRY_AFTER_REGEXES = tuple(
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        r"retry.?after[:\s]+" + _SECONDS_VALUE + _SECONDS_UNIT,
        r"retry.?after[:\s]+" + _SECONDS_VALUE + _BARE_TERMINATOR,
        r"try again in\s+" + _SECONDS_VALUE + _SECONDS_UNIT,
        r"try again in\s+" + _SECONDS_VALUE + _BARE_TERMINATOR,
        _SECONDS_VALUE + r"\s*(?:seconds?|secs?)\s*(?:remaining|left|until)",
    )
)


def parse_retry_after(error_message: Optional[str]) -> Optional[int]:
    """
    Extract retry-after seconds from various error message formats.

    Handles common formats:
    - "Retry-After: 60"
    - "retry after 60 seconds"
    - "try again in 5 seconds" / "try again in 5s" / "try again in 5 secs"
    - "30 seconds remaining" / "left" / "until ..."

    Only seconds are supported. A number followed by another unit
    ("5 minutes", "1 hour", "2.5 minutes") is not parsed. Fractional
    seconds are rounded up so the caller never waits less than requested.

    Args:
        error_message: The error message to parse. None or an empty string
            is treated as "nothing to parse".

    Returns:
        Seconds to wait, or None if not parseable.
    """
    if not error_message:
        return None

    for regex in _RETRY_AFTER_REGEXES:
        match = regex.search(error_message)
        if match is None:
            continue
        whole, fraction = match.group(1), match.group(2)
        seconds = int(whole)
        # Round up on any non-zero fractional part ("1.5" -> 2, "2.0" -> 2).
        if fraction and fraction.strip("0"):
            seconds += 1
        return seconds

    return None


def is_rate_limit_error(error_message: Optional[str]) -> bool:
    """
    Detect if an error message indicates a rate limit.

    Uses regex patterns with word boundaries to avoid false positives
    like "PR #429", "please wait while I...", or "Node v14.29.0".

    Args:
        error_message: The error message to check. None or an empty string
            is never a rate limit.

    Returns:
        True if the message indicates a rate limit, False otherwise.
    """
    if not error_message:
        return False
    return _RATE_LIMIT_REGEX.search(error_message) is not None


def calculate_rate_limit_backoff(retries: int) -> int:
    """
    Calculate exponential backoff for rate limits.

    Formula: min(60 * 2^retries, 3600) - caps at 1 hour
    Sequence: 60s, 120s, 240s, 480s, 960s, 1920s, 3600s...

    Args:
        retries: Number of consecutive rate limit retries (0-indexed)

    Returns:
        Delay in seconds (clamped to 1-3600 range)
    """
    # Short-circuit once the cap is reached so a large retry counter never
    # forces evaluation of an enormous power of two.
    if retries >= _RATE_LIMIT_CAP_EXPONENT:
        return _MAX_RETRY_DELAY
    raw_delay = _RATE_LIMIT_BASE_DELAY * (2 ** retries)
    # int() because a negative exponent makes 2 ** retries a float.
    return int(min(max(raw_delay, _MIN_RETRY_DELAY), _MAX_RETRY_DELAY))


def calculate_error_backoff(retries: int) -> int:
    """
    Calculate linear backoff for non-rate-limit errors.

    Formula: min(30 * retries, 300) - caps at 5 minutes
    Sequence: 30s, 60s, 90s, 120s, ... 300s

    Args:
        retries: Number of consecutive error retries (1-indexed)

    Returns:
        Delay in seconds (clamped to 1-300 range)
    """
    linear_delay = _ERROR_BACKOFF_STEP * retries
    return min(max(linear_delay, _MIN_RETRY_DELAY), _ERROR_BACKOFF_CAP)


def clamp_retry_delay(delay_seconds: int, max_seconds: int = _MAX_RETRY_DELAY) -> int:
    """
    Clamp a retry delay to a safe range (1-3600 seconds by default).

    Args:
        delay_seconds: The raw delay value
        max_seconds: Upper bound for the returned delay. Defaults to 3600;
            callers that need a longer ceiling can pass a larger value.
            Expected to be >= 1.

    Returns:
        Delay clamped to the range 1..max_seconds
    """
    return min(max(delay_seconds, _MIN_RETRY_DELAY), max_seconds)
"""
Unit tests for rate limit handling functions.

Tests the parse_retry_after(), is_rate_limit_error(), and backoff calculation
functions from rate_limit_utils.py (shared module).

Checks use unittest assertion methods rather than bare ``assert`` statements:
bare asserts are removed under ``python -O`` (the tests would silently pass)
and give no diagnostics. Table-driven cases run inside ``subTest`` so one
failing message does not hide the others.
"""

import unittest

from rate_limit_utils import (
    calculate_error_backoff,
    calculate_rate_limit_backoff,
    clamp_retry_delay,
    is_rate_limit_error,
    parse_retry_after,
)


class _DetectionAssertions:
    """Shared helpers for table-driven is_rate_limit_error() checks."""

    def assert_detected(self, messages):
        """Every message must be classified as a rate limit (exactly True)."""
        for message in messages:
            with self.subTest(message=message):
                self.assertIs(is_rate_limit_error(message), True)

    def assert_ignored(self, messages):
        """No message may be classified as a rate limit (exactly False)."""
        for message in messages:
            with self.subTest(message=message):
                self.assertIs(is_rate_limit_error(message), False)


class TestParseRetryAfter(unittest.TestCase):
    """Tests for parse_retry_after() function."""

    def _assert_parsed(self, cases):
        """Each (message, seconds) pair must parse to exactly `seconds`."""
        for message, expected in cases:
            with self.subTest(message=message):
                self.assertEqual(parse_retry_after(message), expected)

    def _assert_unparsed(self, messages):
        """Each message must yield None (no retry-after information)."""
        for message in messages:
            with self.subTest(message=message):
                self.assertIsNone(parse_retry_after(message))

    def test_retry_after_colon_format(self):
        """Test 'Retry-After: 60' format."""
        self._assert_parsed([
            ("Retry-After: 60", 60),
            ("retry-after: 120", 120),
            ("retry after: 30 seconds", 30),
        ])

    def test_retry_after_space_format(self):
        """Test 'retry after 60 seconds' format."""
        self._assert_parsed([
            ("retry after 60 seconds", 60),
            ("Please retry after 120 seconds", 120),
            ("Retry after 30", 30),
        ])

    def test_try_again_in_format(self):
        """Test 'try again in X seconds' format."""
        self._assert_parsed([
            ("try again in 120 seconds", 120),
            ("Please try again in 60s", 60),
            ("Try again in 30 seconds", 30),
        ])

    def test_seconds_remaining_format(self):
        """Test 'X seconds remaining' format."""
        self._assert_parsed([
            ("30 seconds remaining", 30),
            ("60 seconds left", 60),
            ("120 seconds until reset", 120),
        ])

    def test_no_match(self):
        """Test messages that don't contain retry-after info."""
        self._assert_unparsed([
            "no match here",
            "Connection refused",
            "Internal server error",
            "",
        ])

    def test_minutes_not_supported(self):
        """Test that minutes are not parsed (by design)."""
        # We only support seconds to avoid complexity.
        # These patterns should NOT match when followed by minute/hour units.
        self._assert_unparsed([
            "wait 5 minutes",
            "try again in 2 minutes",
            "retry after 5 minutes",
            "retry after 1 hour",
            "try again in 30 min",
        ])


class TestIsRateLimitError(_DetectionAssertions, unittest.TestCase):
    """Tests for is_rate_limit_error() function."""

    def test_rate_limit_patterns(self):
        """Test various rate limit error messages."""
        self.assert_detected([
            "Rate limit exceeded",
            "rate_limit_exceeded",
            "Too many requests",
            "HTTP 429 Too Many Requests",
            "API quota exceeded",
            "Server is overloaded",
        ])

    def test_specific_429_patterns(self):
        """Test that 429 is detected with proper context."""
        self.assert_detected([
            "http 429",
            "HTTP429",
            "status 429",
            "error 429",
            "429 too many requests",
        ])

    def test_case_insensitive(self):
        """Test that detection is case-insensitive."""
        self.assert_detected([
            "RATE LIMIT",
            "Rate Limit",
            "rate limit",
            "RaTe LiMiT",
        ])

    def test_non_rate_limit_errors(self):
        """Test non-rate-limit error messages."""
        self.assert_ignored([
            "Connection refused",
            "Authentication failed",
            "Invalid API key",
            "Internal server error",
            "Network timeout",
            "",
        ])


class TestFalsePositives(_DetectionAssertions, unittest.TestCase):
    """Verify non-rate-limit messages don't trigger detection."""

    def test_version_numbers_with_429(self):
        """Version numbers should not trigger."""
        self.assert_ignored([
            "Node v14.29.0",
            "Python 3.12.429",
            "Version 2.429 released",
        ])

    def test_issue_and_pr_numbers(self):
        """Issue/PR numbers should not trigger."""
        self.assert_ignored([
            "See PR #429",
            "Fixed in issue 429",
            "Closes #429",
        ])

    def test_line_numbers(self):
        """Line numbers in errors should not trigger."""
        self.assert_ignored([
            "Error at line 429",
            "See file.py:429",
        ])

    def test_port_numbers(self):
        """Port numbers should not trigger."""
        self.assert_ignored([
            "port 4293",
            "localhost:4290",
        ])

    def test_legitimate_wait_messages(self):
        """Legitimate wait instructions should not trigger."""
        # These would fail if a "please wait" pattern still existed.
        self.assert_ignored([
            "Please wait for the build to complete",
            "Please wait while I analyze this",
        ])

    def test_retry_discussion_messages(self):
        """Messages discussing retry logic should not trigger."""
        # These would fail if a "try again later" pattern still existed.
        self.assert_ignored([
            "Try again later after maintenance",
            "The user should try again later",
        ])

    def test_limit_discussion_messages(self):
        """Messages discussing limits should not trigger (removed pattern)."""
        # These would fail if a "limit reached" pattern still existed.
        self.assert_ignored([
            "File size limit reached",
            "Memory limit reached, consider optimization",
        ])


class TestBackoffFunctions(unittest.TestCase):
    """Test backoff calculation functions from rate_limit_utils."""

    def test_rate_limit_backoff_sequence(self):
        """Test that rate limit backoff follows expected exponential sequence."""
        expected = [60, 120, 240, 480, 960, 1920, 3600, 3600]  # Caps at 3600
        for retries, expected_delay in enumerate(expected):
            with self.subTest(retries=retries):
                self.assertEqual(calculate_rate_limit_backoff(retries), expected_delay)

    def test_error_backoff_sequence(self):
        """Test that error backoff follows expected linear sequence."""
        expected = [30, 60, 90, 120, 150, 180, 210, 240, 270, 300, 300]  # Caps at 300
        # The error retry counter is 1-indexed, hence start=1.
        for retries, expected_delay in enumerate(expected, start=1):
            with self.subTest(retries=retries):
                self.assertEqual(calculate_error_backoff(retries), expected_delay)

    def test_clamp_retry_delay(self):
        """Test that retry delay is clamped to valid range."""
        cases = [
            # Values within range stay the same
            (60, 60),
            (1800, 1800),
            (3600, 3600),
            # Values below minimum get clamped to 1
            (0, 1),
            (-10, 1),
            # Values above maximum get clamped to 3600
            (7200, 3600),
            (86400, 3600),
        ]
        for raw_delay, expected_delay in cases:
            with self.subTest(raw_delay=raw_delay):
                self.assertEqual(clamp_retry_delay(raw_delay), expected_delay)


if __name__ == "__main__":
    unittest.main()