From bf194ad72fdde9a716d351c0b70bcb9644497004 Mon Sep 17 00:00:00 2001 From: cabana8471 Date: Mon, 26 Jan 2026 22:56:57 +0100 Subject: [PATCH 1/7] fix: improve rate limit handling with exponential backoff MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When Claude API hits rate limits via HTTP 429 exceptions (rather than response text), the agent now properly detects and handles them: - Add RATE_LIMIT_PATTERNS constant for comprehensive detection - Add parse_retry_after() to extract wait times from error messages - Add is_rate_limit_error() helper for pattern matching - Return new "rate_limit" status from exception handler - Implement exponential backoff: 60s → 120s → 240s... (max 1 hour) - Improve generic error backoff: 30s → 60s → 90s... (max 5 minutes) - Expand text-based detection patterns in response handling - Add unit tests for new functions Fixes #41 Co-Authored-By: Claude Opus 4.5 --- agent.py | 100 +++++++++++++++++++++++++++++++-- test_agent.py | 151 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 245 insertions(+), 6 deletions(-) create mode 100644 test_agent.py diff --git a/agent.py b/agent.py index 7d90473..4ccbaa0 100644 --- a/agent.py +++ b/agent.py @@ -35,6 +35,59 @@ from prompts import ( # Configuration AUTO_CONTINUE_DELAY_SECONDS = 3 +# Rate limit detection patterns (used in both exception messages and response text) +RATE_LIMIT_PATTERNS = [ + "limit reached", + "rate limit", + "rate_limit", + "too many requests", + "quota exceeded", + "please wait", + "try again later", + "429", + "overloaded", +] + + +def parse_retry_after(error_message: str) -> Optional[int]: + """ + Extract retry-after seconds from various error message formats. + + Returns seconds to wait, or None if not parseable. + """ + # Common patterns: + # "retry after 60 seconds" + # "Retry-After: 120" + # "try again in 5 seconds" + # "30 seconds remaining" + + patterns = [ + r"retry.?after[:\s]+(\d+)\s*(?:seconds?)?", + r"try again in\s+(\d+)\s*(?:seconds?|s\b)", + r"(\d+)\s*seconds?\s*(?:remaining|left|until)", + ] + + for pattern in patterns: + match = re.search(pattern, error_message, re.IGNORECASE) + if match: + return int(match.group(1)) + + return None + + +def is_rate_limit_error(error_message: str) -> bool: + """ + Detect if an error message indicates a rate limit. + + Args: + error_message: The error message to check + + Returns: + True if the error appears to be rate-limit related + """ + error_lower = error_message.lower() + return any(pattern in error_lower for pattern in RATE_LIMIT_PATTERNS) + async def run_agent_session( client: ClaudeSDKClient, @@ -106,8 +159,19 @@ async def run_agent_session( return "continue", response_text except Exception as e: - print(f"Error during agent session: {e}") - return "error", str(e) + error_str = str(e) + print(f"Error during agent session: {error_str}") + + # Detect rate limit errors from exception message + if is_rate_limit_error(error_str): + # Try to extract retry-after time from error + retry_seconds = parse_retry_after(error_str) + if retry_seconds: + return "rate_limit", str(retry_seconds) + else: + return "rate_limit", "unknown" + + return "error", error_str async def run_autonomous_agent( @@ -183,6 +247,8 @@ async def run_autonomous_agent( # Main loop iteration = 0 + rate_limit_retries = 0 # Track consecutive rate limit errors for exponential backoff + error_retries = 0 # Track consecutive non-rate-limit errors while True: iteration += 1 @@ -250,11 +316,17 @@ async def run_autonomous_agent( # Handle status if status == "continue": + # Reset retry counters on success + rate_limit_retries = 0 + error_retries = 0 + delay_seconds = AUTO_CONTINUE_DELAY_SECONDS target_time_str = None - if "limit reached" in response.lower(): - print("Claude Agent SDK indicated limit reached.") + # Check for rate limit indicators in response text + response_lower = response.lower() + if any(pattern in response_lower for pattern in RATE_LIMIT_PATTERNS): + print("Claude Agent SDK indicated rate limit reached.") # Try to parse reset time from response match = re.search( @@ -326,10 +398,26 @@ async def run_autonomous_agent( await asyncio.sleep(delay_seconds) + elif status == "rate_limit": + # Smart rate limit handling with exponential backoff + if response != "unknown": + delay_seconds = int(response) + print(f"\nRate limit hit. Waiting {delay_seconds} seconds before retry...") + else: + # Use exponential backoff when retry-after unknown + delay_seconds = min(60 * (2 ** rate_limit_retries), 3600) # Max 1 hour + rate_limit_retries += 1 + print(f"\nRate limit hit. Backoff wait: {delay_seconds} seconds (attempt #{rate_limit_retries})...") + + await asyncio.sleep(delay_seconds) + elif status == "error": + # Non-rate-limit errors: shorter backoff but still exponential + error_retries += 1 + delay_seconds = min(30 * error_retries, 300) # Max 5 minutes print("\nSession encountered an error") - print("Will retry with a fresh session...") - await asyncio.sleep(AUTO_CONTINUE_DELAY_SECONDS) + print(f"Will retry in {delay_seconds}s (attempt #{error_retries})...") + await asyncio.sleep(delay_seconds) # Small delay between sessions if max_iterations is None or iteration < max_iterations: diff --git a/test_agent.py b/test_agent.py new file mode 100644 index 0000000..bac4fd7 --- /dev/null +++ b/test_agent.py @@ -0,0 +1,151 @@ +""" +Unit tests for agent.py rate limit handling functions. + +Tests the parse_retry_after() and is_rate_limit_error() functions +added for improved rate limit handling (Issue #41). +""" + +import re +import unittest +from typing import Optional + +# Copy the constants and functions from agent.py for isolated testing +# (Avoids dependency on claude_agent_sdk which may not be installed) + +RATE_LIMIT_PATTERNS = [ + "limit reached", + "rate limit", + "rate_limit", + "too many requests", + "quota exceeded", + "please wait", + "try again later", + "429", + "overloaded", +] + + +def parse_retry_after(error_message: str) -> Optional[int]: + """ + Extract retry-after seconds from various error message formats. + + Returns seconds to wait, or None if not parseable. + """ + patterns = [ + r"retry.?after[:\s]+(\d+)\s*(?:seconds?)?", + r"try again in\s+(\d+)\s*(?:seconds?|s\b)", + r"(\d+)\s*seconds?\s*(?:remaining|left|until)", + ] + + for pattern in patterns: + match = re.search(pattern, error_message, re.IGNORECASE) + if match: + return int(match.group(1)) + + return None + + +def is_rate_limit_error(error_message: str) -> bool: + """ + Detect if an error message indicates a rate limit. + """ + error_lower = error_message.lower() + return any(pattern in error_lower for pattern in RATE_LIMIT_PATTERNS) + + +class TestParseRetryAfter(unittest.TestCase): + """Tests for parse_retry_after() function.""" + + def test_retry_after_colon_format(self): + """Test 'Retry-After: 60' format.""" + assert parse_retry_after("Retry-After: 60") == 60 + assert parse_retry_after("retry-after: 120") == 120 + assert parse_retry_after("retry after: 30 seconds") == 30 + + def test_retry_after_space_format(self): + """Test 'retry after 60 seconds' format.""" + assert parse_retry_after("retry after 60 seconds") == 60 + assert parse_retry_after("Please retry after 120 seconds") == 120 + assert parse_retry_after("Retry after 30") == 30 + + def test_try_again_in_format(self): + """Test 'try again in X seconds' format.""" + assert parse_retry_after("try again in 120 seconds") == 120 + assert parse_retry_after("Please try again in 60s") == 60 + assert parse_retry_after("Try again in 30 seconds") == 30 + + def test_seconds_remaining_format(self): + """Test 'X seconds remaining' format.""" + assert parse_retry_after("30 seconds remaining") == 30 + assert parse_retry_after("60 seconds left") == 60 + assert parse_retry_after("120 seconds until reset") == 120 + + def test_no_match(self): + """Test messages that don't contain retry-after info.""" + assert parse_retry_after("no match here") is None + assert parse_retry_after("Connection refused") is None + assert parse_retry_after("Internal server error") is None + assert parse_retry_after("") is None + + def test_minutes_not_supported(self): + """Test that minutes are not parsed (by design).""" + # We only support seconds to avoid complexity + assert parse_retry_after("wait 5 minutes") is None + assert parse_retry_after("try again in 2 minutes") is None + + +class TestIsRateLimitError(unittest.TestCase): + """Tests for is_rate_limit_error() function.""" + + def test_rate_limit_patterns(self): + """Test various rate limit error messages.""" + assert is_rate_limit_error("Rate limit exceeded") is True + assert is_rate_limit_error("rate_limit_exceeded") is True + assert is_rate_limit_error("Too many requests") is True + assert is_rate_limit_error("HTTP 429 Too Many Requests") is True + assert is_rate_limit_error("API quota exceeded") is True + assert is_rate_limit_error("Please wait before retrying") is True + assert is_rate_limit_error("Try again later") is True + assert is_rate_limit_error("Server is overloaded") is True + assert is_rate_limit_error("Usage limit reached") is True + + def test_case_insensitive(self): + """Test that detection is case-insensitive.""" + assert is_rate_limit_error("RATE LIMIT") is True + assert is_rate_limit_error("Rate Limit") is True + assert is_rate_limit_error("rate limit") is True + assert is_rate_limit_error("RaTe LiMiT") is True + + def test_non_rate_limit_errors(self): + """Test non-rate-limit error messages.""" + assert is_rate_limit_error("Connection refused") is False + assert is_rate_limit_error("Authentication failed") is False + assert is_rate_limit_error("Invalid API key") is False + assert is_rate_limit_error("Internal server error") is False + assert is_rate_limit_error("Network timeout") is False + assert is_rate_limit_error("") is False + + +class TestExponentialBackoff(unittest.TestCase): + """Test exponential backoff calculations.""" + + def test_backoff_sequence(self): + """Test that backoff follows expected sequence.""" + # Simulating: min(60 * (2 ** retries), 3600) + expected = [60, 120, 240, 480, 960, 1920, 3600, 3600] # Caps at 3600 + for retries, expected_delay in enumerate(expected): + delay = min(60 * (2 ** retries), 3600) + assert delay == expected_delay, f"Retry {retries}: expected {expected_delay}, got {delay}" + + def test_error_backoff_sequence(self): + """Test error backoff follows expected sequence.""" + # Simulating: min(30 * retries, 300) + expected = [30, 60, 90, 120, 150, 180, 210, 240, 270, 300, 300] # Caps at 300 + for retries in range(1, len(expected) + 1): + delay = min(30 * retries, 300) + expected_delay = expected[retries - 1] + assert delay == expected_delay, f"Retry {retries}: expected {expected_delay}, got {delay}" + + +if __name__ == "__main__": + unittest.main() From ff1a63d1045fe4ca43e296e2b6d4018891be74d1 Mon Sep 17 00:00:00 2001 From: cabana8471 Date: Tue, 27 Jan 2026 06:32:07 +0100 Subject: [PATCH 2/7] fix: address CodeRabbit review feedback - Fix comment: "exponential" -> "linear" for error backoff (30 * retries) - Fix rate limit counter reset: only reset when no rate limit signal detected - Apply exponential backoff to rate limit in response text (not just exceptions) - Use explicit `is not None` check for retry_seconds to handle Retry-After: 0 Co-Authored-By: Claude Opus 4.5 --- agent.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/agent.py b/agent.py index 4ccbaa0..3b1bf63 100644 --- a/agent.py +++ b/agent.py @@ -166,7 +166,7 @@ async def run_agent_session( if is_rate_limit_error(error_str): # Try to extract retry-after time from error retry_seconds = parse_retry_after(error_str) - if retry_seconds: + if retry_seconds is not None: return "rate_limit", str(retry_seconds) else: return "rate_limit", "unknown" @@ -316,9 +316,9 @@ async def run_autonomous_agent( # Handle status if status == "continue": - # Reset retry counters on success - rate_limit_retries = 0 + # Reset error retries on success; rate-limit retries reset only if no signal error_retries = 0 + reset_rate_limit_retries = True delay_seconds = AUTO_CONTINUE_DELAY_SECONDS target_time_str = None @@ -327,8 +327,18 @@ async def run_autonomous_agent( response_lower = response.lower() if any(pattern in response_lower for pattern in RATE_LIMIT_PATTERNS): print("Claude Agent SDK indicated rate limit reached.") + reset_rate_limit_retries = False - # Try to parse reset time from response + # Try to extract retry-after from response text first + retry_seconds = parse_retry_after(response) + if retry_seconds is not None: + delay_seconds = retry_seconds + else: + # Use exponential backoff when retry-after unknown + delay_seconds = min(60 * (2 ** rate_limit_retries), 3600) + rate_limit_retries += 1 + + # Try to parse reset time from response (more specific format) match = re.search( r"(?i)\bresets(?:\s+at)?\s+(\d+)(?::(\d+))?\s*(am|pm)\s*\(([^)]+)\)", response, @@ -396,6 +406,10 @@ async def run_autonomous_agent( print(f"\nSingle-feature mode: Feature #{feature_id} session complete.") break + # Reset rate limit retries only if no rate limit signal was detected + if reset_rate_limit_retries: + rate_limit_retries = 0 + await asyncio.sleep(delay_seconds) elif status == "rate_limit": @@ -412,7 +426,7 @@ async def run_autonomous_agent( await asyncio.sleep(delay_seconds) elif status == "error": - # Non-rate-limit errors: shorter backoff but still exponential + # Non-rate-limit errors: linear backoff capped at 5 minutes error_retries += 1 delay_seconds = min(30 * error_retries, 300) # Max 5 minutes print("\nSession encountered an error") From cf8dec9abf04c2c80800fab5033e78db206c8a3b Mon Sep 17 00:00:00 2001 From: cabana8471 Date: Tue, 27 Jan 2026 06:58:56 +0100 Subject: [PATCH 3/7] fix: address CodeRabbit review - extract rate limit logic to shared module - Create rate_limit_utils.py with shared constants and functions - Update agent.py to import from shared module - Update test_agent.py to import from shared module (removes duplication) Co-Authored-By: Claude Opus 4.5 --- agent.py | 88 ++++++++++++++++++--------------------------- rate_limit_utils.py | 69 +++++++++++++++++++++++++++++++++++ test_agent.py | 53 ++++----------------------- 3 files changed, 110 insertions(+), 100 deletions(-) create mode 100644 rate_limit_utils.py diff --git a/agent.py b/agent.py index 3b1bf63..46f1f34 100644 --- a/agent.py +++ b/agent.py @@ -23,7 +23,13 @@ if sys.platform == "win32": sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding="utf-8", errors="replace", line_buffering=True) from client import create_client -from progress import count_passing_tests, has_features, print_progress_summary, print_session_header +from progress import ( + clear_stuck_features, + count_passing_tests, + has_features, + print_progress_summary, + print_session_header, +) from prompts import ( copy_spec_to_project, get_coding_prompt, @@ -31,63 +37,15 @@ from prompts import ( get_single_feature_prompt, get_testing_prompt, ) +from rate_limit_utils import ( + RATE_LIMIT_PATTERNS, + is_rate_limit_error, + parse_retry_after, +) # Configuration AUTO_CONTINUE_DELAY_SECONDS = 3 -# Rate limit detection patterns (used in both exception messages and response text) -RATE_LIMIT_PATTERNS = [ - "limit reached", - "rate limit", - "rate_limit", - "too many requests", - "quota exceeded", - "please wait", - "try again later", - "429", - "overloaded", -] - - -def parse_retry_after(error_message: str) -> Optional[int]: - """ - Extract retry-after seconds from various error message formats. - - Returns seconds to wait, or None if not parseable. - """ - # Common patterns: - # "retry after 60 seconds" - # "Retry-After: 120" - # "try again in 5 seconds" - # "30 seconds remaining" - - patterns = [ - r"retry.?after[:\s]+(\d+)\s*(?:seconds?)?", - r"try again in\s+(\d+)\s*(?:seconds?|s\b)", - r"(\d+)\s*seconds?\s*(?:remaining|left|until)", - ] - - for pattern in patterns: - match = re.search(pattern, error_message, re.IGNORECASE) - if match: - return int(match.group(1)) - - return None - - -def is_rate_limit_error(error_message: str) -> bool: - """ - Detect if an error message indicates a rate limit. - - Args: - error_message: The error message to check - - Returns: - True if the error appears to be rate-limit related - """ - error_lower = error_message.lower() - return any(pattern in error_lower for pattern in RATE_LIMIT_PATTERNS) - async def run_agent_session( client: ClaudeSDKClient, @@ -215,6 +173,28 @@ async def run_autonomous_agent( # Create project directory project_dir.mkdir(parents=True, exist_ok=True) + # IMPORTANT: Do NOT clear stuck features in parallel mode! + # The orchestrator manages feature claiming atomically. + # Clearing here causes race conditions where features are marked in_progress + # by the orchestrator but immediately cleared by the agent subprocess on startup. + # + # For single-agent mode or manual runs, clearing is still safe because + # there's only one agent at a time and it happens before claiming any features. + # + # Only clear if we're NOT in a parallel orchestrator context + # (detected by checking if this agent is a subprocess spawned by orchestrator) + import psutil + try: + parent_process = psutil.Process().parent() + parent_name = parent_process.name() if parent_process else "" + + # Only clear if parent is NOT python (i.e., we're running manually, not from orchestrator) + if "python" not in parent_name.lower(): + clear_stuck_features(project_dir) + except Exception: + # If parent process check fails, err on the safe side and clear + clear_stuck_features(project_dir) + # Determine agent type if not explicitly set if agent_type is None: # Auto-detect based on whether we have features diff --git a/rate_limit_utils.py b/rate_limit_utils.py new file mode 100644 index 0000000..6d817f3 --- /dev/null +++ b/rate_limit_utils.py @@ -0,0 +1,69 @@ +""" +Rate Limit Utilities +==================== + +Shared utilities for detecting and handling API rate limits. +Used by both agent.py (production) and test_agent.py (tests). +""" + +import re +from typing import Optional + +# Rate limit detection patterns (used in both exception messages and response text) +RATE_LIMIT_PATTERNS = [ + "limit reached", + "rate limit", + "rate_limit", + "too many requests", + "quota exceeded", + "please wait", + "try again later", + "429", + "overloaded", +] + + +def parse_retry_after(error_message: str) -> Optional[int]: + """ + Extract retry-after seconds from various error message formats. + + Handles common formats: + - "Retry-After: 60" + - "retry after 60 seconds" + - "try again in 5 seconds" + - "30 seconds remaining" + + Args: + error_message: The error message to parse + + Returns: + Seconds to wait, or None if not parseable. + """ + patterns = [ + r"retry.?after[:\s]+(\d+)\s*(?:seconds?)?", + r"try again in\s+(\d+)\s*(?:seconds?|s\b)", + r"(\d+)\s*seconds?\s*(?:remaining|left|until)", + ] + + for pattern in patterns: + match = re.search(pattern, error_message, re.IGNORECASE) + if match: + return int(match.group(1)) + + return None + + +def is_rate_limit_error(error_message: str) -> bool: + """ + Detect if an error message indicates a rate limit. + + Checks against common rate limit patterns from various API providers. + + Args: + error_message: The error message to check + + Returns: + True if the message indicates a rate limit, False otherwise. + """ + error_lower = error_message.lower() + return any(pattern in error_lower for pattern in RATE_LIMIT_PATTERNS) diff --git a/test_agent.py b/test_agent.py index bac4fd7..2af56d5 100644 --- a/test_agent.py +++ b/test_agent.py @@ -1,56 +1,17 @@ """ -Unit tests for agent.py rate limit handling functions. +Unit tests for rate limit handling functions. Tests the parse_retry_after() and is_rate_limit_error() functions -added for improved rate limit handling (Issue #41). +from rate_limit_utils.py (shared module). """ -import re import unittest -from typing import Optional -# Copy the constants and functions from agent.py for isolated testing -# (Avoids dependency on claude_agent_sdk which may not be installed) - -RATE_LIMIT_PATTERNS = [ - "limit reached", - "rate limit", - "rate_limit", - "too many requests", - "quota exceeded", - "please wait", - "try again later", - "429", - "overloaded", -] - - -def parse_retry_after(error_message: str) -> Optional[int]: - """ - Extract retry-after seconds from various error message formats. - - Returns seconds to wait, or None if not parseable. - """ - patterns = [ - r"retry.?after[:\s]+(\d+)\s*(?:seconds?)?", - r"try again in\s+(\d+)\s*(?:seconds?|s\b)", - r"(\d+)\s*seconds?\s*(?:remaining|left|until)", - ] - - for pattern in patterns: - match = re.search(pattern, error_message, re.IGNORECASE) - if match: - return int(match.group(1)) - - return None - - -def is_rate_limit_error(error_message: str) -> bool: - """ - Detect if an error message indicates a rate limit. - """ - error_lower = error_message.lower() - return any(pattern in error_lower for pattern in RATE_LIMIT_PATTERNS) +from rate_limit_utils import ( + RATE_LIMIT_PATTERNS, + is_rate_limit_error, + parse_retry_after, +) class TestParseRetryAfter(unittest.TestCase): From dcf8b99dca31be1f6205d5f7cc1286d8228d2c77 Mon Sep 17 00:00:00 2001 From: cabana8471 Date: Tue, 27 Jan 2026 07:26:29 +0100 Subject: [PATCH 4/7] fix: remove unused RATE_LIMIT_PATTERNS import Fixes ruff F401 lint error - the constant was imported but not used in test_agent.py. Co-Authored-By: Claude Opus 4.5 --- test_agent.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test_agent.py b/test_agent.py index 2af56d5..f672ecb 100644 --- a/test_agent.py +++ b/test_agent.py @@ -8,7 +8,6 @@ from rate_limit_utils.py (shared module). import unittest from rate_limit_utils import ( - RATE_LIMIT_PATTERNS, is_rate_limit_error, parse_retry_after, ) From f018b4c1d8e67d472ac61be01da632b907509bc1 Mon Sep 17 00:00:00 2001 From: cabana8471 Date: Fri, 30 Jan 2026 21:20:52 +0100 Subject: [PATCH 5/7] fix: address PR #109 review feedback from leonvanzyl - BLOCKER: Remove clear_stuck_features import and psutil block (doesn't exist in upstream) - Fix overly broad rate limit patterns to avoid false positives - Remove "please wait", "try again later", "limit reached", "429" (bare) - Convert to regex-based detection with word boundaries - Add patterns for "http 429", "status 429", "error 429" - Add bounds checking (1-3600s) for parsed retry delays - Use is_rate_limit_error() consistently instead of inline pattern matching - Extract backoff functions to rate_limit_utils.py for testability - calculate_rate_limit_backoff() for exponential backoff - calculate_error_backoff() for linear backoff - clamp_retry_delay() for safe range enforcement - Rename test_agent.py to test_rate_limit_utils.py (matches module) - Add comprehensive false-positive tests: - Version numbers (v14.29.0) - Issue/PR numbers (#429) - Line numbers (file.py:429) - Port numbers (4293) - Legitimate wait/retry messages Co-Authored-By: Claude Opus 4.5 --- agent.py | 55 +++++--------- rate_limit_utils.py | 81 ++++++++++++++++---- test_agent.py => test_rate_limit_utils.py | 93 +++++++++++++++++++---- 3 files changed, 164 insertions(+), 65 deletions(-) rename test_agent.py => test_rate_limit_utils.py (51%) diff --git a/agent.py b/agent.py index 46f1f34..265a702 100644 --- a/agent.py +++ b/agent.py @@ -24,7 +24,6 @@ if sys.platform == "win32": from client import create_client from progress import ( - clear_stuck_features, count_passing_tests, has_features, print_progress_summary, @@ -38,7 +37,9 @@ from prompts import ( get_testing_prompt, ) from rate_limit_utils import ( - RATE_LIMIT_PATTERNS, + calculate_error_backoff, + calculate_rate_limit_backoff, + clamp_retry_delay, is_rate_limit_error, parse_retry_after, ) @@ -173,28 +174,6 @@ async def run_autonomous_agent( # Create project directory project_dir.mkdir(parents=True, exist_ok=True) - # IMPORTANT: Do NOT clear stuck features in parallel mode! - # The orchestrator manages feature claiming atomically. - # Clearing here causes race conditions where features are marked in_progress - # by the orchestrator but immediately cleared by the agent subprocess on startup. - # - # For single-agent mode or manual runs, clearing is still safe because - # there's only one agent at a time and it happens before claiming any features. - # - # Only clear if we're NOT in a parallel orchestrator context - # (detected by checking if this agent is a subprocess spawned by orchestrator) - import psutil - try: - parent_process = psutil.Process().parent() - parent_name = parent_process.name() if parent_process else "" - - # Only clear if parent is NOT python (i.e., we're running manually, not from orchestrator) - if "python" not in parent_name.lower(): - clear_stuck_features(project_dir) - except Exception: - # If parent process check fails, err on the safe side and clear - clear_stuck_features(project_dir) - # Determine agent type if not explicitly set if agent_type is None: # Auto-detect based on whether we have features @@ -304,18 +283,17 @@ async def run_autonomous_agent( target_time_str = None # Check for rate limit indicators in response text - response_lower = response.lower() - if any(pattern in response_lower for pattern in RATE_LIMIT_PATTERNS): + if is_rate_limit_error(response): print("Claude Agent SDK indicated rate limit reached.") reset_rate_limit_retries = False # Try to extract retry-after from response text first retry_seconds = parse_retry_after(response) if retry_seconds is not None: - delay_seconds = retry_seconds + delay_seconds = clamp_retry_delay(retry_seconds) else: # Use exponential backoff when retry-after unknown - delay_seconds = min(60 * (2 ** rate_limit_retries), 3600) + delay_seconds = calculate_rate_limit_backoff(rate_limit_retries) rate_limit_retries += 1 # Try to parse reset time from response (more specific format) @@ -347,9 +325,9 @@ async def run_autonomous_agent( target += timedelta(days=1) delta = target - now - delay_seconds = min( + delay_seconds = int(min( delta.total_seconds(), 24 * 60 * 60 - ) # Clamp to 24 hours max + )) # Clamp to 24 hours max target_time_str = target.strftime("%B %d, %Y at %I:%M %p %Z") except Exception as e: @@ -395,20 +373,25 @@ async def run_autonomous_agent( elif status == "rate_limit": # Smart rate limit handling with exponential backoff if response != "unknown": - delay_seconds = int(response) - print(f"\nRate limit hit. Waiting {delay_seconds} seconds before retry...") - else: - # Use exponential backoff when retry-after unknown - delay_seconds = min(60 * (2 ** rate_limit_retries), 3600) # Max 1 hour + try: + delay_seconds = clamp_retry_delay(int(response)) + except (ValueError, TypeError): + # Malformed value - fall through to exponential backoff + response = "unknown" + if response == "unknown": + # Use exponential backoff when retry-after unknown or malformed + delay_seconds = calculate_rate_limit_backoff(rate_limit_retries) rate_limit_retries += 1 print(f"\nRate limit hit. Backoff wait: {delay_seconds} seconds (attempt #{rate_limit_retries})...") + else: + print(f"\nRate limit hit. Waiting {delay_seconds} seconds before retry...") await asyncio.sleep(delay_seconds) elif status == "error": # Non-rate-limit errors: linear backoff capped at 5 minutes error_retries += 1 - delay_seconds = min(30 * error_retries, 300) # Max 5 minutes + delay_seconds = calculate_error_backoff(error_retries) print("\nSession encountered an error") print(f"Will retry in {delay_seconds}s (attempt #{error_retries})...") await asyncio.sleep(delay_seconds) diff --git a/rate_limit_utils.py b/rate_limit_utils.py index 6d817f3..de70b24 100644 --- a/rate_limit_utils.py +++ b/rate_limit_utils.py @@ -3,25 +3,31 @@ Rate Limit Utilities ==================== Shared utilities for detecting and handling API rate limits. -Used by both agent.py (production) and test_agent.py (tests). +Used by both agent.py (production) and test_rate_limit_utils.py (tests). """ import re from typing import Optional -# Rate limit detection patterns (used in both exception messages and response text) -RATE_LIMIT_PATTERNS = [ - "limit reached", - "rate limit", - "rate_limit", - "too many requests", - "quota exceeded", - "please wait", - "try again later", - "429", - "overloaded", +# Regex patterns for rate limit detection (used in both exception messages and response text) +# These patterns use word boundaries to avoid false positives like "PR #429" or "please wait while I..." +RATE_LIMIT_REGEX_PATTERNS = [ + r"\brate[_\s]?limit", # "rate limit", "rate_limit", "ratelimit" + r"\btoo\s+many\s+requests", # "too many requests" + r"\bhttp\s*429\b", # "http 429", "http429" + r"\bstatus\s*429\b", # "status 429", "status429" + r"\berror\s*429\b", # "error 429", "error429" + r"\b429\s+too\s+many", # "429 too many" + r"\boverloaded\b", # "overloaded" + r"\bquota\s*exceeded\b", # "quota exceeded" ] +# Compiled regex for efficient matching +_RATE_LIMIT_REGEX = re.compile( + "|".join(RATE_LIMIT_REGEX_PATTERNS), + re.IGNORECASE +) + def parse_retry_after(error_message: str) -> Optional[int]: """ @@ -57,7 +63,8 @@ def is_rate_limit_error(error_message: str) -> bool: """ Detect if an error message indicates a rate limit. - Checks against common rate limit patterns from various API providers. + Uses regex patterns with word boundaries to avoid false positives + like "PR #429", "please wait while I...", or "Node v14.29.0". Args: error_message: The error message to check @@ -65,5 +72,49 @@ def is_rate_limit_error(error_message: str) -> bool: Returns: True if the message indicates a rate limit, False otherwise. """ - error_lower = error_message.lower() - return any(pattern in error_lower for pattern in RATE_LIMIT_PATTERNS) + return bool(_RATE_LIMIT_REGEX.search(error_message)) + + +def calculate_rate_limit_backoff(retries: int) -> int: + """ + Calculate exponential backoff for rate limits. + + Formula: min(60 * 2^retries, 3600) - caps at 1 hour + Sequence: 60s, 120s, 240s, 480s, 960s, 1920s, 3600s... + + Args: + retries: Number of consecutive rate limit retries (0-indexed) + + Returns: + Delay in seconds (clamped to 1-3600 range) + """ + return int(min(max(60 * (2 ** retries), 1), 3600)) + + +def calculate_error_backoff(retries: int) -> int: + """ + Calculate linear backoff for non-rate-limit errors. + + Formula: min(30 * retries, 300) - caps at 5 minutes + Sequence: 30s, 60s, 90s, 120s, ... 300s + + Args: + retries: Number of consecutive error retries (1-indexed) + + Returns: + Delay in seconds (clamped to 1-300 range) + """ + return min(max(30 * retries, 1), 300) + + +def clamp_retry_delay(delay_seconds: int) -> int: + """ + Clamp a retry delay to a safe range (1-3600 seconds). + + Args: + delay_seconds: The raw delay value + + Returns: + Delay clamped to 1-3600 seconds + """ + return min(max(delay_seconds, 1), 3600) diff --git a/test_agent.py b/test_rate_limit_utils.py similarity index 51% rename from test_agent.py rename to test_rate_limit_utils.py index f672ecb..eb1f01c 100644 --- a/test_agent.py +++ b/test_rate_limit_utils.py @@ -1,13 +1,16 @@ """ Unit tests for rate limit handling functions. -Tests the parse_retry_after() and is_rate_limit_error() functions -from rate_limit_utils.py (shared module). +Tests the parse_retry_after(), is_rate_limit_error(), and backoff calculation +functions from rate_limit_utils.py (shared module). """ import unittest from rate_limit_utils import ( + calculate_error_backoff, + calculate_rate_limit_backoff, + clamp_retry_delay, is_rate_limit_error, parse_retry_after, ) @@ -64,10 +67,15 @@ class TestIsRateLimitError(unittest.TestCase): assert is_rate_limit_error("Too many requests") is True assert is_rate_limit_error("HTTP 429 Too Many Requests") is True assert is_rate_limit_error("API quota exceeded") is True - assert is_rate_limit_error("Please wait before retrying") is True - assert is_rate_limit_error("Try again later") is True assert is_rate_limit_error("Server is overloaded") is True - assert is_rate_limit_error("Usage limit reached") is True + + def test_specific_429_patterns(self): + """Test that 429 is detected with proper context.""" + assert is_rate_limit_error("http 429") is True + assert is_rate_limit_error("HTTP429") is True + assert is_rate_limit_error("status 429") is True + assert is_rate_limit_error("error 429") is True + assert is_rate_limit_error("429 too many requests") is True def test_case_insensitive(self): """Test that detection is case-insensitive.""" @@ -86,26 +94,83 @@ class TestIsRateLimitError(unittest.TestCase): assert is_rate_limit_error("") is False -class TestExponentialBackoff(unittest.TestCase): - """Test exponential backoff calculations.""" +class TestFalsePositives(unittest.TestCase): + """Verify non-rate-limit messages don't trigger detection.""" - def test_backoff_sequence(self): - """Test that backoff follows expected sequence.""" - # Simulating: min(60 * (2 ** retries), 3600) + def test_version_numbers_with_429(self): + """Version numbers should not trigger.""" + assert is_rate_limit_error("Node v14.29.0") is False + assert is_rate_limit_error("Python 3.12.429") is False + assert is_rate_limit_error("Version 2.429 released") is False + + def test_issue_and_pr_numbers(self): + """Issue/PR numbers should not trigger.""" + assert is_rate_limit_error("See PR #429") is False + assert is_rate_limit_error("Fixed in issue 429") is False + assert is_rate_limit_error("Closes #429") is False + + def test_line_numbers(self): + """Line numbers in errors should not trigger.""" + assert is_rate_limit_error("Error at line 429") is False + assert is_rate_limit_error("See file.py:429") is False + + def test_port_numbers(self): + """Port numbers should not trigger.""" + assert is_rate_limit_error("port 4293") is False + assert is_rate_limit_error("localhost:4290") is False + + def test_legitimate_wait_messages(self): + """Legitimate wait instructions should not trigger.""" + # These would fail if "please wait" pattern still exists + assert is_rate_limit_error("Please wait for the build to complete") is False + assert is_rate_limit_error("Please wait while I analyze this") is False + + def test_retry_discussion_messages(self): + """Messages discussing retry logic should not trigger.""" + # These would fail if "try again later" pattern still exists + assert is_rate_limit_error("Try again later after maintenance") is False + assert is_rate_limit_error("The user should try again later") is False + + def test_limit_discussion_messages(self): + """Messages discussing limits should not trigger (removed pattern).""" + # These would fail if "limit reached" pattern still exists + assert is_rate_limit_error("File size limit reached") is False + assert is_rate_limit_error("Memory limit reached, consider optimization") is False + + +class TestBackoffFunctions(unittest.TestCase): + """Test backoff calculation functions from rate_limit_utils.""" + + def test_rate_limit_backoff_sequence(self): + """Test that rate limit backoff follows expected exponential sequence.""" expected = [60, 120, 240, 480, 960, 1920, 3600, 3600] # Caps at 3600 for retries, expected_delay in enumerate(expected): - delay = min(60 * (2 ** retries), 3600) + delay = calculate_rate_limit_backoff(retries) assert delay == expected_delay, f"Retry {retries}: expected {expected_delay}, got {delay}" def test_error_backoff_sequence(self): - """Test error backoff follows expected sequence.""" - # Simulating: min(30 * retries, 300) + """Test that error backoff follows expected linear sequence.""" expected = [30, 60, 90, 120, 150, 180, 210, 240, 270, 300, 300] # Caps at 300 for retries in range(1, len(expected) + 1): - delay = min(30 * retries, 300) + delay = calculate_error_backoff(retries) expected_delay = expected[retries - 1] assert delay == expected_delay, f"Retry {retries}: expected {expected_delay}, got {delay}" + def test_clamp_retry_delay(self): + """Test that retry delay is clamped to valid range.""" + # Values within range stay the same + assert clamp_retry_delay(60) == 60 + assert clamp_retry_delay(1800) == 1800 + assert clamp_retry_delay(3600) == 3600 + + # Values below minimum get clamped to 1 + assert clamp_retry_delay(0) == 1 + assert clamp_retry_delay(-10) == 1 + + # Values above maximum get clamped to 3600 + assert clamp_retry_delay(7200) == 3600 + assert clamp_retry_delay(86400) == 3600 + if __name__ == "__main__": unittest.main() From 88c695259f543a7067ff6be6c3116e9674d7b346 Mon Sep 17 00:00:00 2001 From: cabana8471 Date: Fri, 30 Jan 2026 21:41:01 +0100 Subject: [PATCH 6/7] fix: address 3 new CodeRabbit review comments 1. agent.py: Reset opposite retry counter when entering rate_limit or error status to prevent mixed events from inflating delays 2. rate_limit_utils.py: Fix parse_retry_after() regex to reject minute/hour units - patterns now require explicit "seconds"/"s" unit or end of string 3. test_rate_limit_utils.py: Add tests for "retry after 5 minutes" and other minute/hour variants to ensure they return None Co-Authored-By: Claude Opus 4.5 --- agent.py | 4 ++++ rate_limit_utils.py | 8 ++++++-- test_rate_limit_utils.py | 4 ++++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/agent.py b/agent.py index 265a702..6688bcf 100644 --- a/agent.py +++ b/agent.py @@ -372,6 +372,8 @@ async def run_autonomous_agent( elif status == "rate_limit": # Smart rate limit handling with exponential backoff + # Reset error counter so mixed events don't inflate delays + error_retries = 0 if response != "unknown": try: delay_seconds = clamp_retry_delay(int(response)) @@ -390,6 +392,8 @@ async def run_autonomous_agent( elif status == "error": # Non-rate-limit errors: linear backoff capped at 5 minutes + # Reset rate limit counter so mixed events don't inflate delays + rate_limit_retries = 0 error_retries += 1 delay_seconds = calculate_error_backoff(error_retries) print("\nSession encountered an error") diff --git a/rate_limit_utils.py b/rate_limit_utils.py index de70b24..9bdbb22 100644 --- a/rate_limit_utils.py +++ b/rate_limit_utils.py @@ -45,9 +45,13 @@ def parse_retry_after(error_message: str) -> Optional[int]: Returns: Seconds to wait, or None if not parseable. """ + # Patterns require explicit "seconds" or "s" unit, OR no unit at all (end of string/sentence) + # This prevents matching "30 minutes" or "1 hour" since those have non-seconds units patterns = [ - r"retry.?after[:\s]+(\d+)\s*(?:seconds?)?", - r"try again in\s+(\d+)\s*(?:seconds?|s\b)", + r"retry.?after[:\s]+(\d+)\s*(?:seconds?|s\b)", # Requires seconds unit + r"retry.?after[:\s]+(\d+)(?:\s*$|\s*[,.])", # Or end of string/sentence + r"try again in\s+(\d+)\s*(?:seconds?|s\b)", # Requires seconds unit + r"try again in\s+(\d+)(?:\s*$|\s*[,.])", # Or end of string/sentence r"(\d+)\s*seconds?\s*(?:remaining|left|until)", ] diff --git a/test_rate_limit_utils.py b/test_rate_limit_utils.py index eb1f01c..55ecaa7 100644 --- a/test_rate_limit_utils.py +++ b/test_rate_limit_utils.py @@ -53,8 +53,12 @@ class TestParseRetryAfter(unittest.TestCase): def test_minutes_not_supported(self): """Test that minutes are not parsed (by design).""" # We only support seconds to avoid complexity + # These patterns should NOT match when followed by minute/hour units assert parse_retry_after("wait 5 minutes") is None assert parse_retry_after("try again in 2 minutes") is None + assert parse_retry_after("retry after 5 minutes") is None + assert parse_retry_after("retry after 1 hour") is None + assert parse_retry_after("try again in 30 min") is None class TestIsRateLimitError(unittest.TestCase): From 89f6721cfaff036013d02c81fd8947e9bb8bd863 Mon Sep 17 00:00:00 2001 From: cabana8471 Date: Fri, 30 Jan 2026 21:47:11 +0100 Subject: [PATCH 7/7] fix: use clamp_retry_delay() for reset-time delays Use the shared clamp_retry_delay() function (1-hour cap) for parsed reset-time delays instead of a separate 24-hour cap. This aligns with the PR's consistent 1-hour maximum delay objective. Co-Authored-By: Claude Opus 4.5 --- agent.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/agent.py b/agent.py index 6688bcf..9bd2c3b 100644 --- a/agent.py +++ b/agent.py @@ -325,9 +325,7 @@ async def run_autonomous_agent( target += timedelta(days=1) delta = target - now - delay_seconds = int(min( - delta.total_seconds(), 24 * 60 * 60 - )) # Clamp to 24 hours max + delay_seconds = clamp_retry_delay(int(delta.total_seconds())) target_time_str = target.strftime("%B %d, %Y at %I:%M %p %Z") except Exception as e: