Mirror of https://github.com/leonvanzyl/autocoder.git (synced 2026-02-01 15:03:36 +00:00)
Token reduction (~40% per session, ~2.3M fewer tokens per 200-feature project):
- Agent-type-specific tool lists: coding 9, testing 5, init 5 (was 19 for all)
- Right-sized max_turns: coding 300, testing 100 (was 1000 for all)
- Trimmed coding prompt template (~150 lines removed)
- Streamlined testing prompt with batch support
- YOLO mode now strips browser testing instructions from prompt
- Added Grep, WebFetch, WebSearch to expand project session

Performance improvements:
- Rate limit retries start at ~15s with jitter (was fixed 60s)
- Post-spawn delay reduced to 0.5s (was 2s)
- Orchestrator consolidated to 1 DB query per loop (was 5-7)
- Testing agents batch 3 features per session (was 1)
- Smart context compaction preserves critical state, discards noise

Bug fixes:
- Removed ghost feature_release_testing MCP tool (wasted tokens every test session)
- Forward all 9 Vertex AI env vars to chat sessions (was missing 3)
- Fix DetachedInstanceError risk in test batch ORM access
- Prevent duplicate testing of same features in parallel mode

Code deduplication:
- _get_project_path(): 9 copies -> 1 shared utility (project_helpers.py)
- validate_project_name(): 9 copies -> 2 variants in 1 file (validation.py)
- ROOT_DIR: 10 copies -> 1 definition (chat_constants.py)
- API_ENV_VARS: 4 copies -> 1 source of truth (env_constants.py)

Security hardening:
- Unified sensitive directory blocklist (14 dirs, was two divergent lists)
- Cached get_blocked_paths() for O(1) directory listing checks
- Terminal security warning when ALLOW_REMOTE=1 exposes WebSocket
- 20 new security tests for EXTRA_READ_PATHS blocking
- Extracted _validate_command_list() and _validate_pkill_processes() helpers

Type safety:
- 87 mypy errors -> 0 across 58 source files
- Installed types-PyYAML for proper yaml stub types
- Fixed SQLAlchemy Column[T] coercions across all routers

Dead code removed:
- 13 files deleted (~2,679 lines): unused UI components, debug logs, outdated docs
- 7 unused npm packages removed (Radix UI components with 0 imports)
- AgentAvatar.tsx reduced from 615 -> 119 lines (SVGs extracted to mascotData.tsx)

New CLI options:
- --testing-batch-size (1-5) for parallel mode test batching
- --testing-feature-ids for direct multi-feature testing

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
133 lines
4.1 KiB
Python
"""
|
|
Rate Limit Utilities
|
|
====================
|
|
|
|
Shared utilities for detecting and handling API rate limits.
|
|
Used by both agent.py (production) and test_rate_limit_utils.py (tests).
|
|
"""
|
|
|
|
import random
|
|
import re
|
|
from typing import Optional
|
|
|
|
# Regex patterns for rate limit detection (used in both exception messages and response text)
|
|
# These patterns use word boundaries to avoid false positives like "PR #429" or "please wait while I..."
|
|
RATE_LIMIT_REGEX_PATTERNS = [
|
|
r"\brate[_\s]?limit", # "rate limit", "rate_limit", "ratelimit"
|
|
r"\btoo\s+many\s+requests", # "too many requests"
|
|
r"\bhttp\s*429\b", # "http 429", "http429"
|
|
r"\bstatus\s*429\b", # "status 429", "status429"
|
|
r"\berror\s*429\b", # "error 429", "error429"
|
|
r"\b429\s+too\s+many", # "429 too many"
|
|
r"\b(?:server|api|system)\s+(?:is\s+)?overloaded\b", # "server is overloaded", "api overloaded"
|
|
r"\bquota\s*exceeded\b", # "quota exceeded"
|
|
]
|
|
|
|
# Compiled regex for efficient matching
|
|
_RATE_LIMIT_REGEX = re.compile(
|
|
"|".join(RATE_LIMIT_REGEX_PATTERNS),
|
|
re.IGNORECASE
|
|
)


def parse_retry_after(error_message: str) -> Optional[int]:
    """
    Extract retry-after seconds from various error message formats.

    Handles common formats:
    - "Retry-After: 60"
    - "retry after 60 seconds"
    - "try again in 5 seconds"
    - "30 seconds remaining"

    Args:
        error_message: The error message to parse

    Returns:
        Seconds to wait, or None if not parseable.
    """
    # Patterns require an explicit "seconds" or "s" unit, OR no unit at all (end of string/sentence).
    # This prevents matching "30 minutes" or "1 hour", since those have non-seconds units.
    patterns = [
        r"retry.?after[:\s]+(\d+)\s*(?:seconds?|s\b)",  # Requires seconds unit
        r"retry.?after[:\s]+(\d+)(?:\s*$|\s*[,.])",  # Or end of string/sentence
        r"try again in\s+(\d+)\s*(?:seconds?|s\b)",  # Requires seconds unit
        r"try again in\s+(\d+)(?:\s*$|\s*[,.])",  # Or end of string/sentence
        r"(\d+)\s*seconds?\s*(?:remaining|left|until)",
    ]

    for pattern in patterns:
        match = re.search(pattern, error_message, re.IGNORECASE)
        if match:
            return int(match.group(1))

    return None
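
# Illustrative results (a quick sketch of the behavior above, not part of the module's API):
#     parse_retry_after("Retry-After: 60")         -> 60
#     parse_retry_after("try again in 5 seconds")  -> 5
#     parse_retry_after("retry after 2 minutes")   -> None  (non-seconds unit, by design)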


def is_rate_limit_error(error_message: str) -> bool:
    """
    Detect if an error message indicates a rate limit.

    Uses regex patterns with word boundaries to avoid false positives
    like "PR #429", "please wait while I...", or "Node v14.29.0".

    Args:
        error_message: The error message to check

    Returns:
        True if the message indicates a rate limit, False otherwise.
    """
    return bool(_RATE_LIMIT_REGEX.search(error_message))
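
# Illustrative results, following the patterns defined above:
#     is_rate_limit_error("HTTP 429 Too Many Requests")  -> True
#     is_rate_limit_error("quota exceeded for project")  -> True
#     is_rate_limit_error("merged PR #429 into main")    -> False  (word boundaries reject the bare "429")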


def calculate_rate_limit_backoff(retries: int) -> int:
    """
    Calculate exponential backoff with jitter for rate limits.

    Base formula: min(15 * 2^retries, 3600)
    Jitter: adds 0-30% random jitter to prevent thundering herd.
    Base sequence: ~15-20s, ~30-40s, ~60-78s, ~120-156s, ...

    The lower starting delay (15s vs 60s) allows faster recovery from
    transient rate limits, while jitter prevents synchronized retries
    when multiple agents hit limits simultaneously.

    Args:
        retries: Number of consecutive rate limit retries (0-indexed)

    Returns:
        Delay in seconds (base clamped to 1-3600, plus 0-30% jitter,
        so the total can exceed 3600 once the base is at the cap)
    """
    base = int(min(max(15 * (2 ** retries), 1), 3600))
    jitter = random.uniform(0, base * 0.3)
    return int(base + jitter)
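
# Illustrative ranges (jitter makes exact values nondeterministic):
#     calculate_rate_limit_backoff(0) -> 15-19 seconds
#     calculate_rate_limit_backoff(2) -> 60-78 seconds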


def calculate_error_backoff(retries: int) -> int:
    """
    Calculate linear backoff for non-rate-limit errors.

    Formula: min(30 * retries, 300) - caps at 5 minutes
    Sequence: 30s, 60s, 90s, 120s, ... 300s

    Args:
        retries: Number of consecutive error retries (1-indexed)

    Returns:
        Delay in seconds (clamped to 1-300 range)
    """
    return min(max(30 * retries, 1), 300)
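
# Illustrative results:
#     calculate_error_backoff(1)  -> 30
#     calculate_error_backoff(4)  -> 120
#     calculate_error_backoff(10) -> 300  (cap reached)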


def clamp_retry_delay(delay_seconds: int) -> int:
    """
    Clamp a retry delay to a safe range (1-3600 seconds).

    Args:
        delay_seconds: The raw delay value

    Returns:
        Delay clamped to 1-3600 seconds
    """
    return min(max(delay_seconds, 1), 3600)
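

# A minimal end-to-end sketch of how these helpers could be combined, assuming
# a hypothetical `flaky_call` that raises errors containing rate-limit text.
# The production retry loop lives in agent.py and may differ.
if __name__ == "__main__":
    import time

    def flaky_call(attempt: int) -> str:
        # Simulate two rate-limited attempts, then success.
        if attempt < 2:
            raise RuntimeError("HTTP 429 Too Many Requests. Retry-After: 2")
        return "ok"

    for attempt in range(5):
        try:
            print(flaky_call(attempt))
            break
        except RuntimeError as exc:
            message = str(exc)
            if is_rate_limit_error(message):
                # Prefer the server's hint when present; otherwise back off exponentially.
                hinted = parse_retry_after(message)
                delay = (
                    clamp_retry_delay(hinted)
                    if hinted is not None
                    else calculate_rate_limit_backoff(attempt)
                )
            else:
                delay = calculate_error_backoff(attempt + 1)
            print(f"attempt {attempt}: waiting {delay}s before retrying")
            time.sleep(delay)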