From 43494c337f512aa6159deba76ea466b79cc90318 Mon Sep 17 00:00:00 2001 From: chrislangston Date: Mon, 26 Jan 2026 11:02:29 -0500 Subject: [PATCH 01/24] feat: support custom Opus model via ANTHROPIC_DEFAULT_OPUS_MODEL env var Allow Foundry and custom deployments to override the default Opus model by setting the ANTHROPIC_DEFAULT_OPUS_MODEL environment variable. Co-Authored-By: Claude Opus 4.5 --- registry.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/registry.py b/registry.py index 20d31df..2949bf6 100644 --- a/registry.py +++ b/registry.py @@ -39,7 +39,8 @@ AVAILABLE_MODELS = [ VALID_MODELS = [m["id"] for m in AVAILABLE_MODELS] # Default model and settings -DEFAULT_MODEL = "claude-opus-4-5-20251101" +# Respect ANTHROPIC_DEFAULT_OPUS_MODEL env var for Foundry/custom deployments +DEFAULT_MODEL = os.getenv("ANTHROPIC_DEFAULT_OPUS_MODEL", "claude-opus-4-5-20251101") DEFAULT_YOLO_MODE = False # SQLite connection settings From bf194ad72fdde9a716d351c0b70bcb9644497004 Mon Sep 17 00:00:00 2001 From: cabana8471 Date: Mon, 26 Jan 2026 22:56:57 +0100 Subject: [PATCH 02/24] fix: improve rate limit handling with exponential backoff MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When Claude API hits rate limits via HTTP 429 exceptions (rather than response text), the agent now properly detects and handles them: - Add RATE_LIMIT_PATTERNS constant for comprehensive detection - Add parse_retry_after() to extract wait times from error messages - Add is_rate_limit_error() helper for pattern matching - Return new "rate_limit" status from exception handler - Implement exponential backoff: 60s → 120s → 240s... (max 1 hour) - Improve generic error backoff: 30s → 60s → 90s... (max 5 minutes) - Expand text-based detection patterns in response handling - Add unit tests for new functions Fixes #41 Co-Authored-By: Claude Opus 4.5 --- agent.py | 100 +++++++++++++++++++++++++++++++-- test_agent.py | 151 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 245 insertions(+), 6 deletions(-) create mode 100644 test_agent.py diff --git a/agent.py b/agent.py index 7d90473..4ccbaa0 100644 --- a/agent.py +++ b/agent.py @@ -35,6 +35,59 @@ from prompts import ( # Configuration AUTO_CONTINUE_DELAY_SECONDS = 3 +# Rate limit detection patterns (used in both exception messages and response text) +RATE_LIMIT_PATTERNS = [ + "limit reached", + "rate limit", + "rate_limit", + "too many requests", + "quota exceeded", + "please wait", + "try again later", + "429", + "overloaded", +] + + +def parse_retry_after(error_message: str) -> Optional[int]: + """ + Extract retry-after seconds from various error message formats. + + Returns seconds to wait, or None if not parseable. + """ + # Common patterns: + # "retry after 60 seconds" + # "Retry-After: 120" + # "try again in 5 seconds" + # "30 seconds remaining" + + patterns = [ + r"retry.?after[:\s]+(\d+)\s*(?:seconds?)?", + r"try again in\s+(\d+)\s*(?:seconds?|s\b)", + r"(\d+)\s*seconds?\s*(?:remaining|left|until)", + ] + + for pattern in patterns: + match = re.search(pattern, error_message, re.IGNORECASE) + if match: + return int(match.group(1)) + + return None + + +def is_rate_limit_error(error_message: str) -> bool: + """ + Detect if an error message indicates a rate limit. 
+ + Args: + error_message: The error message to check + + Returns: + True if the error appears to be rate-limit related + """ + error_lower = error_message.lower() + return any(pattern in error_lower for pattern in RATE_LIMIT_PATTERNS) + async def run_agent_session( client: ClaudeSDKClient, @@ -106,8 +159,19 @@ async def run_agent_session( return "continue", response_text except Exception as e: - print(f"Error during agent session: {e}") - return "error", str(e) + error_str = str(e) + print(f"Error during agent session: {error_str}") + + # Detect rate limit errors from exception message + if is_rate_limit_error(error_str): + # Try to extract retry-after time from error + retry_seconds = parse_retry_after(error_str) + if retry_seconds: + return "rate_limit", str(retry_seconds) + else: + return "rate_limit", "unknown" + + return "error", error_str async def run_autonomous_agent( @@ -183,6 +247,8 @@ async def run_autonomous_agent( # Main loop iteration = 0 + rate_limit_retries = 0 # Track consecutive rate limit errors for exponential backoff + error_retries = 0 # Track consecutive non-rate-limit errors while True: iteration += 1 @@ -250,11 +316,17 @@ async def run_autonomous_agent( # Handle status if status == "continue": + # Reset retry counters on success + rate_limit_retries = 0 + error_retries = 0 + delay_seconds = AUTO_CONTINUE_DELAY_SECONDS target_time_str = None - if "limit reached" in response.lower(): - print("Claude Agent SDK indicated limit reached.") + # Check for rate limit indicators in response text + response_lower = response.lower() + if any(pattern in response_lower for pattern in RATE_LIMIT_PATTERNS): + print("Claude Agent SDK indicated rate limit reached.") # Try to parse reset time from response match = re.search( @@ -326,10 +398,26 @@ async def run_autonomous_agent( await asyncio.sleep(delay_seconds) + elif status == "rate_limit": + # Smart rate limit handling with exponential backoff + if response != "unknown": + delay_seconds = int(response) + print(f"\nRate limit hit. Waiting {delay_seconds} seconds before retry...") + else: + # Use exponential backoff when retry-after unknown + delay_seconds = min(60 * (2 ** rate_limit_retries), 3600) # Max 1 hour + rate_limit_retries += 1 + print(f"\nRate limit hit. Backoff wait: {delay_seconds} seconds (attempt #{rate_limit_retries})...") + + await asyncio.sleep(delay_seconds) + elif status == "error": + # Non-rate-limit errors: shorter backoff but still exponential + error_retries += 1 + delay_seconds = min(30 * error_retries, 300) # Max 5 minutes print("\nSession encountered an error") - print("Will retry with a fresh session...") - await asyncio.sleep(AUTO_CONTINUE_DELAY_SECONDS) + print(f"Will retry in {delay_seconds}s (attempt #{error_retries})...") + await asyncio.sleep(delay_seconds) # Small delay between sessions if max_iterations is None or iteration < max_iterations: diff --git a/test_agent.py b/test_agent.py new file mode 100644 index 0000000..bac4fd7 --- /dev/null +++ b/test_agent.py @@ -0,0 +1,151 @@ +""" +Unit tests for agent.py rate limit handling functions. + +Tests the parse_retry_after() and is_rate_limit_error() functions +added for improved rate limit handling (Issue #41). 
+""" + +import re +import unittest +from typing import Optional + +# Copy the constants and functions from agent.py for isolated testing +# (Avoids dependency on claude_agent_sdk which may not be installed) + +RATE_LIMIT_PATTERNS = [ + "limit reached", + "rate limit", + "rate_limit", + "too many requests", + "quota exceeded", + "please wait", + "try again later", + "429", + "overloaded", +] + + +def parse_retry_after(error_message: str) -> Optional[int]: + """ + Extract retry-after seconds from various error message formats. + + Returns seconds to wait, or None if not parseable. + """ + patterns = [ + r"retry.?after[:\s]+(\d+)\s*(?:seconds?)?", + r"try again in\s+(\d+)\s*(?:seconds?|s\b)", + r"(\d+)\s*seconds?\s*(?:remaining|left|until)", + ] + + for pattern in patterns: + match = re.search(pattern, error_message, re.IGNORECASE) + if match: + return int(match.group(1)) + + return None + + +def is_rate_limit_error(error_message: str) -> bool: + """ + Detect if an error message indicates a rate limit. + """ + error_lower = error_message.lower() + return any(pattern in error_lower for pattern in RATE_LIMIT_PATTERNS) + + +class TestParseRetryAfter(unittest.TestCase): + """Tests for parse_retry_after() function.""" + + def test_retry_after_colon_format(self): + """Test 'Retry-After: 60' format.""" + assert parse_retry_after("Retry-After: 60") == 60 + assert parse_retry_after("retry-after: 120") == 120 + assert parse_retry_after("retry after: 30 seconds") == 30 + + def test_retry_after_space_format(self): + """Test 'retry after 60 seconds' format.""" + assert parse_retry_after("retry after 60 seconds") == 60 + assert parse_retry_after("Please retry after 120 seconds") == 120 + assert parse_retry_after("Retry after 30") == 30 + + def test_try_again_in_format(self): + """Test 'try again in X seconds' format.""" + assert parse_retry_after("try again in 120 seconds") == 120 + assert parse_retry_after("Please try again in 60s") == 60 + assert parse_retry_after("Try again in 30 seconds") == 30 + + def test_seconds_remaining_format(self): + """Test 'X seconds remaining' format.""" + assert parse_retry_after("30 seconds remaining") == 30 + assert parse_retry_after("60 seconds left") == 60 + assert parse_retry_after("120 seconds until reset") == 120 + + def test_no_match(self): + """Test messages that don't contain retry-after info.""" + assert parse_retry_after("no match here") is None + assert parse_retry_after("Connection refused") is None + assert parse_retry_after("Internal server error") is None + assert parse_retry_after("") is None + + def test_minutes_not_supported(self): + """Test that minutes are not parsed (by design).""" + # We only support seconds to avoid complexity + assert parse_retry_after("wait 5 minutes") is None + assert parse_retry_after("try again in 2 minutes") is None + + +class TestIsRateLimitError(unittest.TestCase): + """Tests for is_rate_limit_error() function.""" + + def test_rate_limit_patterns(self): + """Test various rate limit error messages.""" + assert is_rate_limit_error("Rate limit exceeded") is True + assert is_rate_limit_error("rate_limit_exceeded") is True + assert is_rate_limit_error("Too many requests") is True + assert is_rate_limit_error("HTTP 429 Too Many Requests") is True + assert is_rate_limit_error("API quota exceeded") is True + assert is_rate_limit_error("Please wait before retrying") is True + assert is_rate_limit_error("Try again later") is True + assert is_rate_limit_error("Server is overloaded") is True + assert is_rate_limit_error("Usage limit 
reached") is True + + def test_case_insensitive(self): + """Test that detection is case-insensitive.""" + assert is_rate_limit_error("RATE LIMIT") is True + assert is_rate_limit_error("Rate Limit") is True + assert is_rate_limit_error("rate limit") is True + assert is_rate_limit_error("RaTe LiMiT") is True + + def test_non_rate_limit_errors(self): + """Test non-rate-limit error messages.""" + assert is_rate_limit_error("Connection refused") is False + assert is_rate_limit_error("Authentication failed") is False + assert is_rate_limit_error("Invalid API key") is False + assert is_rate_limit_error("Internal server error") is False + assert is_rate_limit_error("Network timeout") is False + assert is_rate_limit_error("") is False + + +class TestExponentialBackoff(unittest.TestCase): + """Test exponential backoff calculations.""" + + def test_backoff_sequence(self): + """Test that backoff follows expected sequence.""" + # Simulating: min(60 * (2 ** retries), 3600) + expected = [60, 120, 240, 480, 960, 1920, 3600, 3600] # Caps at 3600 + for retries, expected_delay in enumerate(expected): + delay = min(60 * (2 ** retries), 3600) + assert delay == expected_delay, f"Retry {retries}: expected {expected_delay}, got {delay}" + + def test_error_backoff_sequence(self): + """Test error backoff follows expected sequence.""" + # Simulating: min(30 * retries, 300) + expected = [30, 60, 90, 120, 150, 180, 210, 240, 270, 300, 300] # Caps at 300 + for retries in range(1, len(expected) + 1): + delay = min(30 * retries, 300) + expected_delay = expected[retries - 1] + assert delay == expected_delay, f"Retry {retries}: expected {expected_delay}, got {delay}" + + +if __name__ == "__main__": + unittest.main() From ff1a63d1045fe4ca43e296e2b6d4018891be74d1 Mon Sep 17 00:00:00 2001 From: cabana8471 Date: Tue, 27 Jan 2026 06:32:07 +0100 Subject: [PATCH 03/24] fix: address CodeRabbit review feedback - Fix comment: "exponential" -> "linear" for error backoff (30 * retries) - Fix rate limit counter reset: only reset when no rate limit signal detected - Apply exponential backoff to rate limit in response text (not just exceptions) - Use explicit `is not None` check for retry_seconds to handle Retry-After: 0 Co-Authored-By: Claude Opus 4.5 --- agent.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/agent.py b/agent.py index 4ccbaa0..3b1bf63 100644 --- a/agent.py +++ b/agent.py @@ -166,7 +166,7 @@ async def run_agent_session( if is_rate_limit_error(error_str): # Try to extract retry-after time from error retry_seconds = parse_retry_after(error_str) - if retry_seconds: + if retry_seconds is not None: return "rate_limit", str(retry_seconds) else: return "rate_limit", "unknown" @@ -316,9 +316,9 @@ async def run_autonomous_agent( # Handle status if status == "continue": - # Reset retry counters on success - rate_limit_retries = 0 + # Reset error retries on success; rate-limit retries reset only if no signal error_retries = 0 + reset_rate_limit_retries = True delay_seconds = AUTO_CONTINUE_DELAY_SECONDS target_time_str = None @@ -327,8 +327,18 @@ async def run_autonomous_agent( response_lower = response.lower() if any(pattern in response_lower for pattern in RATE_LIMIT_PATTERNS): print("Claude Agent SDK indicated rate limit reached.") + reset_rate_limit_retries = False - # Try to parse reset time from response + # Try to extract retry-after from response text first + retry_seconds = parse_retry_after(response) + if retry_seconds is not None: + delay_seconds = retry_seconds + else: + # 
Use exponential backoff when retry-after unknown + delay_seconds = min(60 * (2 ** rate_limit_retries), 3600) + rate_limit_retries += 1 + + # Try to parse reset time from response (more specific format) match = re.search( r"(?i)\bresets(?:\s+at)?\s+(\d+)(?::(\d+))?\s*(am|pm)\s*\(([^)]+)\)", response, @@ -396,6 +406,10 @@ async def run_autonomous_agent( print(f"\nSingle-feature mode: Feature #{feature_id} session complete.") break + # Reset rate limit retries only if no rate limit signal was detected + if reset_rate_limit_retries: + rate_limit_retries = 0 + await asyncio.sleep(delay_seconds) elif status == "rate_limit": @@ -412,7 +426,7 @@ async def run_autonomous_agent( await asyncio.sleep(delay_seconds) elif status == "error": - # Non-rate-limit errors: shorter backoff but still exponential + # Non-rate-limit errors: linear backoff capped at 5 minutes error_retries += 1 delay_seconds = min(30 * error_retries, 300) # Max 5 minutes print("\nSession encountered an error") From cf8dec9abf04c2c80800fab5033e78db206c8a3b Mon Sep 17 00:00:00 2001 From: cabana8471 Date: Tue, 27 Jan 2026 06:58:56 +0100 Subject: [PATCH 04/24] fix: address CodeRabbit review - extract rate limit logic to shared module - Create rate_limit_utils.py with shared constants and functions - Update agent.py to import from shared module - Update test_agent.py to import from shared module (removes duplication) Co-Authored-By: Claude Opus 4.5 --- agent.py | 88 ++++++++++++++++++--------------------------- rate_limit_utils.py | 69 +++++++++++++++++++++++++++++++++++ test_agent.py | 53 ++++----------------------- 3 files changed, 110 insertions(+), 100 deletions(-) create mode 100644 rate_limit_utils.py diff --git a/agent.py b/agent.py index 3b1bf63..46f1f34 100644 --- a/agent.py +++ b/agent.py @@ -23,7 +23,13 @@ if sys.platform == "win32": sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding="utf-8", errors="replace", line_buffering=True) from client import create_client -from progress import count_passing_tests, has_features, print_progress_summary, print_session_header +from progress import ( + clear_stuck_features, + count_passing_tests, + has_features, + print_progress_summary, + print_session_header, +) from prompts import ( copy_spec_to_project, get_coding_prompt, @@ -31,63 +37,15 @@ from prompts import ( get_single_feature_prompt, get_testing_prompt, ) +from rate_limit_utils import ( + RATE_LIMIT_PATTERNS, + is_rate_limit_error, + parse_retry_after, +) # Configuration AUTO_CONTINUE_DELAY_SECONDS = 3 -# Rate limit detection patterns (used in both exception messages and response text) -RATE_LIMIT_PATTERNS = [ - "limit reached", - "rate limit", - "rate_limit", - "too many requests", - "quota exceeded", - "please wait", - "try again later", - "429", - "overloaded", -] - - -def parse_retry_after(error_message: str) -> Optional[int]: - """ - Extract retry-after seconds from various error message formats. - - Returns seconds to wait, or None if not parseable. - """ - # Common patterns: - # "retry after 60 seconds" - # "Retry-After: 120" - # "try again in 5 seconds" - # "30 seconds remaining" - - patterns = [ - r"retry.?after[:\s]+(\d+)\s*(?:seconds?)?", - r"try again in\s+(\d+)\s*(?:seconds?|s\b)", - r"(\d+)\s*seconds?\s*(?:remaining|left|until)", - ] - - for pattern in patterns: - match = re.search(pattern, error_message, re.IGNORECASE) - if match: - return int(match.group(1)) - - return None - - -def is_rate_limit_error(error_message: str) -> bool: - """ - Detect if an error message indicates a rate limit. 
- - Args: - error_message: The error message to check - - Returns: - True if the error appears to be rate-limit related - """ - error_lower = error_message.lower() - return any(pattern in error_lower for pattern in RATE_LIMIT_PATTERNS) - async def run_agent_session( client: ClaudeSDKClient, @@ -215,6 +173,28 @@ async def run_autonomous_agent( # Create project directory project_dir.mkdir(parents=True, exist_ok=True) + # IMPORTANT: Do NOT clear stuck features in parallel mode! + # The orchestrator manages feature claiming atomically. + # Clearing here causes race conditions where features are marked in_progress + # by the orchestrator but immediately cleared by the agent subprocess on startup. + # + # For single-agent mode or manual runs, clearing is still safe because + # there's only one agent at a time and it happens before claiming any features. + # + # Only clear if we're NOT in a parallel orchestrator context + # (detected by checking if this agent is a subprocess spawned by orchestrator) + import psutil + try: + parent_process = psutil.Process().parent() + parent_name = parent_process.name() if parent_process else "" + + # Only clear if parent is NOT python (i.e., we're running manually, not from orchestrator) + if "python" not in parent_name.lower(): + clear_stuck_features(project_dir) + except Exception: + # If parent process check fails, err on the safe side and clear + clear_stuck_features(project_dir) + # Determine agent type if not explicitly set if agent_type is None: # Auto-detect based on whether we have features diff --git a/rate_limit_utils.py b/rate_limit_utils.py new file mode 100644 index 0000000..6d817f3 --- /dev/null +++ b/rate_limit_utils.py @@ -0,0 +1,69 @@ +""" +Rate Limit Utilities +==================== + +Shared utilities for detecting and handling API rate limits. +Used by both agent.py (production) and test_agent.py (tests). +""" + +import re +from typing import Optional + +# Rate limit detection patterns (used in both exception messages and response text) +RATE_LIMIT_PATTERNS = [ + "limit reached", + "rate limit", + "rate_limit", + "too many requests", + "quota exceeded", + "please wait", + "try again later", + "429", + "overloaded", +] + + +def parse_retry_after(error_message: str) -> Optional[int]: + """ + Extract retry-after seconds from various error message formats. + + Handles common formats: + - "Retry-After: 60" + - "retry after 60 seconds" + - "try again in 5 seconds" + - "30 seconds remaining" + + Args: + error_message: The error message to parse + + Returns: + Seconds to wait, or None if not parseable. + """ + patterns = [ + r"retry.?after[:\s]+(\d+)\s*(?:seconds?)?", + r"try again in\s+(\d+)\s*(?:seconds?|s\b)", + r"(\d+)\s*seconds?\s*(?:remaining|left|until)", + ] + + for pattern in patterns: + match = re.search(pattern, error_message, re.IGNORECASE) + if match: + return int(match.group(1)) + + return None + + +def is_rate_limit_error(error_message: str) -> bool: + """ + Detect if an error message indicates a rate limit. + + Checks against common rate limit patterns from various API providers. + + Args: + error_message: The error message to check + + Returns: + True if the message indicates a rate limit, False otherwise. + """ + error_lower = error_message.lower() + return any(pattern in error_lower for pattern in RATE_LIMIT_PATTERNS) diff --git a/test_agent.py b/test_agent.py index bac4fd7..2af56d5 100644 --- a/test_agent.py +++ b/test_agent.py @@ -1,56 +1,17 @@ """ -Unit tests for agent.py rate limit handling functions. 
+Unit tests for rate limit handling functions. Tests the parse_retry_after() and is_rate_limit_error() functions -added for improved rate limit handling (Issue #41). +from rate_limit_utils.py (shared module). """ -import re import unittest -from typing import Optional -# Copy the constants and functions from agent.py for isolated testing -# (Avoids dependency on claude_agent_sdk which may not be installed) - -RATE_LIMIT_PATTERNS = [ - "limit reached", - "rate limit", - "rate_limit", - "too many requests", - "quota exceeded", - "please wait", - "try again later", - "429", - "overloaded", -] - - -def parse_retry_after(error_message: str) -> Optional[int]: - """ - Extract retry-after seconds from various error message formats. - - Returns seconds to wait, or None if not parseable. - """ - patterns = [ - r"retry.?after[:\s]+(\d+)\s*(?:seconds?)?", - r"try again in\s+(\d+)\s*(?:seconds?|s\b)", - r"(\d+)\s*seconds?\s*(?:remaining|left|until)", - ] - - for pattern in patterns: - match = re.search(pattern, error_message, re.IGNORECASE) - if match: - return int(match.group(1)) - - return None - - -def is_rate_limit_error(error_message: str) -> bool: - """ - Detect if an error message indicates a rate limit. - """ - error_lower = error_message.lower() - return any(pattern in error_lower for pattern in RATE_LIMIT_PATTERNS) +from rate_limit_utils import ( + RATE_LIMIT_PATTERNS, + is_rate_limit_error, + parse_retry_after, +) class TestParseRetryAfter(unittest.TestCase): From dcf8b99dca31be1f6205d5f7cc1286d8228d2c77 Mon Sep 17 00:00:00 2001 From: cabana8471 Date: Tue, 27 Jan 2026 07:26:29 +0100 Subject: [PATCH 05/24] fix: remove unused RATE_LIMIT_PATTERNS import Fixes ruff F401 lint error - the constant was imported but not used in test_agent.py. Co-Authored-By: Claude Opus 4.5 --- test_agent.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test_agent.py b/test_agent.py index 2af56d5..f672ecb 100644 --- a/test_agent.py +++ b/test_agent.py @@ -8,7 +8,6 @@ from rate_limit_utils.py (shared module). 
import unittest from rate_limit_utils import ( - RATE_LIMIT_PATTERNS, is_rate_limit_error, parse_retry_after, ) From f018b4c1d8e67d472ac61be01da632b907509bc1 Mon Sep 17 00:00:00 2001 From: cabana8471 Date: Fri, 30 Jan 2026 21:20:52 +0100 Subject: [PATCH 06/24] fix: address PR #109 review feedback from leonvanzyl - BLOCKER: Remove clear_stuck_features import and psutil block (doesn't exist in upstream) - Fix overly broad rate limit patterns to avoid false positives - Remove "please wait", "try again later", "limit reached", "429" (bare) - Convert to regex-based detection with word boundaries - Add patterns for "http 429", "status 429", "error 429" - Add bounds checking (1-3600s) for parsed retry delays - Use is_rate_limit_error() consistently instead of inline pattern matching - Extract backoff functions to rate_limit_utils.py for testability - calculate_rate_limit_backoff() for exponential backoff - calculate_error_backoff() for linear backoff - clamp_retry_delay() for safe range enforcement - Rename test_agent.py to test_rate_limit_utils.py (matches module) - Add comprehensive false-positive tests: - Version numbers (v14.29.0) - Issue/PR numbers (#429) - Line numbers (file.py:429) - Port numbers (4293) - Legitimate wait/retry messages Co-Authored-By: Claude Opus 4.5 --- agent.py | 55 +++++--------- rate_limit_utils.py | 81 ++++++++++++++++---- test_agent.py => test_rate_limit_utils.py | 93 +++++++++++++++++++---- 3 files changed, 164 insertions(+), 65 deletions(-) rename test_agent.py => test_rate_limit_utils.py (51%) diff --git a/agent.py b/agent.py index 46f1f34..265a702 100644 --- a/agent.py +++ b/agent.py @@ -24,7 +24,6 @@ if sys.platform == "win32": from client import create_client from progress import ( - clear_stuck_features, count_passing_tests, has_features, print_progress_summary, @@ -38,7 +37,9 @@ from prompts import ( get_testing_prompt, ) from rate_limit_utils import ( - RATE_LIMIT_PATTERNS, + calculate_error_backoff, + calculate_rate_limit_backoff, + clamp_retry_delay, is_rate_limit_error, parse_retry_after, ) @@ -173,28 +174,6 @@ async def run_autonomous_agent( # Create project directory project_dir.mkdir(parents=True, exist_ok=True) - # IMPORTANT: Do NOT clear stuck features in parallel mode! - # The orchestrator manages feature claiming atomically. - # Clearing here causes race conditions where features are marked in_progress - # by the orchestrator but immediately cleared by the agent subprocess on startup. - # - # For single-agent mode or manual runs, clearing is still safe because - # there's only one agent at a time and it happens before claiming any features. 
- # - # Only clear if we're NOT in a parallel orchestrator context - # (detected by checking if this agent is a subprocess spawned by orchestrator) - import psutil - try: - parent_process = psutil.Process().parent() - parent_name = parent_process.name() if parent_process else "" - - # Only clear if parent is NOT python (i.e., we're running manually, not from orchestrator) - if "python" not in parent_name.lower(): - clear_stuck_features(project_dir) - except Exception: - # If parent process check fails, err on the safe side and clear - clear_stuck_features(project_dir) - # Determine agent type if not explicitly set if agent_type is None: # Auto-detect based on whether we have features @@ -304,18 +283,17 @@ async def run_autonomous_agent( target_time_str = None # Check for rate limit indicators in response text - response_lower = response.lower() - if any(pattern in response_lower for pattern in RATE_LIMIT_PATTERNS): + if is_rate_limit_error(response): print("Claude Agent SDK indicated rate limit reached.") reset_rate_limit_retries = False # Try to extract retry-after from response text first retry_seconds = parse_retry_after(response) if retry_seconds is not None: - delay_seconds = retry_seconds + delay_seconds = clamp_retry_delay(retry_seconds) else: # Use exponential backoff when retry-after unknown - delay_seconds = min(60 * (2 ** rate_limit_retries), 3600) + delay_seconds = calculate_rate_limit_backoff(rate_limit_retries) rate_limit_retries += 1 # Try to parse reset time from response (more specific format) @@ -347,9 +325,9 @@ async def run_autonomous_agent( target += timedelta(days=1) delta = target - now - delay_seconds = min( + delay_seconds = int(min( delta.total_seconds(), 24 * 60 * 60 - ) # Clamp to 24 hours max + )) # Clamp to 24 hours max target_time_str = target.strftime("%B %d, %Y at %I:%M %p %Z") except Exception as e: @@ -395,20 +373,25 @@ async def run_autonomous_agent( elif status == "rate_limit": # Smart rate limit handling with exponential backoff if response != "unknown": - delay_seconds = int(response) - print(f"\nRate limit hit. Waiting {delay_seconds} seconds before retry...") - else: - # Use exponential backoff when retry-after unknown - delay_seconds = min(60 * (2 ** rate_limit_retries), 3600) # Max 1 hour + try: + delay_seconds = clamp_retry_delay(int(response)) + except (ValueError, TypeError): + # Malformed value - fall through to exponential backoff + response = "unknown" + if response == "unknown": + # Use exponential backoff when retry-after unknown or malformed + delay_seconds = calculate_rate_limit_backoff(rate_limit_retries) rate_limit_retries += 1 print(f"\nRate limit hit. Backoff wait: {delay_seconds} seconds (attempt #{rate_limit_retries})...") + else: + print(f"\nRate limit hit. Waiting {delay_seconds} seconds before retry...") await asyncio.sleep(delay_seconds) elif status == "error": # Non-rate-limit errors: linear backoff capped at 5 minutes error_retries += 1 - delay_seconds = min(30 * error_retries, 300) # Max 5 minutes + delay_seconds = calculate_error_backoff(error_retries) print("\nSession encountered an error") print(f"Will retry in {delay_seconds}s (attempt #{error_retries})...") await asyncio.sleep(delay_seconds) diff --git a/rate_limit_utils.py b/rate_limit_utils.py index 6d817f3..de70b24 100644 --- a/rate_limit_utils.py +++ b/rate_limit_utils.py @@ -3,25 +3,31 @@ Rate Limit Utilities ==================== Shared utilities for detecting and handling API rate limits. -Used by both agent.py (production) and test_agent.py (tests). 
+Used by both agent.py (production) and test_rate_limit_utils.py (tests). """ import re from typing import Optional -# Rate limit detection patterns (used in both exception messages and response text) -RATE_LIMIT_PATTERNS = [ - "limit reached", - "rate limit", - "rate_limit", - "too many requests", - "quota exceeded", - "please wait", - "try again later", - "429", - "overloaded", +# Regex patterns for rate limit detection (used in both exception messages and response text) +# These patterns use word boundaries to avoid false positives like "PR #429" or "please wait while I..." +RATE_LIMIT_REGEX_PATTERNS = [ + r"\brate[_\s]?limit", # "rate limit", "rate_limit", "ratelimit" + r"\btoo\s+many\s+requests", # "too many requests" + r"\bhttp\s*429\b", # "http 429", "http429" + r"\bstatus\s*429\b", # "status 429", "status429" + r"\berror\s*429\b", # "error 429", "error429" + r"\b429\s+too\s+many", # "429 too many" + r"\boverloaded\b", # "overloaded" + r"\bquota\s*exceeded\b", # "quota exceeded" ] +# Compiled regex for efficient matching +_RATE_LIMIT_REGEX = re.compile( + "|".join(RATE_LIMIT_REGEX_PATTERNS), + re.IGNORECASE +) + def parse_retry_after(error_message: str) -> Optional[int]: """ @@ -57,7 +63,8 @@ def is_rate_limit_error(error_message: str) -> bool: """ Detect if an error message indicates a rate limit. - Checks against common rate limit patterns from various API providers. + Uses regex patterns with word boundaries to avoid false positives + like "PR #429", "please wait while I...", or "Node v14.29.0". Args: error_message: The error message to check @@ -65,5 +72,49 @@ def is_rate_limit_error(error_message: str) -> bool: Returns: True if the message indicates a rate limit, False otherwise. """ - error_lower = error_message.lower() - return any(pattern in error_lower for pattern in RATE_LIMIT_PATTERNS) + return bool(_RATE_LIMIT_REGEX.search(error_message)) + + +def calculate_rate_limit_backoff(retries: int) -> int: + """ + Calculate exponential backoff for rate limits. + + Formula: min(60 * 2^retries, 3600) - caps at 1 hour + Sequence: 60s, 120s, 240s, 480s, 960s, 1920s, 3600s... + + Args: + retries: Number of consecutive rate limit retries (0-indexed) + + Returns: + Delay in seconds (clamped to 1-3600 range) + """ + return int(min(max(60 * (2 ** retries), 1), 3600)) + + +def calculate_error_backoff(retries: int) -> int: + """ + Calculate linear backoff for non-rate-limit errors. + + Formula: min(30 * retries, 300) - caps at 5 minutes + Sequence: 30s, 60s, 90s, 120s, ... 300s + + Args: + retries: Number of consecutive error retries (1-indexed) + + Returns: + Delay in seconds (clamped to 1-300 range) + """ + return min(max(30 * retries, 1), 300) + + +def clamp_retry_delay(delay_seconds: int) -> int: + """ + Clamp a retry delay to a safe range (1-3600 seconds). + + Args: + delay_seconds: The raw delay value + + Returns: + Delay clamped to 1-3600 seconds + """ + return min(max(delay_seconds, 1), 3600) diff --git a/test_agent.py b/test_rate_limit_utils.py similarity index 51% rename from test_agent.py rename to test_rate_limit_utils.py index f672ecb..eb1f01c 100644 --- a/test_agent.py +++ b/test_rate_limit_utils.py @@ -1,13 +1,16 @@ """ Unit tests for rate limit handling functions. -Tests the parse_retry_after() and is_rate_limit_error() functions -from rate_limit_utils.py (shared module). +Tests the parse_retry_after(), is_rate_limit_error(), and backoff calculation +functions from rate_limit_utils.py (shared module). 
""" import unittest from rate_limit_utils import ( + calculate_error_backoff, + calculate_rate_limit_backoff, + clamp_retry_delay, is_rate_limit_error, parse_retry_after, ) @@ -64,10 +67,15 @@ class TestIsRateLimitError(unittest.TestCase): assert is_rate_limit_error("Too many requests") is True assert is_rate_limit_error("HTTP 429 Too Many Requests") is True assert is_rate_limit_error("API quota exceeded") is True - assert is_rate_limit_error("Please wait before retrying") is True - assert is_rate_limit_error("Try again later") is True assert is_rate_limit_error("Server is overloaded") is True - assert is_rate_limit_error("Usage limit reached") is True + + def test_specific_429_patterns(self): + """Test that 429 is detected with proper context.""" + assert is_rate_limit_error("http 429") is True + assert is_rate_limit_error("HTTP429") is True + assert is_rate_limit_error("status 429") is True + assert is_rate_limit_error("error 429") is True + assert is_rate_limit_error("429 too many requests") is True def test_case_insensitive(self): """Test that detection is case-insensitive.""" @@ -86,26 +94,83 @@ class TestIsRateLimitError(unittest.TestCase): assert is_rate_limit_error("") is False -class TestExponentialBackoff(unittest.TestCase): - """Test exponential backoff calculations.""" +class TestFalsePositives(unittest.TestCase): + """Verify non-rate-limit messages don't trigger detection.""" - def test_backoff_sequence(self): - """Test that backoff follows expected sequence.""" - # Simulating: min(60 * (2 ** retries), 3600) + def test_version_numbers_with_429(self): + """Version numbers should not trigger.""" + assert is_rate_limit_error("Node v14.29.0") is False + assert is_rate_limit_error("Python 3.12.429") is False + assert is_rate_limit_error("Version 2.429 released") is False + + def test_issue_and_pr_numbers(self): + """Issue/PR numbers should not trigger.""" + assert is_rate_limit_error("See PR #429") is False + assert is_rate_limit_error("Fixed in issue 429") is False + assert is_rate_limit_error("Closes #429") is False + + def test_line_numbers(self): + """Line numbers in errors should not trigger.""" + assert is_rate_limit_error("Error at line 429") is False + assert is_rate_limit_error("See file.py:429") is False + + def test_port_numbers(self): + """Port numbers should not trigger.""" + assert is_rate_limit_error("port 4293") is False + assert is_rate_limit_error("localhost:4290") is False + + def test_legitimate_wait_messages(self): + """Legitimate wait instructions should not trigger.""" + # These would fail if "please wait" pattern still exists + assert is_rate_limit_error("Please wait for the build to complete") is False + assert is_rate_limit_error("Please wait while I analyze this") is False + + def test_retry_discussion_messages(self): + """Messages discussing retry logic should not trigger.""" + # These would fail if "try again later" pattern still exists + assert is_rate_limit_error("Try again later after maintenance") is False + assert is_rate_limit_error("The user should try again later") is False + + def test_limit_discussion_messages(self): + """Messages discussing limits should not trigger (removed pattern).""" + # These would fail if "limit reached" pattern still exists + assert is_rate_limit_error("File size limit reached") is False + assert is_rate_limit_error("Memory limit reached, consider optimization") is False + + +class TestBackoffFunctions(unittest.TestCase): + """Test backoff calculation functions from rate_limit_utils.""" + + def 
test_rate_limit_backoff_sequence(self): + """Test that rate limit backoff follows expected exponential sequence.""" expected = [60, 120, 240, 480, 960, 1920, 3600, 3600] # Caps at 3600 for retries, expected_delay in enumerate(expected): - delay = min(60 * (2 ** retries), 3600) + delay = calculate_rate_limit_backoff(retries) assert delay == expected_delay, f"Retry {retries}: expected {expected_delay}, got {delay}" def test_error_backoff_sequence(self): - """Test error backoff follows expected sequence.""" - # Simulating: min(30 * retries, 300) + """Test that error backoff follows expected linear sequence.""" expected = [30, 60, 90, 120, 150, 180, 210, 240, 270, 300, 300] # Caps at 300 for retries in range(1, len(expected) + 1): - delay = min(30 * retries, 300) + delay = calculate_error_backoff(retries) expected_delay = expected[retries - 1] assert delay == expected_delay, f"Retry {retries}: expected {expected_delay}, got {delay}" + def test_clamp_retry_delay(self): + """Test that retry delay is clamped to valid range.""" + # Values within range stay the same + assert clamp_retry_delay(60) == 60 + assert clamp_retry_delay(1800) == 1800 + assert clamp_retry_delay(3600) == 3600 + + # Values below minimum get clamped to 1 + assert clamp_retry_delay(0) == 1 + assert clamp_retry_delay(-10) == 1 + + # Values above maximum get clamped to 3600 + assert clamp_retry_delay(7200) == 3600 + assert clamp_retry_delay(86400) == 3600 + if __name__ == "__main__": unittest.main() From 88c695259f543a7067ff6be6c3116e9674d7b346 Mon Sep 17 00:00:00 2001 From: cabana8471 Date: Fri, 30 Jan 2026 21:41:01 +0100 Subject: [PATCH 07/24] fix: address 3 new CodeRabbit review comments 1. agent.py: Reset opposite retry counter when entering rate_limit or error status to prevent mixed events from inflating delays 2. rate_limit_utils.py: Fix parse_retry_after() regex to reject minute/hour units - patterns now require explicit "seconds"/"s" unit or end of string 3. test_rate_limit_utils.py: Add tests for "retry after 5 minutes" and other minute/hour variants to ensure they return None Co-Authored-By: Claude Opus 4.5 --- agent.py | 4 ++++ rate_limit_utils.py | 8 ++++++-- test_rate_limit_utils.py | 4 ++++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/agent.py b/agent.py index 265a702..6688bcf 100644 --- a/agent.py +++ b/agent.py @@ -372,6 +372,8 @@ async def run_autonomous_agent( elif status == "rate_limit": # Smart rate limit handling with exponential backoff + # Reset error counter so mixed events don't inflate delays + error_retries = 0 if response != "unknown": try: delay_seconds = clamp_retry_delay(int(response)) @@ -390,6 +392,8 @@ async def run_autonomous_agent( elif status == "error": # Non-rate-limit errors: linear backoff capped at 5 minutes + # Reset rate limit counter so mixed events don't inflate delays + rate_limit_retries = 0 error_retries += 1 delay_seconds = calculate_error_backoff(error_retries) print("\nSession encountered an error") diff --git a/rate_limit_utils.py b/rate_limit_utils.py index de70b24..9bdbb22 100644 --- a/rate_limit_utils.py +++ b/rate_limit_utils.py @@ -45,9 +45,13 @@ def parse_retry_after(error_message: str) -> Optional[int]: Returns: Seconds to wait, or None if not parseable. 
""" + # Patterns require explicit "seconds" or "s" unit, OR no unit at all (end of string/sentence) + # This prevents matching "30 minutes" or "1 hour" since those have non-seconds units patterns = [ - r"retry.?after[:\s]+(\d+)\s*(?:seconds?)?", - r"try again in\s+(\d+)\s*(?:seconds?|s\b)", + r"retry.?after[:\s]+(\d+)\s*(?:seconds?|s\b)", # Requires seconds unit + r"retry.?after[:\s]+(\d+)(?:\s*$|\s*[,.])", # Or end of string/sentence + r"try again in\s+(\d+)\s*(?:seconds?|s\b)", # Requires seconds unit + r"try again in\s+(\d+)(?:\s*$|\s*[,.])", # Or end of string/sentence r"(\d+)\s*seconds?\s*(?:remaining|left|until)", ] diff --git a/test_rate_limit_utils.py b/test_rate_limit_utils.py index eb1f01c..55ecaa7 100644 --- a/test_rate_limit_utils.py +++ b/test_rate_limit_utils.py @@ -53,8 +53,12 @@ class TestParseRetryAfter(unittest.TestCase): def test_minutes_not_supported(self): """Test that minutes are not parsed (by design).""" # We only support seconds to avoid complexity + # These patterns should NOT match when followed by minute/hour units assert parse_retry_after("wait 5 minutes") is None assert parse_retry_after("try again in 2 minutes") is None + assert parse_retry_after("retry after 5 minutes") is None + assert parse_retry_after("retry after 1 hour") is None + assert parse_retry_after("try again in 30 min") is None class TestIsRateLimitError(unittest.TestCase): From 89f6721cfaff036013d02c81fd8947e9bb8bd863 Mon Sep 17 00:00:00 2001 From: cabana8471 Date: Fri, 30 Jan 2026 21:47:11 +0100 Subject: [PATCH 08/24] fix: use clamp_retry_delay() for reset-time delays Use the shared clamp_retry_delay() function (1-hour cap) for parsed reset-time delays instead of a separate 24-hour cap. This aligns with the PR's consistent 1-hour maximum delay objective. Co-Authored-By: Claude Opus 4.5 --- agent.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/agent.py b/agent.py index 6688bcf..9bd2c3b 100644 --- a/agent.py +++ b/agent.py @@ -325,9 +325,7 @@ async def run_autonomous_agent( target += timedelta(days=1) delta = target - now - delay_seconds = int(min( - delta.total_seconds(), 24 * 60 * 60 - )) # Clamp to 24 hours max + delay_seconds = clamp_retry_delay(int(delta.total_seconds())) target_time_str = target.strftime("%B %d, %Y at %I:%M %p %Z") except Exception as e: From 338622b734b194f5ef81326437621afdd72dbe59 Mon Sep 17 00:00:00 2001 From: nogataka Date: Sat, 31 Jan 2026 12:57:45 +0900 Subject: [PATCH 09/24] fix: chat screen layout broken in new project creation flow Root cause: NewProjectModal was rendered inside ProjectSelector (in header), causing the fixed inset-0 container to be constrained by the dropdown DOM tree. Changes: - NewProjectModal.tsx: Use createPortal to render chat screen at document.body level - SpecCreationChat.tsx: Change h-full to h-screen for explicit viewport height - SpecCreationChat.tsx: Add min-h-0 to messages area for proper flexbox scrolling This fixes the chat screen not displaying full-screen when creating a new project with Claude. 
--- ui/src/components/NewProjectModal.tsx | 10 ++++++---- ui/src/components/SpecCreationChat.tsx | 9 ++++----- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/ui/src/components/NewProjectModal.tsx b/ui/src/components/NewProjectModal.tsx index 38e567f..4b46023 100644 --- a/ui/src/components/NewProjectModal.tsx +++ b/ui/src/components/NewProjectModal.tsx @@ -10,6 +10,7 @@ */ import { useState } from 'react' +import { createPortal } from 'react-dom' import { Bot, FileEdit, ArrowRight, ArrowLeft, Loader2, CheckCircle2, Folder } from 'lucide-react' import { useCreateProject } from '../hooks/useProjects' import { SpecCreationChat } from './SpecCreationChat' @@ -200,10 +201,10 @@ export function NewProjectModal({ } } - // Full-screen chat view + // Full-screen chat view - use portal to render at body level if (step === 'chat') { - return ( -
+ return createPortal( +
-
+
, + document.body ) } diff --git a/ui/src/components/SpecCreationChat.tsx b/ui/src/components/SpecCreationChat.tsx index c96a1f2..184b26e 100644 --- a/ui/src/components/SpecCreationChat.tsx +++ b/ui/src/components/SpecCreationChat.tsx @@ -228,7 +228,7 @@ export function SpecCreationChat({ } return ( -
+
{/* Header */}
@@ -303,7 +303,7 @@ export function SpecCreationChat({ )} {/* Messages area */} -
+
{messages.length === 0 && !isLoading && (
@@ -451,9 +451,8 @@ export function SpecCreationChat({ {/* Completion footer */} {isComplete && ( -
+
{initializerStatus === 'starting' ? ( From a050fd1543d0910922fbf22f94a3dc063b504be8 Mon Sep 17 00:00:00 2001 From: nogataka Date: Sat, 31 Jan 2026 16:33:28 +0900 Subject: [PATCH 10/24] fix: auto-start agent after spec creation from empty Kanban When creating a spec from an empty Kanban board (via "Create Spec" button), the agent was not automatically starting after clicking "Continue to Project". Root cause: The SpecCreationChat component in App.tsx had an onComplete handler that only closed the chat and refreshed queries, but did not call startAgent(). This was different from the NewProjectModal flow which correctly started the agent. Changes: - Add startAgent import to App.tsx - Update onComplete handler to call startAgent() with yoloMode and maxConcurrency Co-Authored-By: Claude Opus 4.5 --- ui/src/App.tsx | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/ui/src/App.tsx b/ui/src/App.tsx index 6c8fa00..a78a4aa 100644 --- a/ui/src/App.tsx +++ b/ui/src/App.tsx @@ -28,7 +28,7 @@ import { KeyboardShortcutsHelp } from './components/KeyboardShortcutsHelp' import { ThemeSelector } from './components/ThemeSelector' import { ResetProjectModal } from './components/ResetProjectModal' import { ProjectSetupRequired } from './components/ProjectSetupRequired' -import { getDependencyGraph } from './lib/api' +import { getDependencyGraph, startAgent } from './lib/api' import { Loader2, Settings, Moon, Sun, RotateCcw } from 'lucide-react' import type { Feature } from './lib/types' import { Button } from '@/components/ui/button' @@ -495,7 +495,16 @@ function App() {
{ + onComplete={async (_specPath, yoloMode) => { + // Auto-start the agent after spec creation (same as NewProjectModal) + try { + await startAgent(selectedProject, { + yoloMode: yoloMode ?? false, + maxConcurrency: 3, + }) + } catch (err) { + console.error('Failed to start agent:', err) + } setShowSpecChat(false) // Refresh projects to update has_spec queryClient.invalidateQueries({ queryKey: ['projects'] }) From f2eb468c464ef929dc7990211b832615b30be750 Mon Sep 17 00:00:00 2001 From: chrislangston Date: Sat, 31 Jan 2026 10:51:04 -0500 Subject: [PATCH 11/24] fix: add env-provided DEFAULT_MODEL to VALID_MODELS for validation consistency When ANTHROPIC_DEFAULT_OPUS_MODEL env var is set to a custom model ID, that model was not present in VALID_MODELS (derived from AVAILABLE_MODELS), causing potential validation failures in server/schemas.py validators. This fix dynamically appends the env-provided DEFAULT_MODEL to VALID_MODELS when set, ensuring validators accept the runtime default. The merge is idempotent (only adds if missing) and doesn't alter AVAILABLE_MODELS semantics. Addresses CodeRabbit review feedback on PR #147. --- registry.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/registry.py b/registry.py index 2e67f3c..f53e8bd 100644 --- a/registry.py +++ b/registry.py @@ -41,6 +41,11 @@ VALID_MODELS = [m["id"] for m in AVAILABLE_MODELS] # Default model and settings # Respect ANTHROPIC_DEFAULT_OPUS_MODEL env var for Foundry/custom deployments DEFAULT_MODEL = os.getenv("ANTHROPIC_DEFAULT_OPUS_MODEL", "claude-opus-4-5-20251101") + +# Ensure env-provided DEFAULT_MODEL is in VALID_MODELS for validation consistency +# (idempotent: only adds if missing, doesn't alter AVAILABLE_MODELS semantics) +if DEFAULT_MODEL not in VALID_MODELS: + VALID_MODELS.append(DEFAULT_MODEL) DEFAULT_YOLO_MODE = False # SQLite connection settings From 4336252b30edb2625199232ed64424f01e3f9ece Mon Sep 17 00:00:00 2001 From: chrislangston Date: Sat, 31 Jan 2026 11:22:19 -0500 Subject: [PATCH 12/24] fix: guard against empty/whitespace ANTHROPIC_DEFAULT_OPUS_MODEL values Trim the env var value and fall back to the default model when the trimmed result is empty. This prevents invalid empty strings from being appended to VALID_MODELS. Addresses CodeRabbit review feedback on PR #147. 
--- registry.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/registry.py b/registry.py index f53e8bd..cd52c7b 100644 --- a/registry.py +++ b/registry.py @@ -40,11 +40,15 @@ VALID_MODELS = [m["id"] for m in AVAILABLE_MODELS] # Default model and settings # Respect ANTHROPIC_DEFAULT_OPUS_MODEL env var for Foundry/custom deployments -DEFAULT_MODEL = os.getenv("ANTHROPIC_DEFAULT_OPUS_MODEL", "claude-opus-4-5-20251101") +# Guard against empty/whitespace values by trimming and falling back when blank +_env_default_model = os.getenv("ANTHROPIC_DEFAULT_OPUS_MODEL") +if _env_default_model is not None: + _env_default_model = _env_default_model.strip() +DEFAULT_MODEL = _env_default_model or "claude-opus-4-5-20251101" # Ensure env-provided DEFAULT_MODEL is in VALID_MODELS for validation consistency # (idempotent: only adds if missing, doesn't alter AVAILABLE_MODELS semantics) -if DEFAULT_MODEL not in VALID_MODELS: +if DEFAULT_MODEL and DEFAULT_MODEL not in VALID_MODELS: VALID_MODELS.append(DEFAULT_MODEL) DEFAULT_YOLO_MODE = False From 6609a0f7d6b6ab910f38cf27816b16f39890256b Mon Sep 17 00:00:00 2001 From: Auto Date: Sun, 1 Feb 2026 09:15:24 +0200 Subject: [PATCH 13/24] fix: prevent PendingRollbackError and add MCP tool support for sessions - Add explicit session.rollback() in exception handlers for database context managers in features.py, schedules.py, and database.py get_db() to prevent SQLAlchemy PendingRollbackError on failed transactions - Add EXPAND_FEATURE_TOOLS to expand session security settings allow list so the expand skill can use the MCP tools it references - Update assistant session prompt to direct the LLM to call MCP tools directly for feature creation instead of suggesting CLI commands Cherry-picked fixes from PR #92 (closed) with cleaner implementation. Co-Authored-By: Claude Opus 4.5 --- api/database.py | 3 +++ server/routers/features.py | 3 +++ server/routers/schedules.py | 3 +++ server/services/assistant_chat_session.py | 18 ++++++++++++------ server/services/expand_chat_session.py | 1 + 5 files changed, 22 insertions(+), 6 deletions(-) diff --git a/api/database.py b/api/database.py index 90dc49a..6dd4676 100644 --- a/api/database.py +++ b/api/database.py @@ -429,5 +429,8 @@ def get_db() -> Session: db = _session_maker() try: yield db + except Exception: + db.rollback() + raise finally: db.close() diff --git a/server/routers/features.py b/server/routers/features.py index c4c9c27..a0e1664 100644 --- a/server/routers/features.py +++ b/server/routers/features.py @@ -71,6 +71,9 @@ def get_db_session(project_dir: Path): session = SessionLocal() try: yield session + except Exception: + session.rollback() + raise finally: session.close() diff --git a/server/routers/schedules.py b/server/routers/schedules.py index 2a11ba3..b97ecc8 100644 --- a/server/routers/schedules.py +++ b/server/routers/schedules.py @@ -84,6 +84,9 @@ def _get_db_session(project_name: str) -> Generator[Tuple[Session, Path], None, db = SessionLocal() try: yield db, project_path + except Exception: + db.rollback() + raise finally: db.close() diff --git a/server/services/assistant_chat_session.py b/server/services/assistant_chat_session.py index f15eee8..1fb26e1 100755 --- a/server/services/assistant_chat_session.py +++ b/server/services/assistant_chat_session.py @@ -90,6 +90,8 @@ def get_system_prompt(project_name: str, project_dir: Path) -> str: Your role is to help users understand the codebase, answer questions about features, and manage the project backlog. 
You can READ files and CREATE/MANAGE features, but you cannot modify source code. +You have MCP tools available for feature management. Use them directly by calling the tool -- do not suggest CLI commands, bash commands, or curl commands to the user. You can create features yourself using the feature_create and feature_create_bulk tools. + ## What You CAN Do **Codebase Analysis (Read-Only):** @@ -134,17 +136,21 @@ If the user asks you to modify code, explain that you're a project assistant and ## Creating Features -When a user asks to add a feature, gather the following information: -1. **Category**: A grouping like "Authentication", "API", "UI", "Database" -2. **Name**: A concise, descriptive name -3. **Description**: What the feature should do -4. **Steps**: How to verify/implement the feature (as a list) +When a user asks to add a feature, use the `feature_create` or `feature_create_bulk` MCP tools directly: + +For a **single feature**, call `feature_create` with: +- category: A grouping like "Authentication", "API", "UI", "Database" +- name: A concise, descriptive name +- description: What the feature should do +- steps: List of verification/implementation steps + +For **multiple features**, call `feature_create_bulk` with an array of feature objects. You can ask clarifying questions if the user's request is vague, or make reasonable assumptions for simple requests. **Example interaction:** User: "Add a feature for S3 sync" -You: I'll create that feature. Let me add it to the backlog... +You: I'll create that feature now. [calls feature_create with appropriate parameters] You: Done! I've added "S3 Sync Integration" to your backlog. It's now visible on the kanban board. diff --git a/server/services/expand_chat_session.py b/server/services/expand_chat_session.py index 58dd50d..6829372 100644 --- a/server/services/expand_chat_session.py +++ b/server/services/expand_chat_session.py @@ -162,6 +162,7 @@ class ExpandChatSession: "allow": [ "Read(./**)", "Glob(./**)", + *EXPAND_FEATURE_TOOLS, ], }, } From d8a8c83447f28da5be4cb04b438cd97d4af733d7 Mon Sep 17 00:00:00 2001 From: Auto Date: Sun, 1 Feb 2026 09:45:20 +0200 Subject: [PATCH 14/24] fix: prevent SQLite corruption in parallel mode with atomic operations Replace ineffective threading.Lock() with atomic SQL operations for cross-process safety. Key changes: - Add SQLAlchemy event hooks (do_connect/do_begin) for BEGIN IMMEDIATE transactions in api/database.py - Add atomic_transaction() context manager for multi-statement ops - Convert all feature MCP write operations to atomic UPDATE...WHERE with compare-and-swap patterns (feature_claim, mark_passing, etc.) - Add WHERE passes=0 state guard to feature_mark_passing - Add WAL checkpoint on shutdown and idempotent cleanup() in parallel_orchestrator.py with async-safe signal handling - Wrap SQLite connections with contextlib.closing() in progress.py - Add thread-safe engine cache with double-checked locking in assistant_database.py - Migrate to SQLAlchemy 2.0 DeclarativeBase across all modules Inspired by PR #108 (cabana8471-arch), with fixes for nested BEGIN EXCLUSIVE bug and missing state guards. 
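For reference, a minimal sketch of the compare-and-swap shape these write operations now follow (table and column names here are illustrative; the real statements live in mcp_server/feature_mcp.py):

    from pathlib import Path
    from sqlalchemy import text
    from api.database import atomic_transaction, create_database

    def claim_feature(project_dir: Path, feature_id: int) -> bool:
        _engine, session_maker = create_database(project_dir)
        # BEGIN IMMEDIATE is issued by the engine event hooks, so the
        # conditional UPDATE runs under a write lock and commits on exit.
        with atomic_transaction(session_maker) as session:
            result = session.execute(
                text(
                    "UPDATE features SET in_progress = 1 "
                    "WHERE id = :id AND in_progress = 0"
                ),
                {"id": feature_id},
            )
            # rowcount == 0 means another process already claimed this feature
            return result.rowcount == 1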
Closes #106 Co-Authored-By: Claude Opus 4.5 --- api/database.py | 125 +++++++-- mcp_server/feature_mcp.py | 362 ++++++++++++++------------ parallel_orchestrator.py | 105 +++++++- progress.py | 102 ++++---- registry.py | 7 +- server/services/assistant_database.py | 43 ++- 6 files changed, 489 insertions(+), 255 deletions(-) diff --git a/api/database.py b/api/database.py index 6dd4676..2a732fe 100644 --- a/api/database.py +++ b/api/database.py @@ -8,7 +8,7 @@ SQLite database schema for feature storage using SQLAlchemy. import sys from datetime import datetime, timezone from pathlib import Path -from typing import Optional +from typing import Generator, Optional def _utc_now() -> datetime: @@ -26,13 +26,16 @@ from sqlalchemy import ( String, Text, create_engine, + event, text, ) -from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy.orm import Session, relationship, sessionmaker +from sqlalchemy.orm import DeclarativeBase, Session, relationship, sessionmaker from sqlalchemy.types import JSON -Base = declarative_base() + +class Base(DeclarativeBase): + """SQLAlchemy 2.0 style declarative base.""" + pass class Feature(Base): @@ -307,11 +310,11 @@ def _migrate_add_schedules_tables(engine) -> None: # Create schedules table if missing if "schedules" not in existing_tables: - Schedule.__table__.create(bind=engine) + Schedule.__table__.create(bind=engine) # type: ignore[attr-defined] # Create schedule_overrides table if missing if "schedule_overrides" not in existing_tables: - ScheduleOverride.__table__.create(bind=engine) + ScheduleOverride.__table__.create(bind=engine) # type: ignore[attr-defined] # Add crash_count column if missing (for upgrades) if "schedules" in existing_tables: @@ -332,6 +335,35 @@ def _migrate_add_schedules_tables(engine) -> None: conn.commit() +def _configure_sqlite_immediate_transactions(engine) -> None: + """Configure engine for IMMEDIATE transactions via event hooks. + + Per SQLAlchemy docs: https://docs.sqlalchemy.org/en/20/dialects/sqlite.html + + This replaces fragile pysqlite implicit transaction handling with explicit + BEGIN IMMEDIATE at transaction start. Benefits: + - Acquires write lock immediately, preventing stale reads + - Works correctly regardless of prior ORM operations + - Future-proof: won't break when pysqlite legacy mode is removed in Python 3.16 + """ + @event.listens_for(engine, "connect") + def do_connect(dbapi_connection, connection_record): + # Disable pysqlite's implicit transaction handling + dbapi_connection.isolation_level = None + + # Set busy_timeout on raw connection before any transactions + cursor = dbapi_connection.cursor() + try: + cursor.execute("PRAGMA busy_timeout=30000") + finally: + cursor.close() + + @event.listens_for(engine, "begin") + def do_begin(conn): + # Use IMMEDIATE for all transactions to prevent stale reads + conn.exec_driver_sql("BEGIN IMMEDIATE") + + def create_database(project_dir: Path) -> tuple: """ Create database and return engine + session maker. 
@@ -351,21 +383,37 @@ def create_database(project_dir: Path) -> tuple: return _engine_cache[cache_key] db_url = get_database_url(project_dir) - engine = create_engine(db_url, connect_args={ - "check_same_thread": False, - "timeout": 30 # Wait up to 30s for locks - }) - Base.metadata.create_all(bind=engine) # Choose journal mode based on filesystem type # WAL mode doesn't work reliably on network filesystems and can cause corruption is_network = _is_network_path(project_dir) journal_mode = "DELETE" if is_network else "WAL" + engine = create_engine(db_url, connect_args={ + "check_same_thread": False, + "timeout": 30 # Wait up to 30s for locks + }) + + # Set journal mode BEFORE configuring event hooks + # PRAGMA journal_mode must run outside of a transaction, and our event hooks + # start a transaction with BEGIN IMMEDIATE on every operation with engine.connect() as conn: - conn.execute(text(f"PRAGMA journal_mode={journal_mode}")) - conn.execute(text("PRAGMA busy_timeout=30000")) - conn.commit() + # Get raw DBAPI connection to execute PRAGMA outside transaction + raw_conn = conn.connection.dbapi_connection + if raw_conn is None: + raise RuntimeError("Failed to get raw DBAPI connection") + cursor = raw_conn.cursor() + try: + cursor.execute(f"PRAGMA journal_mode={journal_mode}") + cursor.execute("PRAGMA busy_timeout=30000") + finally: + cursor.close() + + # Configure IMMEDIATE transactions via event hooks AFTER setting PRAGMAs + # This must happen before create_all() and migrations run + _configure_sqlite_immediate_transactions(engine) + + Base.metadata.create_all(bind=engine) # Migrate existing databases _migrate_add_in_progress_column(engine) @@ -417,7 +465,7 @@ def set_session_maker(session_maker: sessionmaker) -> None: _session_maker = session_maker -def get_db() -> Session: +def get_db() -> Generator[Session, None, None]: """ Dependency for FastAPI to get database session. @@ -434,3 +482,50 @@ def get_db() -> Session: raise finally: db.close() + + +# ============================================================================= +# Atomic Transaction Helpers for Parallel Mode +# ============================================================================= +# These helpers prevent database corruption when multiple processes access the +# same SQLite database concurrently. They use IMMEDIATE transactions which +# acquire write locks at the start (preventing stale reads) and atomic +# UPDATE ... WHERE clauses (preventing check-then-modify races). + + +from contextlib import contextmanager + + +@contextmanager +def atomic_transaction(session_maker): + """Context manager for atomic SQLite transactions. + + Acquires a write lock immediately via BEGIN IMMEDIATE (configured by + engine event hooks), preventing stale reads in read-modify-write patterns. + This is essential for preventing race conditions in parallel mode. 
+ + Args: + session_maker: SQLAlchemy sessionmaker + + Yields: + SQLAlchemy session with automatic commit/rollback + + Example: + with atomic_transaction(session_maker) as session: + # All reads in this block are protected by write lock + feature = session.query(Feature).filter(...).first() + feature.priority = new_priority + # Commit happens automatically on exit + """ + session = session_maker() + try: + yield session + session.commit() + except Exception: + try: + session.rollback() + except Exception: + pass # Don't let rollback failure mask original error + raise + finally: + session.close() diff --git a/mcp_server/feature_mcp.py b/mcp_server/feature_mcp.py index a394f1e..a7f2691 100755 --- a/mcp_server/feature_mcp.py +++ b/mcp_server/feature_mcp.py @@ -30,18 +30,18 @@ orchestrator, not by agents. Agents receive pre-assigned feature IDs. import json import os import sys -import threading from contextlib import asynccontextmanager from pathlib import Path from typing import Annotated from mcp.server.fastmcp import FastMCP from pydantic import BaseModel, Field +from sqlalchemy import text # Add parent directory to path so we can import from api module sys.path.insert(0, str(Path(__file__).parent.parent)) -from api.database import Feature, create_database +from api.database import Feature, atomic_transaction, create_database from api.dependency_resolver import ( MAX_DEPENDENCIES_PER_FEATURE, compute_scheduling_scores, @@ -96,8 +96,9 @@ class BulkCreateInput(BaseModel): _session_maker = None _engine = None -# Lock for priority assignment to prevent race conditions -_priority_lock = threading.Lock() +# NOTE: The old threading.Lock() was removed because it only worked per-process, +# not cross-process. In parallel mode, multiple MCP servers run in separate +# processes, so the lock was useless. We now use atomic SQL operations instead. 
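+# Illustration (the same pattern the tools below use): an atomic claim is a
+# single guarded UPDATE, e.g.
+#
+#     UPDATE features SET in_progress = 1
+#     WHERE id = :id AND passes = 0 AND in_progress = 0
+#
+# and callers inspect ``rowcount`` to learn whether they won the race, so no
+# cross-process lock is required.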
@asynccontextmanager @@ -243,15 +244,25 @@ def feature_mark_passing( """ session = get_session() try: - feature = session.query(Feature).filter(Feature.id == feature_id).first() - - if feature is None: - return json.dumps({"error": f"Feature with ID {feature_id} not found"}) - - feature.passes = True - feature.in_progress = False + # Atomic update with state guard - prevents double-pass in parallel mode + result = session.execute(text(""" + UPDATE features + SET passes = 1, in_progress = 0 + WHERE id = :id AND passes = 0 + """), {"id": feature_id}) session.commit() + if result.rowcount == 0: + # Check why the update didn't match + feature = session.query(Feature).filter(Feature.id == feature_id).first() + if feature is None: + return json.dumps({"error": f"Feature with ID {feature_id} not found"}) + if feature.passes: + return json.dumps({"error": f"Feature with ID {feature_id} is already passing"}) + return json.dumps({"error": "Failed to mark feature passing for unknown reason"}) + + # Get the feature name for the response + feature = session.query(Feature).filter(Feature.id == feature_id).first() return json.dumps({"success": True, "feature_id": feature_id, "name": feature.name}) except Exception as e: session.rollback() @@ -284,14 +295,20 @@ def feature_mark_failing( """ session = get_session() try: + # Check if feature exists first feature = session.query(Feature).filter(Feature.id == feature_id).first() - if feature is None: return json.dumps({"error": f"Feature with ID {feature_id} not found"}) - feature.passes = False - feature.in_progress = False + # Atomic update for parallel safety + session.execute(text(""" + UPDATE features + SET passes = 0, in_progress = 0 + WHERE id = :id + """), {"id": feature_id}) session.commit() + + # Refresh to get updated state session.refresh(feature) return json.dumps({ @@ -337,25 +354,28 @@ def feature_skip( return json.dumps({"error": "Cannot skip a feature that is already passing"}) old_priority = feature.priority + name = feature.name - # Use lock to prevent race condition in priority assignment - with _priority_lock: - # Get max priority and set this feature to max + 1 - max_priority_result = session.query(Feature.priority).order_by(Feature.priority.desc()).first() - new_priority = (max_priority_result[0] + 1) if max_priority_result else 1 - - feature.priority = new_priority - feature.in_progress = False - session.commit() + # Atomic update: set priority to max+1 in a single statement + # This prevents race conditions where two features get the same priority + session.execute(text(""" + UPDATE features + SET priority = (SELECT COALESCE(MAX(priority), 0) + 1 FROM features), + in_progress = 0 + WHERE id = :id + """), {"id": feature_id}) + session.commit() + # Refresh to get new priority session.refresh(feature) + new_priority = feature.priority return json.dumps({ - "id": feature.id, - "name": feature.name, + "id": feature_id, + "name": name, "old_priority": old_priority, "new_priority": new_priority, - "message": f"Feature '{feature.name}' moved to end of queue" + "message": f"Feature '{name}' moved to end of queue" }) except Exception as e: session.rollback() @@ -381,21 +401,27 @@ def feature_mark_in_progress( """ session = get_session() try: - feature = session.query(Feature).filter(Feature.id == feature_id).first() - - if feature is None: - return json.dumps({"error": f"Feature with ID {feature_id} not found"}) - - if feature.passes: - return json.dumps({"error": f"Feature with ID {feature_id} is already passing"}) - - if feature.in_progress: 
- return json.dumps({"error": f"Feature with ID {feature_id} is already in-progress"}) - - feature.in_progress = True + # Atomic claim: only succeeds if feature is not already claimed or passing + result = session.execute(text(""" + UPDATE features + SET in_progress = 1 + WHERE id = :id AND passes = 0 AND in_progress = 0 + """), {"id": feature_id}) session.commit() - session.refresh(feature) + if result.rowcount == 0: + # Check why the claim failed + feature = session.query(Feature).filter(Feature.id == feature_id).first() + if feature is None: + return json.dumps({"error": f"Feature with ID {feature_id} not found"}) + if feature.passes: + return json.dumps({"error": f"Feature with ID {feature_id} is already passing"}) + if feature.in_progress: + return json.dumps({"error": f"Feature with ID {feature_id} is already in-progress"}) + return json.dumps({"error": "Failed to mark feature in-progress for unknown reason"}) + + # Fetch the claimed feature + feature = session.query(Feature).filter(Feature.id == feature_id).first() return json.dumps(feature.to_dict()) except Exception as e: session.rollback() @@ -421,24 +447,35 @@ def feature_claim_and_get( """ session = get_session() try: + # First check if feature exists feature = session.query(Feature).filter(Feature.id == feature_id).first() - if feature is None: return json.dumps({"error": f"Feature with ID {feature_id} not found"}) if feature.passes: return json.dumps({"error": f"Feature with ID {feature_id} is already passing"}) - # Idempotent: if already in-progress, just return details - already_claimed = feature.in_progress - if not already_claimed: - feature.in_progress = True - session.commit() - session.refresh(feature) + # Try atomic claim: only succeeds if not already claimed + result = session.execute(text(""" + UPDATE features + SET in_progress = 1 + WHERE id = :id AND passes = 0 AND in_progress = 0 + """), {"id": feature_id}) + session.commit() - result = feature.to_dict() - result["already_claimed"] = already_claimed - return json.dumps(result) + # Determine if we claimed it or it was already claimed + already_claimed = result.rowcount == 0 + if already_claimed: + # Verify it's in_progress (not some other failure condition) + session.refresh(feature) + if not feature.in_progress: + return json.dumps({"error": f"Failed to claim feature {feature_id} for unknown reason"}) + + # Refresh to get current state + session.refresh(feature) + result_dict = feature.to_dict() + result_dict["already_claimed"] = already_claimed + return json.dumps(result_dict) except Exception as e: session.rollback() return json.dumps({"error": f"Failed to claim feature: {str(e)}"}) @@ -463,15 +500,20 @@ def feature_clear_in_progress( """ session = get_session() try: + # Check if feature exists feature = session.query(Feature).filter(Feature.id == feature_id).first() - if feature is None: return json.dumps({"error": f"Feature with ID {feature_id} not found"}) - feature.in_progress = False + # Atomic update - idempotent, safe in parallel mode + session.execute(text(""" + UPDATE features + SET in_progress = 0 + WHERE id = :id + """), {"id": feature_id}) session.commit() - session.refresh(feature) + session.refresh(feature) return json.dumps(feature.to_dict()) except Exception as e: session.rollback() @@ -506,13 +548,14 @@ def feature_create_bulk( Returns: JSON with: created (int) - number of features created, with_dependencies (int) """ - session = get_session() try: - # Use lock to prevent race condition in priority assignment - with _priority_lock: - # Get the 
starting priority - max_priority_result = session.query(Feature.priority).order_by(Feature.priority.desc()).first() - start_priority = (max_priority_result[0] + 1) if max_priority_result else 1 + # Use atomic transaction for bulk inserts to prevent priority conflicts + with atomic_transaction(_session_maker) as session: + # Get the starting priority atomically within the transaction + result = session.execute(text(""" + SELECT COALESCE(MAX(priority), 0) FROM features + """)).fetchone() + start_priority = (result[0] or 0) + 1 # First pass: validate all features and their index-based dependencies for i, feature_data in enumerate(features): @@ -546,7 +589,7 @@ def feature_create_bulk( "error": f"Feature at index {i} cannot depend on feature at index {idx} (forward reference not allowed)" }) - # Second pass: create all features + # Second pass: create all features with reserved priorities created_features: list[Feature] = [] for i, feature_data in enumerate(features): db_feature = Feature( @@ -574,17 +617,13 @@ def feature_create_bulk( created_features[i].dependencies = sorted(dep_ids) deps_count += 1 - session.commit() - - return json.dumps({ - "created": len(created_features), - "with_dependencies": deps_count - }) + # Commit happens automatically on context manager exit + return json.dumps({ + "created": len(created_features), + "with_dependencies": deps_count + }) except Exception as e: - session.rollback() return json.dumps({"error": str(e)}) - finally: - session.close() @mcp.tool() @@ -608,13 +647,14 @@ def feature_create( Returns: JSON with the created feature details including its ID """ - session = get_session() try: - # Use lock to prevent race condition in priority assignment - with _priority_lock: - # Get the next priority - max_priority_result = session.query(Feature.priority).order_by(Feature.priority.desc()).first() - next_priority = (max_priority_result[0] + 1) if max_priority_result else 1 + # Use atomic transaction to prevent priority collisions + with atomic_transaction(_session_maker) as session: + # Get the next priority atomically within the transaction + result = session.execute(text(""" + SELECT COALESCE(MAX(priority), 0) + 1 FROM features + """)).fetchone() + next_priority = result[0] db_feature = Feature( priority=next_priority, @@ -626,20 +666,18 @@ def feature_create( in_progress=False, ) session.add(db_feature) - session.commit() + session.flush() # Get the ID - session.refresh(db_feature) + feature_dict = db_feature.to_dict() + # Commit happens automatically on context manager exit return json.dumps({ "success": True, "message": f"Created feature: {name}", - "feature": db_feature.to_dict() + "feature": feature_dict }) except Exception as e: - session.rollback() return json.dumps({"error": str(e)}) - finally: - session.close() @mcp.tool() @@ -659,52 +697,49 @@ def feature_add_dependency( Returns: JSON with success status and updated dependencies list, or error message """ - session = get_session() try: - # Security: Self-reference check + # Security: Self-reference check (can do before transaction) if feature_id == dependency_id: return json.dumps({"error": "A feature cannot depend on itself"}) - feature = session.query(Feature).filter(Feature.id == feature_id).first() - dependency = session.query(Feature).filter(Feature.id == dependency_id).first() + # Use atomic transaction for consistent cycle detection + with atomic_transaction(_session_maker) as session: + feature = session.query(Feature).filter(Feature.id == feature_id).first() + dependency = 
session.query(Feature).filter(Feature.id == dependency_id).first() - if not feature: - return json.dumps({"error": f"Feature {feature_id} not found"}) - if not dependency: - return json.dumps({"error": f"Dependency feature {dependency_id} not found"}) + if not feature: + return json.dumps({"error": f"Feature {feature_id} not found"}) + if not dependency: + return json.dumps({"error": f"Dependency feature {dependency_id} not found"}) - current_deps = feature.dependencies or [] + current_deps = feature.dependencies or [] - # Security: Max dependencies limit - if len(current_deps) >= MAX_DEPENDENCIES_PER_FEATURE: - return json.dumps({"error": f"Maximum {MAX_DEPENDENCIES_PER_FEATURE} dependencies allowed per feature"}) + # Security: Max dependencies limit + if len(current_deps) >= MAX_DEPENDENCIES_PER_FEATURE: + return json.dumps({"error": f"Maximum {MAX_DEPENDENCIES_PER_FEATURE} dependencies allowed per feature"}) - # Check if already exists - if dependency_id in current_deps: - return json.dumps({"error": "Dependency already exists"}) + # Check if already exists + if dependency_id in current_deps: + return json.dumps({"error": "Dependency already exists"}) - # Security: Circular dependency check - # would_create_circular_dependency(features, source_id, target_id) - # source_id = feature gaining the dependency, target_id = feature being depended upon - all_features = [f.to_dict() for f in session.query(Feature).all()] - if would_create_circular_dependency(all_features, feature_id, dependency_id): - return json.dumps({"error": "Cannot add: would create circular dependency"}) + # Security: Circular dependency check + # Within IMMEDIATE transaction, snapshot is protected by write lock + all_features = [f.to_dict() for f in session.query(Feature).all()] + if would_create_circular_dependency(all_features, feature_id, dependency_id): + return json.dumps({"error": "Cannot add: would create circular dependency"}) - # Add dependency - current_deps.append(dependency_id) - feature.dependencies = sorted(current_deps) - session.commit() + # Add dependency atomically + new_deps = sorted(current_deps + [dependency_id]) + feature.dependencies = new_deps + # Commit happens automatically on context manager exit - return json.dumps({ - "success": True, - "feature_id": feature_id, - "dependencies": feature.dependencies - }) + return json.dumps({ + "success": True, + "feature_id": feature_id, + "dependencies": new_deps + }) except Exception as e: - session.rollback() return json.dumps({"error": f"Failed to add dependency: {str(e)}"}) - finally: - session.close() @mcp.tool() @@ -721,30 +756,29 @@ def feature_remove_dependency( Returns: JSON with success status and updated dependencies list, or error message """ - session = get_session() try: - feature = session.query(Feature).filter(Feature.id == feature_id).first() - if not feature: - return json.dumps({"error": f"Feature {feature_id} not found"}) + # Use atomic transaction for consistent read-modify-write + with atomic_transaction(_session_maker) as session: + feature = session.query(Feature).filter(Feature.id == feature_id).first() + if not feature: + return json.dumps({"error": f"Feature {feature_id} not found"}) - current_deps = feature.dependencies or [] - if dependency_id not in current_deps: - return json.dumps({"error": "Dependency does not exist"}) + current_deps = feature.dependencies or [] + if dependency_id not in current_deps: + return json.dumps({"error": "Dependency does not exist"}) - current_deps.remove(dependency_id) - feature.dependencies = 
current_deps if current_deps else None - session.commit() + # Remove dependency atomically + new_deps = [d for d in current_deps if d != dependency_id] + feature.dependencies = new_deps if new_deps else None + # Commit happens automatically on context manager exit - return json.dumps({ - "success": True, - "feature_id": feature_id, - "dependencies": feature.dependencies or [] - }) + return json.dumps({ + "success": True, + "feature_id": feature_id, + "dependencies": new_deps + }) except Exception as e: - session.rollback() return json.dumps({"error": f"Failed to remove dependency: {str(e)}"}) - finally: - session.close() @mcp.tool() @@ -897,9 +931,8 @@ def feature_set_dependencies( Returns: JSON with success status and updated dependencies list, or error message """ - session = get_session() try: - # Security: Self-reference check + # Security: Self-reference check (can do before transaction) if feature_id in dependency_ids: return json.dumps({"error": "A feature cannot depend on itself"}) @@ -911,45 +944,44 @@ def feature_set_dependencies( if len(dependency_ids) != len(set(dependency_ids)): return json.dumps({"error": "Duplicate dependencies not allowed"}) - feature = session.query(Feature).filter(Feature.id == feature_id).first() - if not feature: - return json.dumps({"error": f"Feature {feature_id} not found"}) + # Use atomic transaction for consistent cycle detection + with atomic_transaction(_session_maker) as session: + feature = session.query(Feature).filter(Feature.id == feature_id).first() + if not feature: + return json.dumps({"error": f"Feature {feature_id} not found"}) - # Validate all dependencies exist - all_feature_ids = {f.id for f in session.query(Feature).all()} - missing = [d for d in dependency_ids if d not in all_feature_ids] - if missing: - return json.dumps({"error": f"Dependencies not found: {missing}"}) + # Validate all dependencies exist + all_feature_ids = {f.id for f in session.query(Feature).all()} + missing = [d for d in dependency_ids if d not in all_feature_ids] + if missing: + return json.dumps({"error": f"Dependencies not found: {missing}"}) - # Check for circular dependencies - all_features = [f.to_dict() for f in session.query(Feature).all()] - # Temporarily update the feature's dependencies for cycle check - test_features = [] - for f in all_features: - if f["id"] == feature_id: - test_features.append({**f, "dependencies": dependency_ids}) - else: - test_features.append(f) + # Check for circular dependencies + # Within IMMEDIATE transaction, snapshot is protected by write lock + all_features = [f.to_dict() for f in session.query(Feature).all()] + test_features = [] + for f in all_features: + if f["id"] == feature_id: + test_features.append({**f, "dependencies": dependency_ids}) + else: + test_features.append(f) - for dep_id in dependency_ids: - # source_id = feature_id (gaining dep), target_id = dep_id (being depended upon) - if would_create_circular_dependency(test_features, feature_id, dep_id): - return json.dumps({"error": f"Cannot add dependency {dep_id}: would create circular dependency"}) + for dep_id in dependency_ids: + if would_create_circular_dependency(test_features, feature_id, dep_id): + return json.dumps({"error": f"Cannot add dependency {dep_id}: would create circular dependency"}) - # Set dependencies - feature.dependencies = sorted(dependency_ids) if dependency_ids else None - session.commit() + # Set dependencies atomically + sorted_deps = sorted(dependency_ids) if dependency_ids else None + feature.dependencies = sorted_deps + # 
Commit happens automatically on context manager exit - return json.dumps({ - "success": True, - "feature_id": feature_id, - "dependencies": feature.dependencies or [] - }) + return json.dumps({ + "success": True, + "feature_id": feature_id, + "dependencies": sorted_deps or [] + }) except Exception as e: - session.rollback() return json.dumps({"error": f"Failed to set dependencies: {str(e)}"}) - finally: - session.close() if __name__ == "__main__": diff --git a/parallel_orchestrator.py b/parallel_orchestrator.py index 574cbd2..6e8bb54 100644 --- a/parallel_orchestrator.py +++ b/parallel_orchestrator.py @@ -19,7 +19,9 @@ Usage: """ import asyncio +import atexit import os +import signal import subprocess import sys import threading @@ -27,6 +29,8 @@ from datetime import datetime, timezone from pathlib import Path from typing import Callable, Literal +from sqlalchemy import text + from api.database import Feature, create_database from api.dependency_resolver import are_dependencies_satisfied, compute_scheduling_scores from progress import has_features @@ -139,11 +143,11 @@ class ParallelOrchestrator: self, project_dir: Path, max_concurrency: int = DEFAULT_CONCURRENCY, - model: str = None, + model: str | None = None, yolo_mode: bool = False, testing_agent_ratio: int = 1, - on_output: Callable[[int, str], None] = None, - on_status: Callable[[int, str], None] = None, + on_output: Callable[[int, str], None] | None = None, + on_status: Callable[[int, str], None] | None = None, ): """Initialize the orchestrator. @@ -182,14 +186,18 @@ class ParallelOrchestrator: # Track feature failures to prevent infinite retry loops self._failure_counts: dict[int, int] = {} + # Shutdown flag for async-safe signal handling + # Signal handlers only set this flag; cleanup happens in the main loop + self._shutdown_requested = False + # Session tracking for logging/debugging - self.session_start_time: datetime = None + self.session_start_time: datetime | None = None # Event signaled when any agent completes, allowing the main loop to wake # immediately instead of waiting for the full POLL_INTERVAL timeout. # This reduces latency when spawning the next feature after completion. 
- self._agent_completed_event: asyncio.Event = None # Created in run_loop - self._event_loop: asyncio.AbstractEventLoop = None # Stored for thread-safe signaling + self._agent_completed_event: asyncio.Event | None = None # Created in run_loop + self._event_loop: asyncio.AbstractEventLoop | None = None # Stored for thread-safe signaling # Database session for this orchestrator self._engine, self._session_maker = create_database(project_dir) @@ -375,7 +383,8 @@ class ParallelOrchestrator: session = self.get_session() try: session.expire_all() - return session.query(Feature).filter(Feature.passes == True).count() + count: int = session.query(Feature).filter(Feature.passes == True).count() + return count finally: session.close() @@ -511,11 +520,14 @@ class ParallelOrchestrator: try: # CREATE_NO_WINDOW on Windows prevents console window pop-ups # stdin=DEVNULL prevents blocking on stdin reads + # encoding="utf-8" and errors="replace" fix Windows CP1252 issues popen_kwargs = { "stdin": subprocess.DEVNULL, "stdout": subprocess.PIPE, "stderr": subprocess.STDOUT, "text": True, + "encoding": "utf-8", + "errors": "replace", "cwd": str(AUTOCODER_ROOT), # Run from autocoder root for proper imports "env": {**os.environ, "PYTHONUNBUFFERED": "1"}, } @@ -546,7 +558,7 @@ class ParallelOrchestrator: daemon=True ).start() - if self.on_status: + if self.on_status is not None: self.on_status(feature_id, "running") print(f"Started coding agent for feature #{feature_id}", flush=True) @@ -600,11 +612,14 @@ class ParallelOrchestrator: try: # CREATE_NO_WINDOW on Windows prevents console window pop-ups # stdin=DEVNULL prevents blocking on stdin reads + # encoding="utf-8" and errors="replace" fix Windows CP1252 issues popen_kwargs = { "stdin": subprocess.DEVNULL, "stdout": subprocess.PIPE, "stderr": subprocess.STDOUT, "text": True, + "encoding": "utf-8", + "errors": "replace", "cwd": str(AUTOCODER_ROOT), "env": {**os.environ, "PYTHONUNBUFFERED": "1"}, } @@ -658,11 +673,14 @@ class ParallelOrchestrator: # CREATE_NO_WINDOW on Windows prevents console window pop-ups # stdin=DEVNULL prevents blocking on stdin reads + # encoding="utf-8" and errors="replace" fix Windows CP1252 issues popen_kwargs = { "stdin": subprocess.DEVNULL, "stdout": subprocess.PIPE, "stderr": subprocess.STDOUT, "text": True, + "encoding": "utf-8", + "errors": "replace", "cwd": str(AUTOCODER_ROOT), "env": {**os.environ, "PYTHONUNBUFFERED": "1"}, } @@ -682,7 +700,7 @@ class ParallelOrchestrator: if not line: break print(line.rstrip(), flush=True) - if self.on_output: + if self.on_output is not None: self.on_output(0, line.rstrip()) # Use 0 as feature_id for initializer proc.wait() @@ -716,11 +734,14 @@ class ParallelOrchestrator: ): """Read output from subprocess and emit events.""" try: + if proc.stdout is None: + proc.wait() + return for line in proc.stdout: if abort.is_set(): break line = line.rstrip() - if self.on_output: + if self.on_output is not None: self.on_output(feature_id or 0, line) else: # Both coding and testing agents now use [Feature #X] format @@ -814,6 +835,9 @@ class ParallelOrchestrator: self._signal_agent_completed() return + # feature_id is required for coding agents (always passed from start_feature) + assert feature_id is not None, "feature_id must not be None for coding agents" + # Coding agent completion debug_log.log("COMPLETE", f"Coding agent for feature #{feature_id} finished", return_code=return_code, @@ -855,7 +879,7 @@ class ParallelOrchestrator: failure_count=failure_count) status = "completed" if return_code == 0 else 
"failed" - if self.on_status: + if self.on_status is not None: self.on_status(feature_id, status) # CRITICAL: This print triggers the WebSocket to emit agent_update with state='error' or 'success' print(f"Feature #{feature_id} {status}", flush=True) @@ -1014,7 +1038,7 @@ class ParallelOrchestrator: debug_log.section("FEATURE LOOP STARTING") loop_iteration = 0 - while self.is_running: + while self.is_running and not self._shutdown_requested: loop_iteration += 1 if loop_iteration <= 3: print(f"[DEBUG] === Loop iteration {loop_iteration} ===", flush=True) @@ -1163,11 +1187,40 @@ class ParallelOrchestrator: "yolo_mode": self.yolo_mode, } + def cleanup(self) -> None: + """Clean up database resources. Safe to call multiple times. + + Forces WAL checkpoint to flush pending writes to main database file, + then disposes engine to close all connections. Prevents stale cache + issues when the orchestrator restarts. + """ + # Atomically grab and clear the engine reference to prevent re-entry + engine = self._engine + self._engine = None + + if engine is None: + return # Already cleaned up + + try: + debug_log.log("CLEANUP", "Forcing WAL checkpoint before dispose") + with engine.connect() as conn: + conn.execute(text("PRAGMA wal_checkpoint(FULL)")) + conn.commit() + debug_log.log("CLEANUP", "WAL checkpoint completed, disposing engine") + except Exception as e: + debug_log.log("CLEANUP", f"WAL checkpoint failed (non-fatal): {e}") + + try: + engine.dispose() + debug_log.log("CLEANUP", "Engine disposed successfully") + except Exception as e: + debug_log.log("CLEANUP", f"Engine dispose failed: {e}") + async def run_parallel_orchestrator( project_dir: Path, max_concurrency: int = DEFAULT_CONCURRENCY, - model: str = None, + model: str | None = None, yolo_mode: bool = False, testing_agent_ratio: int = 1, ) -> None: @@ -1189,11 +1242,37 @@ async def run_parallel_orchestrator( testing_agent_ratio=testing_agent_ratio, ) + # Set up cleanup to run on exit (handles normal exit, exceptions) + def cleanup_handler(): + debug_log.log("CLEANUP", "atexit cleanup handler invoked") + orchestrator.cleanup() + + atexit.register(cleanup_handler) + + # Set up async-safe signal handler for graceful shutdown + # Only sets flags - everything else is unsafe in signal context + def signal_handler(signum, frame): + orchestrator._shutdown_requested = True + orchestrator.is_running = False + + # Register SIGTERM handler for process termination signals + # Note: On Windows, SIGTERM handlers only fire from os.kill() calls within Python. + # External termination (Task Manager, taskkill, Popen.terminate()) uses + # TerminateProcess() which bypasses signal handlers entirely. + signal.signal(signal.SIGTERM, signal_handler) + + # Note: We intentionally do NOT register SIGINT handler + # Let Python raise KeyboardInterrupt naturally so the except block works + try: await orchestrator.run_loop() except KeyboardInterrupt: print("\n\nInterrupted by user. 
Stopping agents...", flush=True) orchestrator.stop_all() + finally: + # CRITICAL: Always clean up database resources on exit + # This forces WAL checkpoint and disposes connections + orchestrator.cleanup() def main(): diff --git a/progress.py b/progress.py index 0821c90..1f17ae6 100644 --- a/progress.py +++ b/progress.py @@ -10,12 +10,21 @@ import json import os import sqlite3 import urllib.request +from contextlib import closing from datetime import datetime, timezone from pathlib import Path WEBHOOK_URL = os.environ.get("PROGRESS_N8N_WEBHOOK_URL") PROGRESS_CACHE_FILE = ".progress_cache" +# SQLite connection settings for parallel mode safety +SQLITE_TIMEOUT = 30 # seconds to wait for locks + + +def _get_connection(db_file: Path) -> sqlite3.Connection: + """Get a SQLite connection with proper timeout settings for parallel mode.""" + return sqlite3.connect(db_file, timeout=SQLITE_TIMEOUT) + def has_features(project_dir: Path) -> bool: """ @@ -31,8 +40,6 @@ def has_features(project_dir: Path) -> bool: Returns False if no features exist (initializer needs to run). """ - import sqlite3 - # Check legacy JSON file first json_file = project_dir / "feature_list.json" if json_file.exists(): @@ -44,12 +51,11 @@ def has_features(project_dir: Path) -> bool: return False try: - conn = sqlite3.connect(db_file) - cursor = conn.cursor() - cursor.execute("SELECT COUNT(*) FROM features") - count = cursor.fetchone()[0] - conn.close() - return count > 0 + with closing(_get_connection(db_file)) as conn: + cursor = conn.cursor() + cursor.execute("SELECT COUNT(*) FROM features") + count: int = cursor.fetchone()[0] + return bool(count > 0) except Exception: # Database exists but can't be read or has no features table return False @@ -70,36 +76,35 @@ def count_passing_tests(project_dir: Path) -> tuple[int, int, int]: return 0, 0, 0 try: - conn = sqlite3.connect(db_file) - cursor = conn.cursor() - # Single aggregate query instead of 3 separate COUNT queries - # Handle case where in_progress column doesn't exist yet (legacy DBs) - try: - cursor.execute(""" - SELECT - COUNT(*) as total, - SUM(CASE WHEN passes = 1 THEN 1 ELSE 0 END) as passing, - SUM(CASE WHEN in_progress = 1 THEN 1 ELSE 0 END) as in_progress - FROM features - """) - row = cursor.fetchone() - total = row[0] or 0 - passing = row[1] or 0 - in_progress = row[2] or 0 - except sqlite3.OperationalError: - # Fallback for databases without in_progress column - cursor.execute(""" - SELECT - COUNT(*) as total, - SUM(CASE WHEN passes = 1 THEN 1 ELSE 0 END) as passing - FROM features - """) - row = cursor.fetchone() - total = row[0] or 0 - passing = row[1] or 0 - in_progress = 0 - conn.close() - return passing, in_progress, total + with closing(_get_connection(db_file)) as conn: + cursor = conn.cursor() + # Single aggregate query instead of 3 separate COUNT queries + # Handle case where in_progress column doesn't exist yet (legacy DBs) + try: + cursor.execute(""" + SELECT + COUNT(*) as total, + SUM(CASE WHEN passes = 1 THEN 1 ELSE 0 END) as passing, + SUM(CASE WHEN in_progress = 1 THEN 1 ELSE 0 END) as in_progress + FROM features + """) + row = cursor.fetchone() + total = row[0] or 0 + passing = row[1] or 0 + in_progress = row[2] or 0 + except sqlite3.OperationalError: + # Fallback for databases without in_progress column + cursor.execute(""" + SELECT + COUNT(*) as total, + SUM(CASE WHEN passes = 1 THEN 1 ELSE 0 END) as passing + FROM features + """) + row = cursor.fetchone() + total = row[0] or 0 + passing = row[1] or 0 + in_progress = 0 + return passing, 
in_progress, total except Exception as e: print(f"[Database error in count_passing_tests: {e}]") return 0, 0, 0 @@ -120,17 +125,16 @@ def get_all_passing_features(project_dir: Path) -> list[dict]: return [] try: - conn = sqlite3.connect(db_file) - cursor = conn.cursor() - cursor.execute( - "SELECT id, category, name FROM features WHERE passes = 1 ORDER BY priority ASC" - ) - features = [ - {"id": row[0], "category": row[1], "name": row[2]} - for row in cursor.fetchall() - ] - conn.close() - return features + with closing(_get_connection(db_file)) as conn: + cursor = conn.cursor() + cursor.execute( + "SELECT id, category, name FROM features WHERE passes = 1 ORDER BY priority ASC" + ) + features = [ + {"id": row[0], "category": row[1], "name": row[2]} + for row in cursor.fetchall() + ] + return features except Exception: return [] diff --git a/registry.py b/registry.py index f84803e..7d0c2af 100644 --- a/registry.py +++ b/registry.py @@ -17,8 +17,7 @@ from pathlib import Path from typing import Any from sqlalchemy import Column, DateTime, Integer, String, create_engine, text -from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy.orm import sessionmaker +from sqlalchemy.orm import DeclarativeBase, sessionmaker # Module logger logger = logging.getLogger(__name__) @@ -75,7 +74,9 @@ class RegistryPermissionDenied(RegistryError): # SQLAlchemy Model # ============================================================================= -Base = declarative_base() +class Base(DeclarativeBase): + """SQLAlchemy 2.0 style declarative base.""" + pass class Project(Base): diff --git a/server/services/assistant_database.py b/server/services/assistant_database.py index f2ade75..0dbfdd3 100644 --- a/server/services/assistant_database.py +++ b/server/services/assistant_database.py @@ -7,21 +7,28 @@ Each project has its own assistant.db file in the project directory. """ import logging +import threading from datetime import datetime, timezone from pathlib import Path from typing import Optional from sqlalchemy import Column, DateTime, ForeignKey, Integer, String, Text, create_engine, func -from sqlalchemy.orm import declarative_base, relationship, sessionmaker +from sqlalchemy.orm import DeclarativeBase, relationship, sessionmaker logger = logging.getLogger(__name__) -Base = declarative_base() +class Base(DeclarativeBase): + """SQLAlchemy 2.0 style declarative base.""" + pass # Engine cache to avoid creating new engines for each request # Key: project directory path (as posix string), Value: SQLAlchemy engine _engine_cache: dict[str, object] = {} +# Lock for thread-safe access to the engine cache +# Prevents race conditions when multiple threads create engines simultaneously +_cache_lock = threading.Lock() + def _utc_now() -> datetime: """Return current UTC time. Replacement for deprecated datetime.utcnow().""" @@ -64,17 +71,33 @@ def get_engine(project_dir: Path): Uses a cache to avoid creating new engines for each request, which improves performance by reusing database connections. + + Thread-safe: Uses a lock to prevent race conditions when multiple threads + try to create engines simultaneously for the same project. 
""" cache_key = project_dir.as_posix() - if cache_key not in _engine_cache: - db_path = get_db_path(project_dir) - # Use as_posix() for cross-platform compatibility with SQLite connection strings - db_url = f"sqlite:///{db_path.as_posix()}" - engine = create_engine(db_url, echo=False) - Base.metadata.create_all(engine) - _engine_cache[cache_key] = engine - logger.debug(f"Created new database engine for {cache_key}") + # Double-checked locking for thread safety and performance + if cache_key in _engine_cache: + return _engine_cache[cache_key] + + with _cache_lock: + # Check again inside the lock in case another thread created it + if cache_key not in _engine_cache: + db_path = get_db_path(project_dir) + # Use as_posix() for cross-platform compatibility with SQLite connection strings + db_url = f"sqlite:///{db_path.as_posix()}" + engine = create_engine( + db_url, + echo=False, + connect_args={ + "check_same_thread": False, + "timeout": 30, # Wait up to 30s for locks + } + ) + Base.metadata.create_all(engine) + _engine_cache[cache_key] = engine + logger.debug(f"Created new database engine for {cache_key}") return _engine_cache[cache_key] From b1419baf34bdef57dd7ca31737b4766d58d39018 Mon Sep 17 00:00:00 2001 From: Auto Date: Sun, 1 Feb 2026 09:58:59 +0200 Subject: [PATCH 15/24] Update PR review command --- .claude/commands/review-pr.md | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/.claude/commands/review-pr.md b/.claude/commands/review-pr.md index 9c9098f..027c807 100644 --- a/.claude/commands/review-pr.md +++ b/.claude/commands/review-pr.md @@ -40,15 +40,36 @@ Pull request(s): $ARGUMENTS - For Medium PRs: spawn 1-2 agents focusing on the most impacted areas - For Complex PRs: spawn up to 3 agents to cover security, performance, and architectural concerns -4. **Vision Alignment Check** +4. **PR Scope & Title Alignment Check** + - Compare the PR title and description against the actual diff content + - Check whether the PR is focused on a single coherent change or contains multiple unrelated changes + - If the title/description describe one thing but the PR contains significantly more (e.g., title says "fix typo in README" but the diff touches 20 files across multiple domains), flag this as a **scope mismatch** + - A scope mismatch is a **merge blocker** — recommend the author split the PR into smaller, focused PRs + - Suggest specific ways to split the PR (e.g., "separate the refactor from the feature addition") + - Reviewing large, unfocused PRs is impractical and error-prone; the review cannot provide adequate assurance for such changes + +5. **Vision Alignment Check** - Read the project's README.md and CLAUDE.md to understand the application's core purpose - Assess whether this PR aligns with the application's intended functionality - If the changes deviate significantly from the core vision or add functionality that doesn't serve the application's purpose, note this in the review - This is not a blocker, but should be flagged for the reviewer's consideration -5. **Safety Assessment** +6. **Safety Assessment** - Provide a review on whether the PR is safe to merge as-is - Provide any feedback in terms of risk level -6. **Improvements** - - Propose any improvements in terms of importance and complexity \ No newline at end of file +7. **Improvements** + - Propose any improvements in terms of importance and complexity + +8. 
**Merge Recommendation** + - Based on all findings, provide a clear merge/don't-merge recommendation + - If all concerns are minor (cosmetic issues, naming suggestions, small style nits, missing comments, etc.), recommend **merging the PR** and note that the reviewer can address these minor concerns themselves with a quick follow-up commit pushed directly to master + - If there are significant concerns (bugs, security issues, architectural problems, scope mismatch), recommend **not merging** and explain what needs to be resolved first + +9. **TLDR** + - End the review with a `## TLDR` section + - In 3-5 bullet points maximum, summarize: + - What this PR is actually about (one sentence) + - The key concerns, if any (or "no significant concerns") + - **Verdict: MERGE** / **MERGE (with minor follow-up)** / **DON'T MERGE** with a one-line reason + - This section should be scannable in under 10 seconds \ No newline at end of file From e348383c1f807bc1563f7b173e4b439ffb09317b Mon Sep 17 00:00:00 2001 From: Auto Date: Sun, 1 Feb 2026 10:24:41 +0200 Subject: [PATCH 16/24] fix: add user-visible error handling for spec creation agent start The onComplete handler in the empty Kanban spec creation flow only logged errors to console.error, leaving users with no feedback when the agent failed to start. This wires up the SpecCreationChat component's built-in error UI (spinner, error banner, retry button) via initializerStatus, initializerError, and onRetryInitializer props. Changes: - Add InitializerStatus type and specInitializerStatus/Error state - Set status to 'starting' before startAgent call (shows spinner) - On error, keep spec chat open so the error banner is visible - On success, close chat and refresh queries (same as before) - Wire up onRetryInitializer to reset state to idle - Reset initializer status on cancel/exit for clean re-entry Co-Authored-By: Claude Opus 4.5 --- ui/src/App.tsx | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/ui/src/App.tsx b/ui/src/App.tsx index a78a4aa..16fc4c0 100644 --- a/ui/src/App.tsx +++ b/ui/src/App.tsx @@ -41,6 +41,8 @@ const VIEW_MODE_KEY = 'autocoder-view-mode' // Bottom padding for main content when debug panel is collapsed (40px header + 8px margin) const COLLAPSED_DEBUG_PANEL_CLEARANCE = 48 +type InitializerStatus = 'idle' | 'starting' | 'error' + function App() { // Initialize selected project from localStorage const [selectedProject, setSelectedProject] = useState(() => { @@ -63,6 +65,8 @@ function App() { const [isSpecCreating, setIsSpecCreating] = useState(false) const [showResetModal, setShowResetModal] = useState(false) const [showSpecChat, setShowSpecChat] = useState(false) // For "Create Spec" button in empty kanban + const [specInitializerStatus, setSpecInitializerStatus] = useState('idle') + const [specInitializerError, setSpecInitializerError] = useState(null) const [viewMode, setViewMode] = useState(() => { try { const stored = localStorage.getItem(VIEW_MODE_KEY) @@ -496,22 +500,30 @@ function App() { { - // Auto-start the agent after spec creation (same as NewProjectModal) + setSpecInitializerStatus('starting') try { await startAgent(selectedProject, { yoloMode: yoloMode ?? 
false, maxConcurrency: 3, }) + // Success — close chat and refresh + setShowSpecChat(false) + setSpecInitializerStatus('idle') + queryClient.invalidateQueries({ queryKey: ['projects'] }) + queryClient.invalidateQueries({ queryKey: ['features', selectedProject] }) } catch (err) { - console.error('Failed to start agent:', err) + setSpecInitializerStatus('error') + setSpecInitializerError(err instanceof Error ? err.message : 'Failed to start agent') } - setShowSpecChat(false) - // Refresh projects to update has_spec - queryClient.invalidateQueries({ queryKey: ['projects'] }) - queryClient.invalidateQueries({ queryKey: ['features', selectedProject] }) }} - onCancel={() => setShowSpecChat(false)} - onExitToProject={() => setShowSpecChat(false)} + onCancel={() => { setShowSpecChat(false); setSpecInitializerStatus('idle') }} + onExitToProject={() => { setShowSpecChat(false); setSpecInitializerStatus('idle') }} + initializerStatus={specInitializerStatus} + initializerError={specInitializerError} + onRetryInitializer={() => { + setSpecInitializerError(null) + setSpecInitializerStatus('idle') + }} />
)} From c4d0c6c9b25874352df2bfc312c450203b583e61 Mon Sep 17 00:00:00 2001 From: Auto Date: Sun, 1 Feb 2026 10:39:07 +0200 Subject: [PATCH 17/24] fix: address rate limit detection false positives and reset-time cap - Narrow `\boverloaded\b` regex to require server/api/system context, preventing false positives when Claude discusses method/operator overloading in OOP code (C++, Java, C#, etc.) - Restore 24-hour cap for absolute reset-time delays instead of 1-hour clamp, avoiding unnecessary retry loops when rate limits reset hours in the future - Add test for Retry-After: 0 returning 0 (regression lock for the `is not None` fix) - Add false positive tests for "overloaded" in programming context Co-Authored-By: Claude Opus 4.5 --- agent.py | 2 +- rate_limit_utils.py | 2 +- test_rate_limit_utils.py | 16 ++++++++++++++++ 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/agent.py b/agent.py index 9bd2c3b..e64f38b 100644 --- a/agent.py +++ b/agent.py @@ -325,7 +325,7 @@ async def run_autonomous_agent( target += timedelta(days=1) delta = target - now - delay_seconds = clamp_retry_delay(int(delta.total_seconds())) + delay_seconds = min(max(int(delta.total_seconds()), 1), 24 * 60 * 60) target_time_str = target.strftime("%B %d, %Y at %I:%M %p %Z") except Exception as e: diff --git a/rate_limit_utils.py b/rate_limit_utils.py index 9bdbb22..9c06f68 100644 --- a/rate_limit_utils.py +++ b/rate_limit_utils.py @@ -18,7 +18,7 @@ RATE_LIMIT_REGEX_PATTERNS = [ r"\bstatus\s*429\b", # "status 429", "status429" r"\berror\s*429\b", # "error 429", "error429" r"\b429\s+too\s+many", # "429 too many" - r"\boverloaded\b", # "overloaded" + r"\b(?:server|api|system)\s+(?:is\s+)?overloaded\b", # "server is overloaded", "api overloaded" r"\bquota\s*exceeded\b", # "quota exceeded" ] diff --git a/test_rate_limit_utils.py b/test_rate_limit_utils.py index 55ecaa7..4c91ef2 100644 --- a/test_rate_limit_utils.py +++ b/test_rate_limit_utils.py @@ -43,6 +43,11 @@ class TestParseRetryAfter(unittest.TestCase): assert parse_retry_after("60 seconds left") == 60 assert parse_retry_after("120 seconds until reset") == 120 + def test_retry_after_zero(self): + """Test 'Retry-After: 0' returns 0 (not None).""" + assert parse_retry_after("Retry-After: 0") == 0 + assert parse_retry_after("retry after 0 seconds") == 0 + def test_no_match(self): """Test messages that don't contain retry-after info.""" assert parse_retry_after("no match here") is None @@ -141,6 +146,17 @@ class TestFalsePositives(unittest.TestCase): assert is_rate_limit_error("File size limit reached") is False assert is_rate_limit_error("Memory limit reached, consider optimization") is False + def test_overloaded_in_programming_context(self): + """Method/operator overloading discussions should not trigger.""" + assert is_rate_limit_error("I will create an overloaded constructor") is False + assert is_rate_limit_error("The + operator is overloaded") is False + assert is_rate_limit_error("Here is the overloaded version of the function") is False + assert is_rate_limit_error("The method is overloaded to accept different types") is False + # But actual API overload messages should still match + assert is_rate_limit_error("Server is overloaded") is True + assert is_rate_limit_error("API overloaded") is True + assert is_rate_limit_error("system is overloaded") is True + class TestBackoffFunctions(unittest.TestCase): """Test backoff calculation functions from rate_limit_utils.""" From dc5bcc4ae933c079320c3df51406955b74bf3b5e Mon Sep 17 00:00:00 2001 From: Auto Date: Sun, 
1 Feb 2026 11:32:06 +0200 Subject: [PATCH 18/24] feat: move autocoder runtime files into .autocoder/ subdirectory Add centralized path resolution module (autocoder_paths.py) that consolidates all autocoder-generated file paths behind a dual-path strategy: check .autocoder/X first, fall back to root-level X for backward compatibility, default to .autocoder/X for new projects. Key changes: - New autocoder_paths.py with dual-path resolution for features.db, assistant.db, lock files, settings, prompts dir, and progress cache - migrate_project_layout() safely moves old-layout projects to new layout with SQLite WAL flush and integrity verification - Updated 22 files to delegate path construction to autocoder_paths - Reset/delete logic cleans both old and new file locations - Orphan lock cleanup checks both locations per project - Migration called automatically at agent start in autonomous_agent_demo.py - Updated markdown commands/skills to reference .autocoder/prompts/ - CLAUDE.md documentation updated with new project structure Files at project root that remain unchanged: - CLAUDE.md (Claude SDK reads from cwd via setting_sources=["project"]) - app_spec.txt root copy (agent templates reference it via cat) Co-Authored-By: Claude Opus 4.5 --- .claude/commands/create-spec.md | 26 +- .claude/commands/expand-project.md | 4 +- .claude/commands/gsd-to-autocoder-spec.md | 2 +- .claude/skills/gsd-to-autocoder-spec/SKILL.md | 12 +- CLAUDE.md | 20 +- api/database.py | 7 +- autocoder_paths.py | 290 ++++++++++++++++++ autonomous_agent_demo.py | 6 + client.py | 4 +- progress.py | 12 +- prompts.py | 9 +- server/main.py | 9 +- server/routers/devserver.py | 51 ++- server/routers/expand_project.py | 3 +- server/routers/features.py | 9 +- server/routers/projects.py | 72 +++-- server/routers/spec_creation.py | 3 +- server/services/assistant_chat_session.py | 7 +- server/services/assistant_database.py | 3 +- server/services/dev_server_manager.py | 32 +- server/services/expand_chat_session.py | 7 +- server/services/process_manager.py | 17 +- server/services/scheduler_service.py | 6 +- server/services/spec_chat_session.py | 7 +- 24 files changed, 532 insertions(+), 86 deletions(-) create mode 100644 autocoder_paths.py diff --git a/.claude/commands/create-spec.md b/.claude/commands/create-spec.md index f8a1b96..9c23abe 100644 --- a/.claude/commands/create-spec.md +++ b/.claude/commands/create-spec.md @@ -8,7 +8,7 @@ This command **requires** the project directory as an argument via `$ARGUMENTS`. **Example:** `/create-spec generations/my-app` -**Output location:** `$ARGUMENTS/prompts/app_spec.txt` and `$ARGUMENTS/prompts/initializer_prompt.md` +**Output location:** `$ARGUMENTS/.autocoder/prompts/app_spec.txt` and `$ARGUMENTS/.autocoder/prompts/initializer_prompt.md` If `$ARGUMENTS` is empty, inform the user they must provide a project path and exit. @@ -347,13 +347,13 @@ First ask in conversation if they want to make changes. ## Output Directory -The output directory is: `$ARGUMENTS/prompts/` +The output directory is: `$ARGUMENTS/.autocoder/prompts/` Once the user approves, generate these files: ## 1. Generate `app_spec.txt` -**Output path:** `$ARGUMENTS/prompts/app_spec.txt` +**Output path:** `$ARGUMENTS/.autocoder/prompts/app_spec.txt` Create a new file using this XML structure: @@ -489,7 +489,7 @@ Create a new file using this XML structure: ## 2. 
Update `initializer_prompt.md` -**Output path:** `$ARGUMENTS/prompts/initializer_prompt.md` +**Output path:** `$ARGUMENTS/.autocoder/prompts/initializer_prompt.md` If the output directory has an existing `initializer_prompt.md`, read it and update the feature count. If not, copy from `.claude/templates/initializer_prompt.template.md` first, then update. @@ -512,7 +512,7 @@ After: **CRITICAL:** You must create exactly **25** features using the `feature ## 3. Write Status File (REQUIRED - Do This Last) -**Output path:** `$ARGUMENTS/prompts/.spec_status.json` +**Output path:** `$ARGUMENTS/.autocoder/prompts/.spec_status.json` **CRITICAL:** After you have completed ALL requested file changes, write this status file to signal completion to the UI. This is required for the "Continue to Project" button to appear. @@ -524,8 +524,8 @@ Write this JSON file: "version": 1, "timestamp": "[current ISO 8601 timestamp, e.g., 2025-01-15T14:30:00.000Z]", "files_written": [ - "prompts/app_spec.txt", - "prompts/initializer_prompt.md" + ".autocoder/prompts/app_spec.txt", + ".autocoder/prompts/initializer_prompt.md" ], "feature_count": [the feature count from Phase 4L] } @@ -539,9 +539,9 @@ Write this JSON file: "version": 1, "timestamp": "2025-01-15T14:30:00.000Z", "files_written": [ - "prompts/app_spec.txt", - "prompts/initializer_prompt.md", - "prompts/coding_prompt.md" + ".autocoder/prompts/app_spec.txt", + ".autocoder/prompts/initializer_prompt.md", + ".autocoder/prompts/coding_prompt.md" ], "feature_count": 35 } @@ -559,11 +559,11 @@ Write this JSON file: Once files are generated, tell the user what to do next: -> "Your specification files have been created in `$ARGUMENTS/prompts/`! +> "Your specification files have been created in `$ARGUMENTS/.autocoder/prompts/`! > > **Files created:** -> - `$ARGUMENTS/prompts/app_spec.txt` -> - `$ARGUMENTS/prompts/initializer_prompt.md` +> - `$ARGUMENTS/.autocoder/prompts/app_spec.txt` +> - `$ARGUMENTS/.autocoder/prompts/initializer_prompt.md` > > The **Continue to Project** button should now appear. Click it to start the autonomous coding agent! > diff --git a/.claude/commands/expand-project.md b/.claude/commands/expand-project.md index e8005b2..0ddf027 100644 --- a/.claude/commands/expand-project.md +++ b/.claude/commands/expand-project.md @@ -42,7 +42,7 @@ You are the **Project Expansion Assistant** - an expert at understanding existin # FIRST: Read and Understand Existing Project **Step 1:** Read the existing specification: -- Read `$ARGUMENTS/prompts/app_spec.txt` +- Read `$ARGUMENTS/.autocoder/prompts/app_spec.txt` **Step 2:** Present a summary to the user: @@ -231,4 +231,4 @@ If they want to add more, go back to Phase 1. # BEGIN -Start by reading the app specification file at `$ARGUMENTS/prompts/app_spec.txt`, then greet the user with a summary of their existing project and ask what they want to add. +Start by reading the app specification file at `$ARGUMENTS/.autocoder/prompts/app_spec.txt`, then greet the user with a summary of their existing project and ask what they want to add. diff --git a/.claude/commands/gsd-to-autocoder-spec.md b/.claude/commands/gsd-to-autocoder-spec.md index fc41cee..dbaeff6 100644 --- a/.claude/commands/gsd-to-autocoder-spec.md +++ b/.claude/commands/gsd-to-autocoder-spec.md @@ -5,6 +5,6 @@ description: Convert GSD codebase mapping to Autocoder app_spec.txt # GSD to Autocoder Spec -Convert `.planning/codebase/*.md` (from `/gsd:map-codebase`) to Autocoder's `prompts/app_spec.txt`. 
+Convert `.planning/codebase/*.md` (from `/gsd:map-codebase`) to Autocoder's `.autocoder/prompts/app_spec.txt`. @.claude/skills/gsd-to-autocoder-spec/SKILL.md diff --git a/.claude/skills/gsd-to-autocoder-spec/SKILL.md b/.claude/skills/gsd-to-autocoder-spec/SKILL.md index d4fba24..167caf0 100644 --- a/.claude/skills/gsd-to-autocoder-spec/SKILL.md +++ b/.claude/skills/gsd-to-autocoder-spec/SKILL.md @@ -9,7 +9,7 @@ description: | # GSD to Autocoder Spec Converter -Converts `.planning/codebase/*.md` (GSD mapping output) to `prompts/app_spec.txt` (Autocoder format). +Converts `.planning/codebase/*.md` (GSD mapping output) to `.autocoder/prompts/app_spec.txt` (Autocoder format). ## When to Use @@ -84,7 +84,7 @@ Extract: Create `prompts/` directory: ```bash -mkdir -p prompts +mkdir -p .autocoder/prompts ``` **Mapping GSD Documents to Autocoder Spec:** @@ -114,7 +114,7 @@ mkdir -p prompts **Write the spec file** using the XML format from [references/app-spec-format.md](references/app-spec-format.md): ```bash -cat > prompts/app_spec.txt << 'EOF' +cat > .autocoder/prompts/app_spec.txt << 'EOF' {from package.json or directory} @@ -173,9 +173,9 @@ EOF ### Step 5: Verify Generated Spec ```bash -head -100 prompts/app_spec.txt +head -100 .autocoder/prompts/app_spec.txt echo "---" -grep -c "User can\|System\|API\|Feature" prompts/app_spec.txt || echo "0" +grep -c "User can\|System\|API\|Feature" .autocoder/prompts/app_spec.txt || echo "0" ``` **Validation checklist:** @@ -194,7 +194,7 @@ Output: app_spec.txt generated from GSD codebase mapping. Source: .planning/codebase/*.md -Output: prompts/app_spec.txt +Output: .autocoder/prompts/app_spec.txt Next: Start Autocoder diff --git a/CLAUDE.md b/CLAUDE.md index d92db4e..91a3f4c 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -125,6 +125,7 @@ Configuration in `pyproject.toml`: - `start.py` - CLI launcher with project creation/selection menu - `autonomous_agent_demo.py` - Entry point for running the agent +- `autocoder_paths.py` - Central path resolution with dual-path backward compatibility and migration - `agent.py` - Agent session loop using Claude Agent SDK - `client.py` - ClaudeSDKClient configuration with security hooks and MCP servers - `security.py` - Bash command allowlist validation (ALLOWED_COMMANDS whitelist) @@ -197,12 +198,17 @@ Keyboard shortcuts (press `?` for help): ### Project Structure for Generated Apps Projects can be stored in any directory (registered in `~/.autocoder/registry.db`). 
Each project contains: -- `prompts/app_spec.txt` - Application specification (XML format) -- `prompts/initializer_prompt.md` - First session prompt -- `prompts/coding_prompt.md` - Continuation session prompt -- `features.db` - SQLite database with feature test cases -- `.agent.lock` - Lock file to prevent multiple agent instances +- `.autocoder/prompts/app_spec.txt` - Application specification (XML format) +- `.autocoder/prompts/initializer_prompt.md` - First session prompt +- `.autocoder/prompts/coding_prompt.md` - Continuation session prompt +- `.autocoder/features.db` - SQLite database with feature test cases +- `.autocoder/.agent.lock` - Lock file to prevent multiple agent instances - `.autocoder/allowed_commands.yaml` - Project-specific bash command allowlist (optional) +- `.autocoder/.gitignore` - Ignores runtime files +- `CLAUDE.md` - Stays at project root (SDK convention) +- `app_spec.txt` - Root copy for agent template compatibility + +Legacy projects with files at root level (e.g., `features.db`, `prompts/`) are auto-migrated to `.autocoder/` on next agent start. Dual-path resolution ensures old and new layouts work transparently. ### Security Model @@ -364,12 +370,12 @@ Run coding agents using local models via Ollama v0.14.0+: ### Prompt Loading Fallback Chain -1. Project-specific: `{project_dir}/prompts/{name}.md` +1. Project-specific: `{project_dir}/.autocoder/prompts/{name}.md` (or legacy `{project_dir}/prompts/{name}.md`) 2. Base template: `.claude/templates/{name}.template.md` ### Agent Session Flow -1. Check if `features.db` has features (determines initializer vs coding agent) +1. Check if `.autocoder/features.db` has features (determines initializer vs coding agent) 2. Create ClaudeSDKClient with security settings 3. Send prompt and stream response 4. Auto-continue with 3-second delay between sessions diff --git a/api/database.py b/api/database.py index 2a732fe..4c5ef42 100644 --- a/api/database.py +++ b/api/database.py @@ -183,7 +183,8 @@ class ScheduleOverride(Base): def get_database_path(project_dir: Path) -> Path: """Return the path to the SQLite database for a project.""" - return project_dir / "features.db" + from autocoder_paths import get_features_db_path + return get_features_db_path(project_dir) def get_database_url(project_dir: Path) -> str: @@ -384,6 +385,10 @@ def create_database(project_dir: Path) -> tuple: db_url = get_database_url(project_dir) + # Ensure parent directory exists (for .autocoder/ layout) + db_path = get_database_path(project_dir) + db_path.parent.mkdir(parents=True, exist_ok=True) + # Choose journal mode based on filesystem type # WAL mode doesn't work reliably on network filesystems and can cause corruption is_network = _is_network_path(project_dir) diff --git a/autocoder_paths.py b/autocoder_paths.py new file mode 100644 index 0000000..7d1db6f --- /dev/null +++ b/autocoder_paths.py @@ -0,0 +1,290 @@ +""" +Autocoder Path Resolution +========================= + +Central module for resolving paths to autocoder-generated files within a project. + +Implements a dual-path resolution strategy for backward compatibility: + + 1. Check ``project_dir / ".autocoder" / X`` (new layout) + 2. Check ``project_dir / X`` (legacy root-level layout) + 3. Default to the new location for fresh projects + +This allows existing projects with root-level ``features.db``, ``.agent.lock``, +etc. to keep working while new projects store everything under ``.autocoder/``. 
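A minimal usage sketch of the dual-path helpers this module introduces (illustrative only, not part of the patch; the project path is hypothetical):

```python
from pathlib import Path
from autocoder_paths import get_features_db_path, migrate_project_layout

project = Path("/projects/todo-app")  # hypothetical project directory

# Legacy layout: features.db sits at the project root and .autocoder/ has no copy,
# so the helper falls back to the root-level path and old projects keep working.
db = get_features_db_path(project)

# migrate_project_layout() moves old-layout files into .autocoder/ (it returns an
# empty list and changes nothing if the project is already migrated or an agent
# is running). After migration the same helper resolves to .autocoder/features.db.
migrated = migrate_project_layout(project)
db = get_features_db_path(project)
```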
+ +The ``migrate_project_layout`` function can move an old-layout project to the +new layout safely, with full integrity checks for SQLite databases. +""" + +import logging +import shutil +import sqlite3 +from pathlib import Path + +logger = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# .gitignore content written into every .autocoder/ directory +# --------------------------------------------------------------------------- +_GITIGNORE_CONTENT = """\ +# Autocoder runtime files +features.db +features.db-wal +features.db-shm +assistant.db +assistant.db-wal +assistant.db-shm +.agent.lock +.devserver.lock +.claude_settings.json +.claude_assistant_settings.json +.claude_settings.expand.*.json +.progress_cache +""" + + +# --------------------------------------------------------------------------- +# Private helpers +# --------------------------------------------------------------------------- + +def _resolve_path(project_dir: Path, filename: str) -> Path: + """Resolve a file path using dual-path strategy. + + Checks the new ``.autocoder/`` location first, then falls back to the + legacy root-level location. If neither exists, returns the new location + so that newly-created files land in ``.autocoder/``. + """ + new = project_dir / ".autocoder" / filename + if new.exists(): + return new + old = project_dir / filename + if old.exists(): + return old + return new # default for new projects + + +def _resolve_dir(project_dir: Path, dirname: str) -> Path: + """Resolve a directory path using dual-path strategy. + + Same logic as ``_resolve_path`` but intended for directories such as + ``prompts/``. + """ + new = project_dir / ".autocoder" / dirname + if new.exists(): + return new + old = project_dir / dirname + if old.exists(): + return old + return new + + +# --------------------------------------------------------------------------- +# .autocoder directory management +# --------------------------------------------------------------------------- + +def get_autocoder_dir(project_dir: Path) -> Path: + """Return the ``.autocoder`` directory path. Does NOT create it.""" + return project_dir / ".autocoder" + + +def ensure_autocoder_dir(project_dir: Path) -> Path: + """Create the ``.autocoder/`` directory (if needed) and write its ``.gitignore``. + + Returns: + The path to the ``.autocoder`` directory. 
+ """ + autocoder_dir = get_autocoder_dir(project_dir) + autocoder_dir.mkdir(parents=True, exist_ok=True) + + gitignore_path = autocoder_dir / ".gitignore" + gitignore_path.write_text(_GITIGNORE_CONTENT, encoding="utf-8") + + return autocoder_dir + + +# --------------------------------------------------------------------------- +# Dual-path file helpers +# --------------------------------------------------------------------------- + +def get_features_db_path(project_dir: Path) -> Path: + """Resolve the path to ``features.db``.""" + return _resolve_path(project_dir, "features.db") + + +def get_assistant_db_path(project_dir: Path) -> Path: + """Resolve the path to ``assistant.db``.""" + return _resolve_path(project_dir, "assistant.db") + + +def get_agent_lock_path(project_dir: Path) -> Path: + """Resolve the path to ``.agent.lock``.""" + return _resolve_path(project_dir, ".agent.lock") + + +def get_devserver_lock_path(project_dir: Path) -> Path: + """Resolve the path to ``.devserver.lock``.""" + return _resolve_path(project_dir, ".devserver.lock") + + +def get_claude_settings_path(project_dir: Path) -> Path: + """Resolve the path to ``.claude_settings.json``.""" + return _resolve_path(project_dir, ".claude_settings.json") + + +def get_claude_assistant_settings_path(project_dir: Path) -> Path: + """Resolve the path to ``.claude_assistant_settings.json``.""" + return _resolve_path(project_dir, ".claude_assistant_settings.json") + + +def get_progress_cache_path(project_dir: Path) -> Path: + """Resolve the path to ``.progress_cache``.""" + return _resolve_path(project_dir, ".progress_cache") + + +def get_prompts_dir(project_dir: Path) -> Path: + """Resolve the path to the ``prompts/`` directory.""" + return _resolve_dir(project_dir, "prompts") + + +# --------------------------------------------------------------------------- +# Non-dual-path helpers (always use new location) +# --------------------------------------------------------------------------- + +def get_expand_settings_path(project_dir: Path, uuid_hex: str) -> Path: + """Return the path for an ephemeral expand-session settings file. + + These files are short-lived and always stored in ``.autocoder/``. + """ + return project_dir / ".autocoder" / f".claude_settings.expand.{uuid_hex}.json" + + +# --------------------------------------------------------------------------- +# Lock-file safety check +# --------------------------------------------------------------------------- + +def has_agent_running(project_dir: Path) -> bool: + """Check whether any agent or dev-server lock file exists at either location. + + Inspects both the legacy root-level paths and the new ``.autocoder/`` + paths so that a running agent is detected regardless of project layout. + + Returns: + ``True`` if any ``.agent.lock`` or ``.devserver.lock`` exists. + """ + lock_names = (".agent.lock", ".devserver.lock") + for name in lock_names: + if (project_dir / name).exists(): + return True + if (project_dir / ".autocoder" / name).exists(): + return True + return False + + +# --------------------------------------------------------------------------- +# Migration +# --------------------------------------------------------------------------- + +def migrate_project_layout(project_dir: Path) -> list[str]: + """Migrate a project from the legacy root-level layout to ``.autocoder/``. + + The migration is incremental and safe: + + * If the agent is running (lock files present) the migration is skipped + entirely to avoid corrupting in-use databases. 
+ * Each file/directory is migrated independently. If any single step + fails the error is logged and migration continues with the remaining + items. Partial migration is safe because the dual-path resolution + strategy will find files at whichever location they ended up in. + + Returns: + A list of human-readable descriptions of what was migrated, e.g. + ``["prompts/ -> .autocoder/prompts/", "features.db -> .autocoder/features.db"]``. + An empty list means nothing was migrated (either everything is + already migrated, or the agent is running). + """ + # Safety: refuse to migrate while an agent is running + if has_agent_running(project_dir): + logger.warning("Migration skipped: agent or dev-server is running for %s", project_dir) + return [] + + autocoder_dir = ensure_autocoder_dir(project_dir) + migrated: list[str] = [] + + # --- 1. Migrate prompts/ directory ----------------------------------- + try: + old_prompts = project_dir / "prompts" + new_prompts = autocoder_dir / "prompts" + if old_prompts.exists() and old_prompts.is_dir() and not new_prompts.exists(): + shutil.copytree(str(old_prompts), str(new_prompts)) + shutil.rmtree(str(old_prompts)) + migrated.append("prompts/ -> .autocoder/prompts/") + logger.info("Migrated prompts/ -> .autocoder/prompts/") + except Exception: + logger.warning("Failed to migrate prompts/ directory", exc_info=True) + + # --- 2. Migrate SQLite databases (features.db, assistant.db) --------- + db_names = ("features.db", "assistant.db") + for db_name in db_names: + try: + old_db = project_dir / db_name + new_db = autocoder_dir / db_name + if old_db.exists() and not new_db.exists(): + # Flush WAL to ensure all data is in the main database file + conn = sqlite3.connect(str(old_db)) + try: + cursor = conn.cursor() + cursor.execute("PRAGMA wal_checkpoint(TRUNCATE)") + finally: + conn.close() + + # Copy the main database file (WAL is now flushed) + shutil.copy2(str(old_db), str(new_db)) + + # Verify the copy is intact + verify_conn = sqlite3.connect(str(new_db)) + try: + verify_cursor = verify_conn.cursor() + result = verify_cursor.execute("PRAGMA integrity_check").fetchone() + if result is None or result[0] != "ok": + logger.error( + "Integrity check failed for migrated %s: %s", + db_name, result, + ) + # Remove the broken copy; old file stays in place + new_db.unlink(missing_ok=True) + continue + finally: + verify_conn.close() + + # Remove old database files (.db, .db-wal, .db-shm) + old_db.unlink(missing_ok=True) + for suffix in ("-wal", "-shm"): + wal_file = project_dir / f"{db_name}{suffix}" + wal_file.unlink(missing_ok=True) + + migrated.append(f"{db_name} -> .autocoder/{db_name}") + logger.info("Migrated %s -> .autocoder/%s", db_name, db_name) + except Exception: + logger.warning("Failed to migrate %s", db_name, exc_info=True) + + # --- 3. 
Migrate simple files ----------------------------------------- + simple_files = ( + ".agent.lock", + ".devserver.lock", + ".claude_settings.json", + ".claude_assistant_settings.json", + ".progress_cache", + ) + for filename in simple_files: + try: + old_file = project_dir / filename + new_file = autocoder_dir / filename + if old_file.exists() and not new_file.exists(): + shutil.move(str(old_file), str(new_file)) + migrated.append(f"{filename} -> .autocoder/{filename}") + logger.info("Migrated %s -> .autocoder/%s", filename, filename) + except Exception: + logger.warning("Failed to migrate %s", filename, exc_info=True) + + return migrated diff --git a/autonomous_agent_demo.py b/autonomous_agent_demo.py index 16702f5..03ceb7f 100644 --- a/autonomous_agent_demo.py +++ b/autonomous_agent_demo.py @@ -193,6 +193,12 @@ def main() -> None: print("Use an absolute path or register the project first.") return + # Migrate project layout to .autocoder/ if needed (idempotent, safe) + from autocoder_paths import migrate_project_layout + migrated = migrate_project_layout(project_dir) + if migrated: + print(f"Migrated project files to .autocoder/: {', '.join(migrated)}", flush=True) + try: if args.agent_type: # Subprocess mode - spawned by orchestrator for a specific role diff --git a/client.py b/client.py index f394ebb..0b55295 100644 --- a/client.py +++ b/client.py @@ -360,7 +360,9 @@ def create_client( project_dir.mkdir(parents=True, exist_ok=True) # Write settings to a file in the project directory - settings_file = project_dir / ".claude_settings.json" + from autocoder_paths import get_claude_settings_path + settings_file = get_claude_settings_path(project_dir) + settings_file.parent.mkdir(parents=True, exist_ok=True) with open(settings_file, "w") as f: json.dump(security_settings, f, indent=2) diff --git a/progress.py b/progress.py index 1f17ae6..f0795b6 100644 --- a/progress.py +++ b/progress.py @@ -46,7 +46,8 @@ def has_features(project_dir: Path) -> bool: return True # Check SQLite database - db_file = project_dir / "features.db" + from autocoder_paths import get_features_db_path + db_file = get_features_db_path(project_dir) if not db_file.exists(): return False @@ -71,7 +72,8 @@ def count_passing_tests(project_dir: Path) -> tuple[int, int, int]: Returns: (passing_count, in_progress_count, total_count) """ - db_file = project_dir / "features.db" + from autocoder_paths import get_features_db_path + db_file = get_features_db_path(project_dir) if not db_file.exists(): return 0, 0, 0 @@ -120,7 +122,8 @@ def get_all_passing_features(project_dir: Path) -> list[dict]: Returns: List of dicts with id, category, name for each passing feature """ - db_file = project_dir / "features.db" + from autocoder_paths import get_features_db_path + db_file = get_features_db_path(project_dir) if not db_file.exists(): return [] @@ -144,7 +147,8 @@ def send_progress_webhook(passing: int, total: int, project_dir: Path) -> None: if not WEBHOOK_URL: return # Webhook not configured - cache_file = project_dir / PROGRESS_CACHE_FILE + from autocoder_paths import get_progress_cache_path + cache_file = get_progress_cache_path(project_dir) previous = 0 previous_passing_ids = set() diff --git a/prompts.py b/prompts.py index 137928c..b2ab11b 100644 --- a/prompts.py +++ b/prompts.py @@ -18,7 +18,8 @@ TEMPLATES_DIR = Path(__file__).parent / ".claude" / "templates" def get_project_prompts_dir(project_dir: Path) -> Path: """Get the prompts directory for a specific project.""" - return project_dir / "prompts" + from autocoder_paths import 
get_prompts_dir + return get_prompts_dir(project_dir) def load_prompt(name: str, project_dir: Path | None = None) -> str: @@ -190,9 +191,9 @@ def scaffold_project_prompts(project_dir: Path) -> Path: project_prompts = get_project_prompts_dir(project_dir) project_prompts.mkdir(parents=True, exist_ok=True) - # Create .autocoder directory for configuration files - autocoder_dir = project_dir / ".autocoder" - autocoder_dir.mkdir(parents=True, exist_ok=True) + # Create .autocoder directory with .gitignore for runtime files + from autocoder_paths import ensure_autocoder_dir + autocoder_dir = ensure_autocoder_dir(project_dir) # Define template mappings: (source_template, destination_name) templates = [ diff --git a/server/main.py b/server/main.py index 1b01f79..e46f436 100644 --- a/server/main.py +++ b/server/main.py @@ -222,7 +222,14 @@ if UI_DIST_DIR.exists(): raise HTTPException(status_code=404) # Try to serve the file directly - file_path = UI_DIST_DIR / path + file_path = (UI_DIST_DIR / path).resolve() + + # Ensure resolved path is within UI_DIST_DIR (prevent path traversal) + try: + file_path.relative_to(UI_DIST_DIR.resolve()) + except ValueError: + raise HTTPException(status_code=404) + if file_path.exists() and file_path.is_file(): return FileResponse(file_path) diff --git a/server/routers/devserver.py b/server/routers/devserver.py index 18f91ec..9892e3a 100644 --- a/server/routers/devserver.py +++ b/server/routers/devserver.py @@ -6,6 +6,7 @@ API endpoints for dev server control (start/stop) and configuration. Uses project registry for path lookups and project_config for command detection. """ +import logging import re import sys from pathlib import Path @@ -33,6 +34,9 @@ if str(_root) not in sys.path: sys.path.insert(0, str(_root)) from registry import get_project_path as registry_get_project_path +from security import extract_commands, get_effective_commands, is_command_allowed + +logger = logging.getLogger(__name__) def _get_project_path(project_name: str) -> Path | None: @@ -106,6 +110,45 @@ def get_project_devserver_manager(project_name: str): return get_devserver_manager(project_name, project_dir) +def validate_dev_command(command: str, project_dir: Path) -> None: + """ + Validate a dev server command against the security allowlist. + + Extracts all commands from the shell string and checks each against + the effective allowlist (global + org + project). Raises HTTPException + if any command is blocked or not allowed. 
+ + Args: + command: The shell command string to validate + project_dir: Project directory for loading project-level allowlists + + Raises: + HTTPException 400: If the command fails validation + """ + commands = extract_commands(command) + if not commands: + raise HTTPException( + status_code=400, + detail="Could not parse command for security validation" + ) + + allowed_commands, blocked_commands = get_effective_commands(project_dir) + + for cmd in commands: + if cmd in blocked_commands: + logger.warning("Blocked dev server command '%s' (in blocklist) for project dir %s", cmd, project_dir) + raise HTTPException( + status_code=400, + detail=f"Command '{cmd}' is blocked and cannot be used as a dev server command" + ) + if not is_command_allowed(cmd, allowed_commands): + logger.warning("Rejected dev server command '%s' (not in allowlist) for project dir %s", cmd, project_dir) + raise HTTPException( + status_code=400, + detail=f"Command '{cmd}' is not in the allowed commands list" + ) + + # ============================================================================ # Endpoints # ============================================================================ @@ -167,7 +210,10 @@ async def start_devserver( detail="No dev command available. Configure a custom command or ensure project type can be detected." ) - # Now command is definitely str + # Validate command against security allowlist before execution + validate_dev_command(command, project_dir) + + # Now command is definitely str and validated success, message = await manager.start(command) return DevServerActionResponse( @@ -258,6 +304,9 @@ async def update_devserver_config( except ValueError as e: raise HTTPException(status_code=400, detail=str(e)) else: + # Validate command against security allowlist before persisting + validate_dev_command(update.custom_command, project_dir) + # Set the custom command try: set_dev_command(project_dir, update.custom_command) diff --git a/server/routers/expand_project.py b/server/routers/expand_project.py index 50bf196..7f6c985 100644 --- a/server/routers/expand_project.py +++ b/server/routers/expand_project.py @@ -136,7 +136,8 @@ async def expand_project_websocket(websocket: WebSocket, project_name: str): return # Verify project has app_spec.txt - spec_path = project_dir / "prompts" / "app_spec.txt" + from autocoder_paths import get_prompts_dir + spec_path = get_prompts_dir(project_dir) / "app_spec.txt" if not spec_path.exists(): await websocket.close(code=4004, reason="Project has no spec. 
Create spec first.") return diff --git a/server/routers/features.py b/server/routers/features.py index a0e1664..ab95843 100644 --- a/server/routers/features.py +++ b/server/routers/features.py @@ -134,7 +134,8 @@ async def list_features(project_name: str): if not project_dir.exists(): raise HTTPException(status_code=404, detail="Project directory not found") - db_file = project_dir / "features.db" + from autocoder_paths import get_features_db_path + db_file = get_features_db_path(project_dir) if not db_file.exists(): return FeatureListResponse(pending=[], in_progress=[], done=[]) @@ -329,7 +330,8 @@ async def get_dependency_graph(project_name: str): if not project_dir.exists(): raise HTTPException(status_code=404, detail="Project directory not found") - db_file = project_dir / "features.db" + from autocoder_paths import get_features_db_path + db_file = get_features_db_path(project_dir) if not db_file.exists(): return DependencyGraphResponse(nodes=[], edges=[]) @@ -393,7 +395,8 @@ async def get_feature(project_name: str, feature_id: int): if not project_dir.exists(): raise HTTPException(status_code=404, detail="Project directory not found") - db_file = project_dir / "features.db" + from autocoder_paths import get_features_db_path + db_file = get_features_db_path(project_dir) if not db_file.exists(): raise HTTPException(status_code=404, detail="No features database found") diff --git a/server/routers/projects.py b/server/routers/projects.py index 0f76ff9..7ecfe08 100644 --- a/server/routers/projects.py +++ b/server/routers/projects.py @@ -269,8 +269,8 @@ async def delete_project(name: str, delete_files: bool = False): raise HTTPException(status_code=404, detail=f"Project '{name}' not found") # Check if agent is running - lock_file = project_dir / ".agent.lock" - if lock_file.exists(): + from autocoder_paths import has_agent_running + if has_agent_running(project_dir): raise HTTPException( status_code=409, detail="Cannot delete project while agent is running. Stop the agent first." @@ -398,8 +398,8 @@ async def reset_project(name: str, full_reset: bool = False): raise HTTPException(status_code=404, detail="Project directory not found") # Check if agent is running - lock_file = project_dir / ".agent.lock" - if lock_file.exists(): + from autocoder_paths import has_agent_running + if has_agent_running(project_dir): raise HTTPException( status_code=409, detail="Cannot reset project while agent is running. Stop the agent first." 
@@ -415,36 +415,58 @@ async def reset_project(name: str, full_reset: bool = False): deleted_files: list[str] = [] - # Files to delete in quick reset - quick_reset_files = [ - "features.db", - "features.db-wal", # WAL mode journal file - "features.db-shm", # WAL mode shared memory file - "assistant.db", - "assistant.db-wal", - "assistant.db-shm", - ".claude_settings.json", - ".claude_assistant_settings.json", + from autocoder_paths import ( + get_assistant_db_path, + get_claude_assistant_settings_path, + get_claude_settings_path, + get_features_db_path, + ) + + # Build list of files to delete using path helpers (finds files at current location) + # Plus explicit old-location fallbacks for backward compatibility + db_path = get_features_db_path(project_dir) + asst_path = get_assistant_db_path(project_dir) + reset_files: list[Path] = [ + db_path, + db_path.with_suffix(".db-wal"), + db_path.with_suffix(".db-shm"), + asst_path, + asst_path.with_suffix(".db-wal"), + asst_path.with_suffix(".db-shm"), + get_claude_settings_path(project_dir), + get_claude_assistant_settings_path(project_dir), + # Also clean old root-level locations if they exist + project_dir / "features.db", + project_dir / "features.db-wal", + project_dir / "features.db-shm", + project_dir / "assistant.db", + project_dir / "assistant.db-wal", + project_dir / "assistant.db-shm", + project_dir / ".claude_settings.json", + project_dir / ".claude_assistant_settings.json", ] - for filename in quick_reset_files: - file_path = project_dir / filename + for file_path in reset_files: if file_path.exists(): try: + relative = file_path.relative_to(project_dir) file_path.unlink() - deleted_files.append(filename) + deleted_files.append(str(relative)) except Exception as e: - raise HTTPException(status_code=500, detail=f"Failed to delete {filename}: {e}") + raise HTTPException(status_code=500, detail=f"Failed to delete {file_path.name}: {e}") # Full reset: also delete prompts directory if full_reset: - prompts_dir = project_dir / "prompts" - if prompts_dir.exists(): - try: - shutil.rmtree(prompts_dir) - deleted_files.append("prompts/") - except Exception as e: - raise HTTPException(status_code=500, detail=f"Failed to delete prompts/: {e}") + from autocoder_paths import get_prompts_dir + # Delete prompts from both possible locations + for prompts_dir in [get_prompts_dir(project_dir), project_dir / "prompts"]: + if prompts_dir.exists(): + try: + relative = prompts_dir.relative_to(project_dir) + shutil.rmtree(prompts_dir) + deleted_files.append(f"{relative}/") + except Exception as e: + raise HTTPException(status_code=500, detail=f"Failed to delete prompts: {e}") return { "success": True, diff --git a/server/routers/spec_creation.py b/server/routers/spec_creation.py index 87f79a6..c29da6b 100644 --- a/server/routers/spec_creation.py +++ b/server/routers/spec_creation.py @@ -124,7 +124,8 @@ async def get_spec_file_status(project_name: str): if not project_dir.exists(): raise HTTPException(status_code=404, detail="Project directory not found") - status_file = project_dir / "prompts" / ".spec_status.json" + from autocoder_paths import get_prompts_dir + status_file = get_prompts_dir(project_dir) / ".spec_status.json" if not status_file.exists(): return SpecFileStatus( diff --git a/server/services/assistant_chat_session.py b/server/services/assistant_chat_session.py index 1fb26e1..2ac41fc 100755 --- a/server/services/assistant_chat_session.py +++ b/server/services/assistant_chat_session.py @@ -76,7 +76,8 @@ def get_system_prompt(project_name: str, 
project_dir: Path) -> str: """Generate the system prompt for the assistant with project context.""" # Try to load app_spec.txt for context app_spec_content = "" - app_spec_path = project_dir / "prompts" / "app_spec.txt" + from autocoder_paths import get_prompts_dir + app_spec_path = get_prompts_dir(project_dir) / "app_spec.txt" if app_spec_path.exists(): try: app_spec_content = app_spec_path.read_text(encoding="utf-8") @@ -235,7 +236,9 @@ class AssistantChatSession: "allow": permissions_list, }, } - settings_file = self.project_dir / ".claude_assistant_settings.json" + from autocoder_paths import get_claude_assistant_settings_path + settings_file = get_claude_assistant_settings_path(self.project_dir) + settings_file.parent.mkdir(parents=True, exist_ok=True) with open(settings_file, "w") as f: json.dump(security_settings, f, indent=2) diff --git a/server/services/assistant_database.py b/server/services/assistant_database.py index 0dbfdd3..b91a388 100644 --- a/server/services/assistant_database.py +++ b/server/services/assistant_database.py @@ -63,7 +63,8 @@ class ConversationMessage(Base): def get_db_path(project_dir: Path) -> Path: """Get the path to the assistant database for a project.""" - return project_dir / "assistant.db" + from autocoder_paths import get_assistant_db_path + return get_assistant_db_path(project_dir) def get_engine(project_dir: Path): diff --git a/server/services/dev_server_manager.py b/server/services/dev_server_manager.py index 5acfbc8..41dac02 100644 --- a/server/services/dev_server_manager.py +++ b/server/services/dev_server_manager.py @@ -24,6 +24,7 @@ from typing import Awaitable, Callable, Literal, Set import psutil from registry import list_registered_projects +from security import extract_commands, get_effective_commands, is_command_allowed from server.utils.process_utils import kill_process_tree logger = logging.getLogger(__name__) @@ -114,7 +115,8 @@ class DevServerProcessManager: self._callbacks_lock = threading.Lock() # Lock file to prevent multiple instances (stored in project directory) - self.lock_file = self.project_dir / ".devserver.lock" + from autocoder_paths import get_devserver_lock_path + self.lock_file = get_devserver_lock_path(self.project_dir) @property def status(self) -> Literal["stopped", "running", "crashed"]: @@ -304,6 +306,20 @@ class DevServerProcessManager: if not self.project_dir.exists(): return False, f"Project directory does not exist: {self.project_dir}" + # Defense-in-depth: validate command against security allowlist + commands = extract_commands(command) + if not commands: + return False, "Could not parse command for security validation" + + allowed_commands, blocked_commands = get_effective_commands(self.project_dir) + for cmd in commands: + if cmd in blocked_commands: + logger.warning("Blocked dev server command '%s' (in blocklist) for %s", cmd, self.project_name) + return False, f"Command '{cmd}' is blocked and cannot be used as a dev server command" + if not is_command_allowed(cmd, allowed_commands): + logger.warning("Rejected dev server command '%s' (not in allowlist) for %s", cmd, self.project_name) + return False, f"Command '{cmd}' is not in the allowed commands list" + self._command = command self._detected_url = None # Reset URL detection @@ -487,8 +503,18 @@ def cleanup_orphaned_devserver_locks() -> int: if not project_path.exists(): continue - lock_file = project_path / ".devserver.lock" - if not lock_file.exists(): + # Check both legacy and new locations for lock files + from autocoder_paths import 
get_autocoder_dir + lock_locations = [ + project_path / ".devserver.lock", + get_autocoder_dir(project_path) / ".devserver.lock", + ] + lock_file = None + for candidate in lock_locations: + if candidate.exists(): + lock_file = candidate + break + if lock_file is None: continue try: diff --git a/server/services/expand_chat_session.py b/server/services/expand_chat_session.py index 6829372..2960e2e 100644 --- a/server/services/expand_chat_session.py +++ b/server/services/expand_chat_session.py @@ -128,7 +128,8 @@ class ExpandChatSession: return # Verify project has existing spec - spec_path = self.project_dir / "prompts" / "app_spec.txt" + from autocoder_paths import get_prompts_dir + spec_path = get_prompts_dir(self.project_dir) / "app_spec.txt" if not spec_path.exists(): yield { "type": "error", @@ -166,7 +167,9 @@ class ExpandChatSession: ], }, } - settings_file = self.project_dir / f".claude_settings.expand.{uuid.uuid4().hex}.json" + from autocoder_paths import get_expand_settings_path + settings_file = get_expand_settings_path(self.project_dir, uuid.uuid4().hex) + settings_file.parent.mkdir(parents=True, exist_ok=True) self._settings_file = settings_file with open(settings_file, "w", encoding="utf-8") as f: json.dump(security_settings, f, indent=2) diff --git a/server/services/process_manager.py b/server/services/process_manager.py index fd1a192..7f461c5 100644 --- a/server/services/process_manager.py +++ b/server/services/process_manager.py @@ -92,7 +92,8 @@ class AgentProcessManager: self._callbacks_lock = threading.Lock() # Lock file to prevent multiple instances (stored in project directory) - self.lock_file = self.project_dir / ".agent.lock" + from autocoder_paths import get_agent_lock_path + self.lock_file = get_agent_lock_path(self.project_dir) @property def status(self) -> Literal["stopped", "running", "paused", "crashed"]: @@ -579,8 +580,18 @@ def cleanup_orphaned_locks() -> int: if not project_path.exists(): continue - lock_file = project_path / ".agent.lock" - if not lock_file.exists(): + # Check both legacy and new locations for lock files + from autocoder_paths import get_autocoder_dir + lock_locations = [ + project_path / ".agent.lock", + get_autocoder_dir(project_path) / ".agent.lock", + ] + lock_file = None + for candidate in lock_locations: + if candidate.exists(): + lock_file = candidate + break + if lock_file is None: continue try: diff --git a/server/services/scheduler_service.py b/server/services/scheduler_service.py index eb22a3a..578aed2 100644 --- a/server/services/scheduler_service.py +++ b/server/services/scheduler_service.py @@ -92,8 +92,9 @@ class SchedulerService: async def _load_project_schedules(self, project_name: str, project_dir: Path) -> int: """Load schedules for a single project. 
Returns count of schedules loaded.""" from api.database import Schedule, create_database + from autocoder_paths import get_features_db_path - db_path = project_dir / "features.db" + db_path = get_features_db_path(project_dir) if not db_path.exists(): return 0 @@ -567,8 +568,9 @@ class SchedulerService: ): """Check if a project should be started on server startup.""" from api.database import Schedule, ScheduleOverride, create_database + from autocoder_paths import get_features_db_path - db_path = project_dir / "features.db" + db_path = get_features_db_path(project_dir) if not db_path.exists(): return diff --git a/server/services/spec_chat_session.py b/server/services/spec_chat_session.py index c86bda2..ce49ea4 100644 --- a/server/services/spec_chat_session.py +++ b/server/services/spec_chat_session.py @@ -125,7 +125,8 @@ class SpecChatSession: # Delete app_spec.txt so Claude can create it fresh # The SDK requires reading existing files before writing, but app_spec.txt is created new # Note: We keep initializer_prompt.md so Claude can read and update the template - prompts_dir = self.project_dir / "prompts" + from autocoder_paths import get_prompts_dir + prompts_dir = get_prompts_dir(self.project_dir) app_spec_path = prompts_dir / "app_spec.txt" if app_spec_path.exists(): app_spec_path.unlink() @@ -145,7 +146,9 @@ class SpecChatSession: ], }, } - settings_file = self.project_dir / ".claude_settings.json" + from autocoder_paths import get_claude_settings_path + settings_file = get_claude_settings_path(self.project_dir) + settings_file.parent.mkdir(parents=True, exist_ok=True) with open(settings_file, "w") as f: json.dump(security_settings, f, indent=2) From 94e0b05cb1af4aa501ff8dd742e2f4a6484ed657 Mon Sep 17 00:00:00 2001 From: Auto Date: Sun, 1 Feb 2026 13:16:24 +0200 Subject: [PATCH 19/24] refactor: optimize token usage, deduplicate code, fix bugs across agents Token reduction (~40% per session, ~2.3M fewer tokens per 200-feature project): - Agent-type-specific tool lists: coding 9, testing 5, init 5 (was 19 for all) - Right-sized max_turns: coding 300, testing 100 (was 1000 for all) - Trimmed coding prompt template (~150 lines removed) - Streamlined testing prompt with batch support - YOLO mode now strips browser testing instructions from prompt - Added Grep, WebFetch, WebSearch to expand project session Performance improvements: - Rate limit retries start at ~15s with jitter (was fixed 60s) - Post-spawn delay reduced to 0.5s (was 2s) - Orchestrator consolidated to 1 DB query per loop (was 5-7) - Testing agents batch 3 features per session (was 1) - Smart context compaction preserves critical state, discards noise Bug fixes: - Removed ghost feature_release_testing MCP tool (wasted tokens every test session) - Forward all 9 Vertex AI env vars to chat sessions (was missing 3) - Fix DetachedInstanceError risk in test batch ORM access - Prevent duplicate testing of same features in parallel mode Code deduplication: - _get_project_path(): 9 copies -> 1 shared utility (project_helpers.py) - validate_project_name(): 9 copies -> 2 variants in 1 file (validation.py) - ROOT_DIR: 10 copies -> 1 definition (chat_constants.py) - API_ENV_VARS: 4 copies -> 1 source of truth (env_constants.py) Security hardening: - Unified sensitive directory blocklist (14 dirs, was two divergent lists) - Cached get_blocked_paths() for O(1) directory listing checks - Terminal security warning when ALLOW_REMOTE=1 exposes WebSocket - 20 new security tests for EXTRA_READ_PATHS blocking - Extracted _validate_command_list() 
and _validate_pkill_processes() helpers Type safety: - 87 mypy errors -> 0 across 58 source files - Installed types-PyYAML for proper yaml stub types - Fixed SQLAlchemy Column[T] coercions across all routers Dead code removed: - 13 files deleted (~2,679 lines): unused UI components, debug logs, outdated docs - 7 unused npm packages removed (Radix UI components with 0 imports) - AgentAvatar.tsx reduced from 615 -> 119 lines (SVGs extracted to mascotData.tsx) New CLI options: - --testing-batch-size (1-5) for parallel mode test batching - --testing-feature-ids for direct multi-feature testing Co-Authored-By: Claude Opus 4.5 --- .claude/templates/coding_prompt.template.md | 164 +- .claude/templates/testing_prompt.template.md | 106 +- .gitignore | 2 + CLAUDE.md | 1 - CUSTOM_UPDATES.md | 228 --- PHASE3_SPEC.md | 1591 ------------------ SAMPLE_PROMPT.md | 22 - agent.py | 10 +- api/dependency_resolver.py | 10 +- autonomous_agent_demo.py | 27 +- client.py | 225 ++- env_constants.py | 27 + mcp_server/feature_mcp.py | 2 +- parallel_orchestrator.py | 542 +++--- prompts.py | 132 +- rate_limit_utils.py | 18 +- requirements.txt | 1 + security.py | 213 +-- server/main.py | 12 +- server/routers/agent.py | 28 +- server/routers/assistant_chat.py | 29 +- server/routers/devserver.py | 26 +- server/routers/expand_project.py | 17 +- server/routers/features.py | 17 +- server/routers/filesystem.py | 30 +- server/routers/projects.py | 21 +- server/routers/schedules.py | 93 +- server/routers/settings.py | 5 +- server/routers/spec_creation.py | 23 +- server/routers/terminal.py | 32 +- server/services/assistant_chat_session.py | 26 +- server/services/assistant_database.py | 3 +- server/services/chat_constants.py | 57 + server/services/expand_chat_session.py | 45 +- server/services/process_manager.py | 4 +- server/services/spec_chat_session.py | 50 +- server/services/terminal_manager.py | 4 +- server/utils/project_helpers.py | 32 + server/utils/validation.py | 40 +- server/websocket.py | 53 +- start_ui.py | 2 +- summary.md | 146 ++ test_client.py | 162 +- test_rate_limit_utils.py | 17 +- test_security.py | 34 +- ui/package-lock.json | 322 ---- ui/package.json | 7 - ui/src/components/AgentAvatar.tsx | 511 +----- ui/src/components/mascotData.tsx | 529 ++++++ ui/src/components/ui/popover.tsx | 87 - ui/src/components/ui/radio-group.tsx | 45 - ui/src/components/ui/scroll-area.tsx | 56 - ui/src/components/ui/select.tsx | 190 --- ui/src/components/ui/tabs.tsx | 89 - ui/src/components/ui/toggle.tsx | 47 - ui/src/components/ui/tooltip.tsx | 61 - ui/vite.config.ts | 1 - 57 files changed, 1974 insertions(+), 4300 deletions(-) delete mode 100644 CUSTOM_UPDATES.md delete mode 100644 PHASE3_SPEC.md delete mode 100644 SAMPLE_PROMPT.md create mode 100644 env_constants.py create mode 100644 server/services/chat_constants.py create mode 100644 server/utils/project_helpers.py create mode 100644 summary.md create mode 100644 ui/src/components/mascotData.tsx delete mode 100644 ui/src/components/ui/popover.tsx delete mode 100644 ui/src/components/ui/radio-group.tsx delete mode 100644 ui/src/components/ui/scroll-area.tsx delete mode 100644 ui/src/components/ui/select.tsx delete mode 100644 ui/src/components/ui/tabs.tsx delete mode 100644 ui/src/components/ui/toggle.tsx delete mode 100644 ui/src/components/ui/tooltip.tsx diff --git a/.claude/templates/coding_prompt.template.md b/.claude/templates/coding_prompt.template.md index 9322404..c8d3ba6 100644 --- a/.claude/templates/coding_prompt.template.md +++ 
b/.claude/templates/coding_prompt.template.md @@ -49,51 +49,21 @@ Otherwise, start servers manually and document the process. #### TEST-DRIVEN DEVELOPMENT MINDSET (CRITICAL) -Features are **test cases** that drive development. This is test-driven development: +Features are **test cases** that drive development. If functionality doesn't exist, **BUILD IT** -- you are responsible for implementing ALL required functionality. Missing pages, endpoints, database tables, or components are NOT blockers; they are your job to create. -- **If you can't test a feature because functionality doesn't exist → BUILD IT** -- You are responsible for implementing ALL required functionality -- Never assume another process will build it later -- "Missing functionality" is NOT a blocker - it's your job to create it - -**Example:** Feature says "User can filter flashcards by difficulty level" -- WRONG: "Flashcard page doesn't exist yet" → skip feature -- RIGHT: "Flashcard page doesn't exist yet" → build flashcard page → implement filter → test feature - -**Note:** Your feature has been pre-assigned by the orchestrator. Use `feature_get_by_id` with your assigned feature ID to get the details. - -Once you've retrieved the feature, **mark it as in-progress** (if not already): +**Note:** Your feature has been pre-assigned by the orchestrator. Use `feature_get_by_id` with your assigned feature ID to get the details. Then mark it as in-progress: ``` -# Mark feature as in-progress Use the feature_mark_in_progress tool with feature_id={your_assigned_id} ``` If you get "already in-progress" error, that's OK - continue with implementation. -Focus on completing one feature perfectly and completing its testing steps in this session before moving on to other features. -It's ok if you only complete one feature in this session, as there will be more sessions later that continue to make progress. +Focus on completing one feature perfectly in this session. It's ok if you only complete one feature, as more sessions will follow. #### When to Skip a Feature (EXTREMELY RARE) -**Skipping should almost NEVER happen.** Only skip for truly external blockers you cannot control: - -- **External API not configured**: Third-party service credentials missing (e.g., Stripe keys, OAuth secrets) -- **External service unavailable**: Dependency on service that's down or inaccessible -- **Environment limitation**: Hardware or system requirement you cannot fulfill - -**NEVER skip because:** - -| Situation | Wrong Action | Correct Action | -|-----------|--------------|----------------| -| "Page doesn't exist" | Skip | Create the page | -| "API endpoint missing" | Skip | Implement the endpoint | -| "Database table not ready" | Skip | Create the migration | -| "Component not built" | Skip | Build the component | -| "No data to test with" | Skip | Create test data or build data entry flow | -| "Feature X needs to be done first" | Skip | Build feature X as part of this feature | - -If a feature requires building other functionality first, **build that functionality**. You are the coding agent - your job is to make the feature work, not to defer it. +Only skip for truly external blockers: missing third-party credentials (Stripe keys, OAuth secrets), unavailable external services, or unfulfillable environment requirements. **NEVER** skip because a page, endpoint, component, or data doesn't exist yet -- build it. If a feature requires other functionality first, build that functionality as part of this feature. 
If you must skip (truly external blocker only): @@ -139,130 +109,22 @@ Use browser automation tools: ### STEP 5.5: MANDATORY VERIFICATION CHECKLIST (BEFORE MARKING ANY TEST PASSING) -**You MUST complete ALL of these checks before marking any feature as "passes": true** +**Complete ALL applicable checks before marking any feature as passing:** -#### Security Verification (for protected features) - -- [ ] Feature respects user role permissions -- [ ] Unauthenticated access is blocked (redirects to login) -- [ ] API endpoint checks authorization (returns 401/403 appropriately) -- [ ] Cannot access other users' data by manipulating URLs - -#### Real Data Verification (CRITICAL - NO MOCK DATA) - -- [ ] Created unique test data via UI (e.g., "TEST_12345_VERIFY_ME") -- [ ] Verified the EXACT data I created appears in UI -- [ ] Refreshed page - data persists (proves database storage) -- [ ] Deleted the test data - verified it's gone everywhere -- [ ] NO unexplained data appeared (would indicate mock data) -- [ ] Dashboard/counts reflect real numbers after my changes -- [ ] **Ran extended mock data grep (STEP 5.6) - no hits in src/ (excluding tests)** -- [ ] **Verified no globalThis, devStore, or dev-store patterns** -- [ ] **Server restart test passed (STEP 5.7) - data persists across restart** - -#### Navigation Verification - -- [ ] All buttons on this page link to existing routes -- [ ] No 404 errors when clicking any interactive element -- [ ] Back button returns to correct previous page -- [ ] Related links (edit, view, delete) have correct IDs in URLs - -#### Integration Verification - -- [ ] Console shows ZERO JavaScript errors -- [ ] Network tab shows successful API calls (no 500s) -- [ ] Data returned from API matches what UI displays -- [ ] Loading states appeared during API calls -- [ ] Error states handle failures gracefully +- **Security:** Feature respects role permissions; unauthenticated access blocked; API checks auth (401/403); no cross-user data leaks via URL manipulation +- **Real Data:** Create unique test data via UI, verify it appears, refresh to confirm persistence, delete and verify removal. No unexplained data (indicates mocks). Dashboard counts reflect real numbers +- **Mock Data Grep:** Run STEP 5.6 grep checks - no hits in src/ (excluding tests). No globalThis, devStore, or dev-store patterns +- **Server Restart:** For data features, run STEP 5.7 - data persists across server restart +- **Navigation:** All buttons link to existing routes, no 404s, back button works, edit/view/delete links have correct IDs +- **Integration:** Zero JS console errors, no 500s in network tab, API data matches UI, loading/error states work ### STEP 5.6: MOCK DATA DETECTION (Before marking passing) -**Run ALL these grep checks. Any hits in src/ (excluding test files) require investigation:** - -```bash -# Common exclusions for test files -EXCLUDE="--exclude=*.test.* --exclude=*.spec.* --exclude=*__test__* --exclude=*__mocks__*" - -# 1. In-memory storage patterns (CRITICAL - catches dev-store) -grep -r "globalThis\." --include="*.ts" --include="*.tsx" --include="*.js" $EXCLUDE src/ -grep -r "dev-store\|devStore\|DevStore\|mock-db\|mockDb" --include="*.ts" --include="*.tsx" --include="*.js" $EXCLUDE src/ - -# 2. Mock data variables -grep -r "mockData\|fakeData\|sampleData\|dummyData\|testData" --include="*.ts" --include="*.tsx" --include="*.js" $EXCLUDE src/ - -# 3. 
TODO/incomplete markers -grep -r "TODO.*real\|TODO.*database\|TODO.*API\|STUB\|MOCK" --include="*.ts" --include="*.tsx" --include="*.js" $EXCLUDE src/ - -# 4. Development-only conditionals -grep -r "isDevelopment\|isDev\|process\.env\.NODE_ENV.*development" --include="*.ts" --include="*.tsx" --include="*.js" $EXCLUDE src/ - -# 5. In-memory collections as data stores -grep -r "new Map\(\)\|new Set\(\)" --include="*.ts" --include="*.tsx" --include="*.js" $EXCLUDE src/ 2>/dev/null -``` - -**Rule:** If ANY grep returns results in production code → investigate → FIX before marking passing. - -**Runtime verification:** -1. Create unique data (e.g., "TEST_12345") → verify in UI → delete → verify gone -2. Check database directly - all displayed data must come from real DB queries -3. If unexplained data appears, it's mock data - fix before marking passing. +Before marking a feature passing, grep for mock/placeholder data patterns in src/ (excluding test files): `globalThis`, `devStore`, `dev-store`, `mockDb`, `mockData`, `fakeData`, `sampleData`, `dummyData`, `testData`, `TODO.*real`, `TODO.*database`, `STUB`, `MOCK`, `isDevelopment`, `isDev`. Any hits in production code must be investigated and fixed. Also create unique test data (e.g., "TEST_12345"), verify it appears in UI, then delete and confirm removal - unexplained data indicates mock implementations. ### STEP 5.7: SERVER RESTART PERSISTENCE TEST (MANDATORY for data features) -**When required:** Any feature involving CRUD operations or data persistence. - -**This test is NON-NEGOTIABLE. It catches in-memory storage implementations that pass all other tests.** - -**Steps:** - -1. Create unique test data via UI or API (e.g., item named "RESTART_TEST_12345") -2. Verify data appears in UI and API response - -3. **STOP the server completely:** - ```bash - # Kill by port (safer - only kills the dev server, not VS Code/Claude Code/etc.) - # Unix/macOS: - lsof -ti :${PORT:-3000} | xargs kill -TERM 2>/dev/null || true - sleep 3 - lsof -ti :${PORT:-3000} | xargs kill -9 2>/dev/null || true - sleep 2 - - # Windows alternative (use if lsof not available): - # netstat -ano | findstr :${PORT:-3000} | findstr LISTENING - # taskkill /F /PID 2>nul - - # Verify server is stopped - if lsof -ti :${PORT:-3000} > /dev/null 2>&1; then - echo "ERROR: Server still running on port ${PORT:-3000}!" - exit 1 - fi - ``` - -4. **RESTART the server:** - ```bash - ./init.sh & - sleep 15 # Allow server to fully start - # Verify server is responding - if ! curl -f http://localhost:${PORT:-3000}/api/health && ! curl -f http://localhost:${PORT:-3000}; then - echo "ERROR: Server failed to start after restart" - exit 1 - fi - ``` - -5. **Query for test data - it MUST still exist** - - Via UI: Navigate to data location, verify data appears - - Via API: `curl http://localhost:${PORT:-3000}/api/items` - verify data in response - -6. **If data is GONE:** Implementation uses in-memory storage → CRITICAL FAIL - - Run all grep commands from STEP 5.6 to identify the mock pattern - - You MUST fix the in-memory storage implementation before proceeding - - Replace in-memory storage with real database queries - -7. **Clean up test data** after successful verification - -**Why this test exists:** In-memory stores like `globalThis.devStore` pass all other tests because data persists during a single server run. Only a full server restart reveals this bug. Skipping this step WILL allow dev-store implementations to slip through. 
- -**YOLO Mode Note:** Even in YOLO mode, this verification is MANDATORY for data features. Use curl instead of browser automation. +For any feature involving CRUD or data persistence: create unique test data (e.g., "RESTART_TEST_12345"), verify it exists, then fully stop and restart the dev server. After restart, verify the test data still exists. If data is gone, the implementation uses in-memory storage -- run STEP 5.6 greps, find the mock pattern, and replace with real database queries. Clean up test data after verification. This test catches in-memory stores like `globalThis.devStore` that pass all other tests but lose data on restart. ### STEP 6: UPDATE FEATURE STATUS (CAREFULLY!) diff --git a/.claude/templates/testing_prompt.template.md b/.claude/templates/testing_prompt.template.md index a7e2bbe..c8011a3 100644 --- a/.claude/templates/testing_prompt.template.md +++ b/.claude/templates/testing_prompt.template.md @@ -1,58 +1,29 @@ ## YOUR ROLE - TESTING AGENT -You are a **testing agent** responsible for **regression testing** previously-passing features. +You are a **testing agent** responsible for **regression testing** previously-passing features. If you find a regression, you must fix it. -Your job is to ensure that features marked as "passing" still work correctly. If you find a regression (a feature that no longer works), you must fix it. +## ASSIGNED FEATURES FOR REGRESSION TESTING -### STEP 1: GET YOUR BEARINGS (MANDATORY) +You are assigned to test the following features: {{TESTING_FEATURE_IDS}} -Start by orienting yourself: +### Workflow for EACH feature: +1. Call `feature_get_by_id` with the feature ID +2. Read the feature's verification steps +3. Test the feature in the browser +4. Call `feature_mark_passing` or `feature_mark_failing` +5. Move to the next feature -```bash -# 1. See your working directory -pwd +--- -# 2. List files to understand project structure -ls -la +### STEP 1: GET YOUR ASSIGNED FEATURE(S) -# 3. Read progress notes from previous sessions (last 200 lines) -tail -200 claude-progress.txt - -# 4. Check recent git history -git log --oneline -10 -``` - -Then use MCP tools to check feature status: +Your features have been pre-assigned by the orchestrator. For each feature ID listed above, use `feature_get_by_id` to get the details: ``` -# 5. Get progress statistics -Use the feature_get_stats tool +Use the feature_get_by_id tool with feature_id= ``` -### STEP 2: START SERVERS (IF NOT RUNNING) - -If `init.sh` exists, run it: - -```bash -chmod +x init.sh -./init.sh -``` - -Otherwise, start servers manually. - -### STEP 3: GET YOUR ASSIGNED FEATURE - -Your feature has been pre-assigned by the orchestrator. Use `feature_get_by_id` to get the details: - -``` -Use the feature_get_by_id tool with feature_id={your_assigned_id} -``` - -The orchestrator has already claimed this feature for testing (set `testing_in_progress=true`). - -**CRITICAL:** You MUST call `feature_release_testing` when done, regardless of pass/fail. - -### STEP 4: VERIFY THE FEATURE +### STEP 2: VERIFY THE FEATURE **CRITICAL:** You MUST verify the feature through the actual UI using browser automation. @@ -81,21 +52,11 @@ Use browser automation tools: - browser_console_messages - Get browser console output (check for errors) - browser_network_requests - Monitor API calls -### STEP 5: HANDLE RESULTS +### STEP 3: HANDLE RESULTS #### If the feature PASSES: -The feature still works correctly. 
Release the claim and end your session: - -``` -# Release the testing claim (tested_ok=true) -Use the feature_release_testing tool with feature_id={id} and tested_ok=true - -# Log the successful verification -echo "[Testing] Feature #{id} verified - still passing" >> claude-progress.txt -``` - -**DO NOT** call feature_mark_passing again - it's already passing. +The feature still works correctly. **DO NOT** call feature_mark_passing again -- it's already passing. End your session. #### If the feature FAILS (regression found): @@ -125,13 +86,7 @@ A regression has been introduced. You MUST fix it: Use the feature_mark_passing tool with feature_id={id} ``` -6. **Release the testing claim:** - ``` - Use the feature_release_testing tool with feature_id={id} and tested_ok=false - ``` - Note: tested_ok=false because we found a regression (even though we fixed it). - -7. **Commit the fix:** +6. **Commit the fix:** ```bash git add . git commit -m "Fix regression in [feature name] @@ -141,14 +96,6 @@ A regression has been introduced. You MUST fix it: - Verified with browser automation" ``` -### STEP 6: UPDATE PROGRESS AND END - -Update `claude-progress.txt`: - -```bash -echo "[Testing] Session complete - verified/fixed feature #{id}" >> claude-progress.txt -``` - --- ## AVAILABLE MCP TOOLS @@ -156,12 +103,11 @@ echo "[Testing] Session complete - verified/fixed feature #{id}" >> claude-progr ### Feature Management - `feature_get_stats` - Get progress overview (passing/in_progress/total counts) - `feature_get_by_id` - Get your assigned feature details -- `feature_release_testing` - **REQUIRED** - Release claim after testing (pass tested_ok=true/false) - `feature_mark_failing` - Mark a feature as failing (when you find a regression) - `feature_mark_passing` - Mark a feature as passing (after fixing a regression) ### Browser Automation (Playwright) -All interaction tools have **built-in auto-wait** - no manual timeouts needed. +All interaction tools have **built-in auto-wait** -- no manual timeouts needed. - `browser_navigate` - Navigate to URL - `browser_take_screenshot` - Capture screenshot @@ -178,9 +124,7 @@ All interaction tools have **built-in auto-wait** - no manual timeouts needed. ## IMPORTANT REMINDERS -**Your Goal:** Verify that passing features still work, and fix any regressions found. - -**This Session's Goal:** Test ONE feature thoroughly. +**Your Goal:** Test each assigned feature thoroughly. Verify it still works, and fix any regression found. Process ALL features in your list before ending your session. **Quality Bar:** - Zero console errors @@ -188,21 +132,15 @@ All interaction tools have **built-in auto-wait** - no manual timeouts needed. - Visual appearance correct - API calls succeed -**CRITICAL - Always release your claim:** -- Call `feature_release_testing` when done, whether pass or fail -- Pass `tested_ok=true` if the feature passed -- Pass `tested_ok=false` if you found a regression - **If you find a regression:** 1. Mark the feature as failing immediately 2. Fix the issue 3. Verify the fix with browser automation 4. Mark as passing only after thorough verification -5. Release the testing claim with `tested_ok=false` -6. Commit the fix +5. Commit the fix -**You have one iteration.** Focus on testing ONE feature thoroughly. +**You have one iteration.** Test all assigned features before ending. --- -Begin by running Step 1 (Get Your Bearings). +Begin by running Step 1 for the first feature in your assigned list. 
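A rough sketch of how a caller might fill the new `{{TESTING_FEATURE_IDS}}` placeholder for a batch of features; the function name and call site below are assumptions for illustration, not the actual implementation in `prompts.py` or `parallel_orchestrator.py`:

```python
from pathlib import Path

def render_testing_prompt(template: str, feature_ids: list[int]) -> str:
    # Hypothetical helper: substitute the comma-separated batch of feature IDs
    # into the testing prompt template shown in the diff above.
    ids = ", ".join(str(fid) for fid in feature_ids)
    return template.replace("{{TESTING_FEATURE_IDS}}", ids)

template = Path(".claude/templates/testing_prompt.template.md").read_text()
prompt = render_testing_prompt(template, [12, 13, 14])  # e.g. a batch of three features per session
```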
diff --git a/.gitignore b/.gitignore index bb20118..2639f8d 100644 --- a/.gitignore +++ b/.gitignore @@ -76,6 +76,8 @@ ui/playwright-report/ .dmypy.json dmypy.json +.ruff_cache/ + # =================== # Claude Code # =================== diff --git a/CLAUDE.md b/CLAUDE.md index 91a3f4c..c9ef839 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -324,7 +324,6 @@ blocked_commands: - `examples/project_allowed_commands.yaml` - Project config example (all commented by default) - `examples/org_config.yaml` - Org config example (all commented by default) - `examples/README.md` - Comprehensive guide with use cases, testing, and troubleshooting -- `PHASE3_SPEC.md` - Specification for mid-session approval feature (future enhancement) ### Ollama Local Models (Optional) diff --git a/CUSTOM_UPDATES.md b/CUSTOM_UPDATES.md deleted file mode 100644 index f211696..0000000 --- a/CUSTOM_UPDATES.md +++ /dev/null @@ -1,228 +0,0 @@ -# Custom Updates - AutoCoder - -This document tracks all customizations made to AutoCoder that deviate from the upstream repository. Reference this file before any updates to preserve these changes. - ---- - -## Table of Contents - -1. [UI Theme Customization](#1-ui-theme-customization) -2. [Playwright Browser Configuration](#2-playwright-browser-configuration) -3. [Update Checklist](#update-checklist) - ---- - -## 1. UI Theme Customization - -### Overview - -The UI has been customized from the default **neobrutalism** style to a clean **Twitter/Supabase-style** design. - -**Design Changes:** -- No shadows -- Thin borders (1px) -- Rounded corners (1.3rem base) -- Blue accent color (Twitter blue) -- Clean typography (Open Sans) - -### Modified Files - -#### `ui/src/styles/custom-theme.css` - -**Purpose:** Main theme override file that replaces neo design with clean Twitter style. - -**Key Changes:** -- All `--shadow-neo-*` variables set to `none` -- All status colors (`pending`, `progress`, `done`) use Twitter blue -- Rounded corners: `--radius-neo-lg: 1.3rem` -- Font: Open Sans -- Removed all transform effects on hover -- Dark mode with proper contrast - -**CSS Variables (Light Mode):** -```css ---color-neo-accent: oklch(0.6723 0.1606 244.9955); /* Twitter blue */ ---color-neo-pending: oklch(0.6723 0.1606 244.9955); ---color-neo-progress: oklch(0.6723 0.1606 244.9955); ---color-neo-done: oklch(0.6723 0.1606 244.9955); -``` - -**CSS Variables (Dark Mode):** -```css ---color-neo-bg: oklch(0.08 0 0); ---color-neo-card: oklch(0.16 0.005 250); ---color-neo-border: oklch(0.30 0 0); -``` - -**How to preserve:** This file should NOT be overwritten. It loads after `globals.css` and overrides it. - ---- - -#### `ui/src/components/KanbanColumn.tsx` - -**Purpose:** Modified to support themeable kanban columns without inline styles. - -**Changes:** - -1. **colorMap changed from inline colors to CSS classes:** -```tsx -// BEFORE (original): -const colorMap = { - pending: 'var(--color-neo-pending)', - progress: 'var(--color-neo-progress)', - done: 'var(--color-neo-done)', -} - -// AFTER (customized): -const colorMap = { - pending: 'kanban-header-pending', - progress: 'kanban-header-progress', - done: 'kanban-header-done', -} -``` - -2. **Column div uses CSS class instead of inline style:** -```tsx -// BEFORE: -
- -// AFTER: -
-``` - -3. **Header div simplified (removed duplicate color class):** -```tsx -// BEFORE: -
- -// AFTER: -
-``` - -4. **Title text color:** -```tsx -// BEFORE: -text-[var(--color-neo-text-on-bright)] - -// AFTER: -text-[var(--color-neo-text)] -``` - ---- - -## 2. Playwright Browser Configuration - -### Overview - -Changed default Playwright settings for better performance: -- **Default browser:** Firefox (lower CPU usage) -- **Default mode:** Headless (saves resources) - -### Modified Files - -#### `client.py` - -**Changes:** - -```python -# BEFORE: -DEFAULT_PLAYWRIGHT_HEADLESS = False - -# AFTER: -DEFAULT_PLAYWRIGHT_HEADLESS = True -DEFAULT_PLAYWRIGHT_BROWSER = "firefox" -``` - -**New function added:** -```python -def get_playwright_browser() -> str: - """ - Get the browser to use for Playwright. - Options: chrome, firefox, webkit, msedge - Firefox is recommended for lower CPU usage. - """ - return os.getenv("PLAYWRIGHT_BROWSER", DEFAULT_PLAYWRIGHT_BROWSER).lower() -``` - -**Playwright args updated:** -```python -playwright_args = [ - "@playwright/mcp@latest", - "--viewport-size", "1280x720", - "--browser", browser, # NEW: configurable browser -] -``` - ---- - -#### `.env.example` - -**Updated documentation:** -```bash -# PLAYWRIGHT_BROWSER: Which browser to use for testing -# - firefox: Lower CPU usage, recommended (default) -# - chrome: Google Chrome -# - webkit: Safari engine -# - msedge: Microsoft Edge -# PLAYWRIGHT_BROWSER=firefox - -# PLAYWRIGHT_HEADLESS: Run browser without visible window -# - true: Browser runs in background, saves CPU (default) -# - false: Browser opens a visible window (useful for debugging) -# PLAYWRIGHT_HEADLESS=true -``` - ---- - -## 3. Update Checklist - -When updating AutoCoder from upstream, verify these items: - -### UI Changes -- [ ] `ui/src/styles/custom-theme.css` is preserved -- [ ] `ui/src/components/KanbanColumn.tsx` changes are preserved -- [ ] Run `npm run build` in `ui/` directory -- [ ] Test both light and dark modes - -### Backend Changes -- [ ] `client.py` - Playwright browser/headless defaults preserved -- [ ] `.env.example` - Documentation updates preserved - -### General -- [ ] Verify Playwright uses Firefox by default -- [ ] Check that browser runs headless by default - ---- - -## Reverting to Defaults - -### UI Only -```bash -rm ui/src/styles/custom-theme.css -git checkout ui/src/components/KanbanColumn.tsx -cd ui && npm run build -``` - -### Backend Only -```bash -git checkout client.py .env.example -``` - ---- - -## Files Summary - -| File | Type | Change Description | -|------|------|-------------------| -| `ui/src/styles/custom-theme.css` | UI | Twitter-style theme | -| `ui/src/components/KanbanColumn.tsx` | UI | Themeable kanban columns | -| `ui/src/main.tsx` | UI | Imports custom theme | -| `client.py` | Backend | Firefox + headless defaults | -| `.env.example` | Config | Updated documentation | - ---- - -## Last Updated - -**Date:** January 2026 -**PR:** #93 - Twitter-style UI theme with custom theme override system diff --git a/PHASE3_SPEC.md b/PHASE3_SPEC.md deleted file mode 100644 index 7c0c64b..0000000 --- a/PHASE3_SPEC.md +++ /dev/null @@ -1,1591 +0,0 @@ -# Phase 3: Mid-Session Command Approval - Implementation Specification - -**Status:** Not yet implemented (Phases 1 & 2 complete) -**Estimated Effort:** 2-3 days for experienced developer -**Priority:** Medium (nice-to-have, not blocking) - ---- - -## Table of Contents - -- [Executive Summary](#executive-summary) -- [User Experience](#user-experience) -- [Technical Architecture](#technical-architecture) -- [Implementation Checklist](#implementation-checklist) -- [Detailed 
Implementation Guide](#detailed-implementation-guide) -- [Testing Strategy](#testing-strategy) -- [Security Considerations](#security-considerations) -- [Future Enhancements](#future-enhancements) - ---- - -## Executive Summary - -### What is Phase 3? - -Phase 3 adds **mid-session approval** for bash commands that aren't in the allowlist. Instead of immediately blocking unknown commands, the agent can request user approval in real-time. - -### Current State (Phases 1 & 2) - -The agent can only run commands that are: -1. In the hardcoded allowlist (npm, git, ls, etc.) -2. In project config (`.autocoder/allowed_commands.yaml`) -3. In org config (`~/.autocoder/config.yaml`) - -If the agent tries an unknown command → **immediately blocked**. - -### Phase 3 Vision - -If the agent tries an unknown command → **request approval**: -- **CLI mode**: Rich TUI overlay shows approval dialog -- **UI mode**: React banner/toast prompts user -- **User decides**: Session-only, Permanent (save to YAML), or Deny -- **Timeout**: Auto-deny after 5 minutes (configurable) - -### Benefits - -1. **Flexibility**: Don't need to pre-configure every possible command -2. **Discovery**: See what commands the agent actually needs -3. **Safety**: Still requires explicit approval (not automatic) -4. **Persistence**: Can save approved commands to config for future sessions - -### Non-Goals - -- **NOT** auto-approval (always requires user confirmation) -- **NOT** bypassing hardcoded blocklist (sudo, dd, etc. are NEVER allowed) -- **NOT** bypassing org-level blocklist (those remain final) - ---- - -## User Experience - -### CLI Mode Flow - -``` -Agent is working... -Agent tries: xcodebuild -project MyApp.xcodeproj - -┌─────────────────────────────────────────────────────────────┐ -│ ⚠️ COMMAND APPROVAL REQUIRED │ -├─────────────────────────────────────────────────────────────┤ -│ The agent is requesting permission to run: │ -│ │ -│ xcodebuild -project MyApp.xcodeproj │ -│ │ -│ This command is not in your allowed commands list. │ -│ │ -│ Options: │ -│ [S] Allow for this Session only │ -│ [P] Allow Permanently (save to config) │ -│ [D] Deny (default in 5 minutes) │ -│ │ -│ Your choice (S/P/D): │ -└─────────────────────────────────────────────────────────────┘ -``` - -**For dangerous commands** (aws, kubectl, sudo*): - -``` -╔═══════════════════════════════════════════════════════════════╗ -║ ⚠️ DANGER: PRIVILEGED COMMAND REQUESTED ║ -╠═══════════════════════════════════════════════════════════════╣ -║ The agent is requesting: aws s3 ls ║ -║ ║ -║ aws is a CLOUD CLI that can: ║ -║ • Access production infrastructure ║ -║ • Modify or delete cloud resources ║ -║ • Incur significant costs ║ -║ ║ -║ This action could have SERIOUS consequences. ║ -║ ║ -║ Type CONFIRM to allow, or press Enter to deny: ║ -╚═══════════════════════════════════════════════════════════════╝ -``` - -*Note: sudo would still be in hardcoded blocklist, but this shows the UX pattern - -### UI Mode Flow - -**React UI Banner** (top of screen): - -``` -┌─────────────────────────────────────────────────────────────┐ -│ ⚠️ Agent requesting permission: xcodebuild │ -│ │ -│ [Session Only] [Save to Config] [Deny] │ -│ │ -│ Auto-denies in: 4:32 │ -└─────────────────────────────────────────────────────────────┘ -``` - -**Multiple requests queued:** - -``` -┌─────────────────────────────────────────────────────────────┐ -│ ⚠️ 3 approval requests pending │ -│ │ -│ 1. xcodebuild -project MyApp.xcodeproj │ -│ [Session] [Save] [Deny] │ -│ │ -│ 2. 
swift package resolve │ -│ [Session] [Save] [Deny] │ -│ │ -│ 3. xcrun simctl list devices │ -│ [Session] [Save] [Deny] │ -└─────────────────────────────────────────────────────────────┘ -``` - -### Response Behavior - -| User Action | Agent Behavior | Config Updated | -|-------------|----------------|----------------| -| Session Only | Command allowed this session | No | -| Permanent | Command allowed forever | Yes - appended to YAML | -| Deny | Command blocked, agent sees error | No | -| Timeout (5 min) | Command blocked, agent sees timeout | No | - ---- - -## Technical Architecture - -### Data Flow - -``` -┌─────────────────────────────────────────────────────────────┐ -│ 1. Agent tries command: xcodebuild │ -└────────────────────┬────────────────────────────────────────┘ - │ - ▼ -┌─────────────────────────────────────────────────────────────┐ -│ 2. bash_security_hook() checks allowlist │ -│ → Not found, not in blocklist │ -└────────────────────┬────────────────────────────────────────┘ - │ - ▼ -┌─────────────────────────────────────────────────────────────┐ -│ 3. Hook returns: {"decision": "pending", │ -│ "request_id": "req_123", │ -│ "command": "xcodebuild"} │ -└────────────────────┬────────────────────────────────────────┘ - │ - ┌──────────┴──────────┐ - │ │ - ▼ ▼ -┌─────────────────────┐ ┌─────────────────────┐ -│ CLI Mode │ │ UI Mode │ -│ │ │ │ -│ approval_tui.py │ │ WebSocket message │ -│ shows Rich dialog │ │ → React banner │ -└──────────┬──────────┘ └──────────┬──────────┘ - │ │ - └────────┬───────────────┘ - │ - ▼ -┌─────────────────────────────────────────────────────────────┐ -│ 4. User responds: "session" / "permanent" / "deny" │ -└────────────────────┬────────────────────────────────────────┘ - │ - ▼ -┌─────────────────────────────────────────────────────────────┐ -│ 5. approval_manager.respond(request_id, decision) │ -│ → If permanent: persist_command() │ -│ → If session: add to in-memory set │ -└────────────────────┬────────────────────────────────────────┘ - │ - ▼ -┌─────────────────────────────────────────────────────────────┐ -│ 6. Hook gets response, returns to agent: │ -│ → "allow" or "block" │ -└─────────────────────────────────────────────────────────────┘ -``` - -### State Management - -**ApprovalManager** (new class in `security.py`): - -```python -class ApprovalManager: - """ - Manages pending approval requests and responses. - Thread-safe for concurrent access. - """ - - def __init__(self): - self._pending: Dict[str, PendingRequest] = {} - self._session_allowed: Set[str] = set() - self._lock = threading.Lock() - - def request_approval( - self, - command: str, - is_dangerous: bool = False - ) -> str: - """ - Create a new approval request. - Returns request_id. - """ - ... - - def wait_for_response( - self, - request_id: str, - timeout_seconds: int = 300 - ) -> ApprovalDecision: - """ - Block until user responds or timeout. - Returns: "allow_session", "allow_permanent", "deny", "timeout" - """ - ... - - def respond( - self, - request_id: str, - decision: ApprovalDecision - ): - """ - Called by UI/CLI to respond to a request. - """ - ... -``` - -### File Locking for Persistence - -When user chooses "Permanent", append to YAML with exclusive file lock: - -```python -import fcntl # Unix -import msvcrt # Windows - -def persist_command(project_dir: Path, command: str, description: str = None): - """ - Atomically append command to project YAML. - Uses platform-specific file locking. 
- """ - config_path = project_dir / ".autocoder" / "allowed_commands.yaml" - - # Ensure file exists - if not config_path.exists(): - config_path.write_text("version: 1\ncommands: []\n") - - with open(config_path, "r+") as f: - # Acquire exclusive lock - if sys.platform == "win32": - msvcrt.locking(f.fileno(), msvcrt.LK_LOCK, 1) - else: - fcntl.flock(f.fileno(), fcntl.LOCK_EX) - - try: - # Load current config - config = yaml.safe_load(f) or {"version": 1, "commands": []} - - # Add new command - new_entry = {"name": command} - if description: - new_entry["description"] = description - - config.setdefault("commands", []).append(new_entry) - - # Validate doesn't exceed 50 commands - if len(config["commands"]) > 50: - raise ValueError("Cannot add command: 50 command limit reached") - - # Write back - f.seek(0) - f.truncate() - yaml.dump(config, f, default_flow_style=False) - - finally: - # Release lock - if sys.platform == "win32": - msvcrt.locking(f.fileno(), msvcrt.LK_UNLCK, 1) - else: - fcntl.flock(f.fileno(), fcntl.LOCK_UN) -``` - ---- - -## Implementation Checklist - -### Core Security Module - -- [ ] Create `ApprovalManager` class in `security.py` - - [ ] Thread-safe pending request storage - - [ ] Session-only allowed commands set - - [ ] Timeout handling with threading.Timer - - [ ] Request/response API - -- [ ] Modify `bash_security_hook()` to support pending state - - [ ] Check if command needs approval - - [ ] Create approval request - - [ ] Wait for response (with timeout) - - [ ] Return appropriate decision - -- [ ] Implement `persist_command()` with file locking - - [ ] Platform-specific locking (fcntl/msvcrt) - - [ ] Atomic YAML append - - [ ] 50 command limit validation - - [ ] Auto-generate description if not provided - -- [ ] Add `is_dangerous_command()` helper - - [ ] Check against DANGEROUS_COMMANDS set - - [ ] Return emphatic warning text - -- [ ] Update DANGEROUS_COMMANDS set - - [ ] Move from hardcoded blocklist to dangerous list - - [ ] Commands: aws, gcloud, az, kubectl, docker-compose - - [ ] Keep sudo, dd, etc. 
in BLOCKED_COMMANDS (never allowed) - -### CLI Approval Interface - -- [ ] Create `approval_tui.py` module - - [ ] Use Rich library for TUI - - [ ] Overlay design (doesn't clear screen) - - [ ] Keyboard input handling (S/P/D keys) - - [ ] Timeout display (countdown timer) - - [ ] Different layouts for normal vs dangerous commands - -- [ ] Integrate with agent.py - - [ ] Detect if running in CLI mode (not UI) - - [ ] Pass approval callback to client - - [ ] Handle approval responses - -- [ ] Add `rich` to requirements.txt - - [ ] Version: `rich>=13.0.0` - -### React UI Components - -- [ ] Create `ApprovalBanner.tsx` component - - [ ] Banner at top of screen - - [ ] Queue multiple requests - - [ ] Session/Permanent/Deny buttons - - [ ] Countdown timer display - - [ ] Dangerous command warning variant - -- [ ] Update `useWebSocket.ts` hook - - [ ] Handle `approval_request` message type - - [ ] Send `approval_response` message - - [ ] Queue management for multiple requests - -- [ ] Update WebSocket message types in `types.ts` - ```typescript - type ApprovalRequest = { - request_id: string; - command: string; - is_dangerous: boolean; - timeout_seconds: number; - warning_text?: string; - }; - - type ApprovalResponse = { - request_id: string; - decision: "session" | "permanent" | "deny"; - }; - ``` - -### Backend WebSocket Integration - -- [ ] Update `server/routers/agent.py` - - [ ] Add `approval_request` message sender - - [ ] Add `approval_response` message handler - - [ ] Wire to ApprovalManager - -- [ ] Thread-safe WebSocket message queue - - [ ] Handle approval requests from agent thread - - [ ] Handle approval responses from WebSocket thread - -### MCP Tool for Agent Introspection - -- [ ] Add `list_allowed_commands` tool to feature MCP - - [ ] Returns current allowed commands - - [ ] Indicates which are from project/org/global - - [ ] Shows if approval is available - - [ ] Agent can proactively query before trying commands - -- [ ] Tool response format: - ```python - { - "commands": [ - {"name": "swift", "source": "project"}, - {"name": "npm", "source": "global"}, - {"name": "jq", "source": "org"} - ], - "blocked_count": 15, - "can_request_approval": True, - "approval_timeout_minutes": 5 - } - ``` - -### Configuration - -- [ ] Add approval settings to org config - - [ ] `approval_timeout_minutes` (default: 5) - - [ ] `approval_enabled` (default: true) - - [ ] `dangerous_command_requires_confirmation` (default: true) - -- [ ] Validate org config settings - - [ ] Timeout must be 1-30 minutes - - [ ] Boolean flags properly typed - -### Testing - -- [ ] Unit tests for ApprovalManager - - [ ] Request creation - - [ ] Response handling - - [ ] Timeout behavior - - [ ] Thread safety - -- [ ] Unit tests for file locking - - [ ] Concurrent append operations - - [ ] Platform-specific locking - - [ ] Error handling - -- [ ] Integration tests for approval flow - - [ ] CLI approval (mocked input) - - [ ] WebSocket approval (mocked messages) - - [ ] Session vs permanent vs deny - - [ ] Timeout scenarios - -- [ ] UI component tests - - [ ] ApprovalBanner rendering - - [ ] Queue management - - [ ] Button interactions - - [ ] Timer countdown - -### Documentation - -- [ ] Update `CLAUDE.md` - - [ ] Document approval flow - - [ ] Update security model section - - [ ] Add Phase 3 to architecture - -- [ ] Update `examples/README.md` - - [ ] Add mid-session approval examples - - [ ] Document timeout configuration - - [ ] Troubleshooting approval issues - -- [ ] Create user guide for approvals - - [ ] When/why 
to use session vs permanent - - [ ] How to handle dangerous commands - - [ ] Keyboard shortcuts for CLI - ---- - -## Detailed Implementation Guide - -### Step 1: Core ApprovalManager (2-3 hours) - -**File:** `security.py` - -```python -from dataclasses import dataclass -from enum import Enum -import threading -import time -from typing import Dict, Set, Optional -import uuid - -class ApprovalDecision(Enum): - ALLOW_SESSION = "session" - ALLOW_PERMANENT = "permanent" - DENY = "deny" - TIMEOUT = "timeout" - -@dataclass -class PendingRequest: - request_id: str - command: str - is_dangerous: bool - timestamp: float - response_event: threading.Event - decision: Optional[ApprovalDecision] = None - -class ApprovalManager: - """ - Singleton manager for approval requests. - Thread-safe for concurrent access from agent and UI. - """ - - _instance = None - _lock = threading.Lock() - - def __new__(cls): - if cls._instance is None: - with cls._lock: - if cls._instance is None: - cls._instance = super().__new__(cls) - cls._instance._initialized = False - return cls._instance - - def __init__(self): - if self._initialized: - return - - self._pending: Dict[str, PendingRequest] = {} - self._session_allowed: Set[str] = set() - self._state_lock = threading.Lock() - self._initialized = True - - def request_approval( - self, - command: str, - is_dangerous: bool = False, - timeout_seconds: int = 300 - ) -> str: - """ - Create a new approval request. - - Args: - command: The command needing approval - is_dangerous: True if command is in DANGEROUS_COMMANDS - timeout_seconds: How long to wait before auto-deny - - Returns: - request_id to use for waiting/responding - """ - request_id = f"req_{uuid.uuid4().hex[:8]}" - - with self._state_lock: - request = PendingRequest( - request_id=request_id, - command=command, - is_dangerous=is_dangerous, - timestamp=time.time(), - response_event=threading.Event() - ) - self._pending[request_id] = request - - # Start timeout timer - timer = threading.Timer( - timeout_seconds, - self._handle_timeout, - args=[request_id] - ) - timer.daemon = True - timer.start() - - # Emit notification (CLI or WebSocket) - self._emit_approval_request(request) - - return request_id - - def wait_for_response( - self, - request_id: str, - timeout_seconds: int = 300 - ) -> ApprovalDecision: - """ - Block until user responds or timeout. - - Returns: - ApprovalDecision (session/permanent/deny/timeout) - """ - with self._state_lock: - request = self._pending.get(request_id) - if not request: - return ApprovalDecision.DENY - - # Wait for response event - request.response_event.wait(timeout=timeout_seconds) - - with self._state_lock: - request = self._pending.get(request_id) - if not request or not request.decision: - return ApprovalDecision.TIMEOUT - - decision = request.decision - - # Handle permanent approval - if decision == ApprovalDecision.ALLOW_PERMANENT: - # This will be handled by caller (needs project_dir) - pass - elif decision == ApprovalDecision.ALLOW_SESSION: - self._session_allowed.add(request.command) - - # Clean up - del self._pending[request_id] - - return decision - - def respond( - self, - request_id: str, - decision: ApprovalDecision - ): - """ - Called by UI/CLI to respond to a request. 
- """ - with self._state_lock: - request = self._pending.get(request_id) - if not request: - return - - request.decision = decision - request.response_event.set() - - def is_session_allowed(self, command: str) -> bool: - """Check if command was approved for this session.""" - with self._state_lock: - return command in self._session_allowed - - def _handle_timeout(self, request_id: str): - """Called by timer thread when request times out.""" - self.respond(request_id, ApprovalDecision.TIMEOUT) - - def _emit_approval_request(self, request: PendingRequest): - """ - Emit approval request to CLI or WebSocket. - To be implemented based on execution mode. - """ - # This is called by approval_callback in client.py - pass - -# Global singleton instance -_approval_manager = ApprovalManager() - -def get_approval_manager() -> ApprovalManager: - """Get the global ApprovalManager singleton.""" - return _approval_manager -``` - -### Step 2: Modify bash_security_hook (1 hour) - -**File:** `security.py` - -```python -async def bash_security_hook(input_data, tool_use_id=None, context=None): - """ - Pre-tool-use hook that validates bash commands. - - Phase 3: Supports mid-session approval for unknown commands. - """ - if input_data.get("tool_name") != "Bash": - return {} - - command = input_data.get("tool_input", {}).get("command", "") - if not command: - return {} - - # Extract commands - commands = extract_commands(command) - if not commands: - return { - "decision": "block", - "reason": f"Could not parse command: {command}", - } - - # Get project directory and effective commands - project_dir = None - if context and isinstance(context, dict): - project_dir_str = context.get("project_dir") - if project_dir_str: - project_dir = Path(project_dir_str) - - allowed_commands, blocked_commands = get_effective_commands(project_dir) - segments = split_command_segments(command) - - # Check each command - for cmd in commands: - # Check blocklist (highest priority) - if cmd in blocked_commands: - return { - "decision": "block", - "reason": f"Command '{cmd}' is blocked and cannot be approved.", - } - - # Check if allowed (allowlist or session) - approval_mgr = get_approval_manager() - if is_command_allowed(cmd, allowed_commands) or approval_mgr.is_session_allowed(cmd): - # Additional validation for sensitive commands - if cmd in COMMANDS_NEEDING_EXTRA_VALIDATION: - cmd_segment = get_command_for_validation(cmd, segments) - # ... existing validation code ... 
- continue - - # PHASE 3: Request approval - is_dangerous = cmd in DANGEROUS_COMMANDS - request_id = approval_mgr.request_approval( - command=cmd, - is_dangerous=is_dangerous, - timeout_seconds=300 # TODO: Get from org config - ) - - decision = approval_mgr.wait_for_response(request_id) - - if decision == ApprovalDecision.DENY: - return { - "decision": "block", - "reason": f"Command '{cmd}' was denied.", - } - elif decision == ApprovalDecision.TIMEOUT: - return { - "decision": "block", - "reason": f"Command '{cmd}' was denied (approval timeout after 5 minutes).", - } - elif decision == ApprovalDecision.ALLOW_PERMANENT: - # Persist to YAML - if project_dir: - try: - persist_command( - project_dir, - cmd, - description=f"Added via mid-session approval" - ) - except Exception as e: - # If persist fails, still allow for session - print(f"Warning: Could not save to config: {e}") - # If ALLOW_SESSION, already added to session set by wait_for_response - - return {} # Allow -``` - -### Step 3: CLI Approval Interface (3-4 hours) - -**File:** `approval_tui.py` - -```python -""" -CLI approval interface using Rich library. -Displays an overlay when approval is needed. -""" - -from rich.console import Console -from rich.panel import Panel -from rich.prompt import Prompt -from rich.live import Live -from rich.text import Text -import sys -import threading -import time - -console = Console() - -def show_approval_dialog( - command: str, - is_dangerous: bool, - timeout_seconds: int, - on_response: callable -): - """ - Show approval dialog in CLI. - - Args: - command: The command requesting approval - is_dangerous: True if dangerous command - timeout_seconds: Timeout in seconds - on_response: Callback(decision: str) - "session"/"permanent"/"deny" - """ - - if is_dangerous: - _show_dangerous_dialog(command, timeout_seconds, on_response) - else: - _show_normal_dialog(command, timeout_seconds, on_response) - -def _show_normal_dialog(command: str, timeout_seconds: int, on_response: callable): - """Standard approval dialog.""" - - start_time = time.time() - - while True: - elapsed = time.time() - start_time - remaining = timeout_seconds - elapsed - - if remaining <= 0: - on_response("deny") - console.print("[red]⏱️ Request timed out - command denied[/red]") - return - - # Build dialog - content = f"""[bold yellow]⚠️ COMMAND APPROVAL REQUIRED[/bold yellow] - -The agent is requesting permission to run: - - [cyan]{command}[/cyan] - -This command is not in your allowed commands list. - -Options: - [green][S][/green] Allow for this [green]Session only[/green] - [blue][P][/blue] Allow [blue]Permanently[/blue] (save to config) - [red][D][/red] [red]Deny[/red] (default in {int(remaining)}s) - -Your choice (S/P/D): """ - - console.print(Panel(content, border_style="yellow", expand=False)) - - # Get input with timeout - choice = _get_input_with_timeout("", timeout=1.0) - - if choice: - choice = choice.upper() - if choice == "S": - on_response("session") - console.print("[green]✅ Allowed for this session[/green]") - return - elif choice == "P": - on_response("permanent") - console.print("[blue]✅ Saved to config permanently[/blue]") - return - elif choice == "D": - on_response("deny") - console.print("[red]❌ Command denied[/red]") - return - else: - console.print("[yellow]Invalid choice. 
Use S, P, or D.[/yellow]") - -def _show_dangerous_dialog(command: str, timeout_seconds: int, on_response: callable): - """Emphatic dialog for dangerous commands.""" - - # Determine warning text based on command - warnings = { - "aws": "AWS CLI can:\n • Access production infrastructure\n • Modify or delete cloud resources\n • Incur significant costs", - "gcloud": "Google Cloud CLI can:\n • Access production GCP resources\n • Modify or delete cloud infrastructure\n • Incur significant costs", - "kubectl": "Kubernetes CLI can:\n • Access production clusters\n • Deploy or delete workloads\n • Disrupt running services", - } - - cmd_name = command.split()[0] - warning = warnings.get(cmd_name, "This command can make significant system changes.") - - content = f"""[bold red on white] ⚠️ DANGER: PRIVILEGED COMMAND REQUESTED [/bold red on white] - -The agent is requesting: [red bold]{command}[/red bold] - -[yellow]{warning}[/yellow] - -[bold]This action could have SERIOUS consequences.[/bold] - -Type [bold]CONFIRM[/bold] to allow, or press Enter to deny:""" - - console.print(Panel(content, border_style="red", expand=False)) - - confirmation = Prompt.ask("", default="deny") - - if confirmation.upper() == "CONFIRM": - # Ask session vs permanent - choice = Prompt.ask( - "Allow for [S]ession or [P]ermanent?", - choices=["S", "P", "s", "p"], - default="S" - ) - if choice.upper() == "P": - on_response("permanent") - console.print("[blue]✅ Saved to config permanently[/blue]") - else: - on_response("session") - console.print("[green]✅ Allowed for this session[/green]") - else: - on_response("deny") - console.print("[red]❌ Command denied[/red]") - -def _get_input_with_timeout(prompt: str, timeout: float) -> str: - """ - Get input with timeout (non-blocking). - Returns empty string if timeout. 
- """ - import select - - sys.stdout.write(prompt) - sys.stdout.flush() - - # Check if input available (Unix only, Windows needs different approach) - if sys.platform != "win32": - ready, _, _ = select.select([sys.stdin], [], [], timeout) - if ready: - return sys.stdin.readline().strip() - else: - # Windows: use msvcrt.kbhit() and msvcrt.getch() - import msvcrt - start = time.time() - chars = [] - while time.time() - start < timeout: - if msvcrt.kbhit(): - char = msvcrt.getch() - if char == b'\r': # Enter - return ''.join(chars) - elif char == b'\x08': # Backspace - if chars: - chars.pop() - sys.stdout.write('\b \b') - else: - chars.append(char.decode('utf-8')) - sys.stdout.write(char.decode('utf-8')) - time.sleep(0.01) - - return "" -``` - -### Step 4: React UI Components (4-5 hours) - -**File:** `ui/src/components/ApprovalBanner.tsx` - -```tsx -import React, { useState, useEffect } from 'react'; -import { X, AlertTriangle, Clock } from 'lucide-react'; - -interface ApprovalRequest { - request_id: string; - command: string; - is_dangerous: boolean; - timeout_seconds: number; - warning_text?: string; - timestamp: number; -} - -interface ApprovalBannerProps { - requests: ApprovalRequest[]; - onRespond: (requestId: string, decision: 'session' | 'permanent' | 'deny') => void; -} - -export function ApprovalBanner({ requests, onRespond }: ApprovalBannerProps) { - const [remainingTimes, setRemainingTimes] = useState>({}); - - // Update countdown timers - useEffect(() => { - const interval = setInterval(() => { - const now = Date.now(); - const newTimes: Record = {}; - - requests.forEach(req => { - const elapsed = (now - req.timestamp) / 1000; - const remaining = Math.max(0, req.timeout_seconds - elapsed); - newTimes[req.request_id] = remaining; - - // Auto-deny on timeout - if (remaining === 0) { - onRespond(req.request_id, 'deny'); - } - }); - - setRemainingTimes(newTimes); - }, 100); - - return () => clearInterval(interval); - }, [requests, onRespond]); - - if (requests.length === 0) return null; - - const formatTime = (seconds: number): string => { - const mins = Math.floor(seconds / 60); - const secs = Math.floor(seconds % 60); - return `${mins}:${secs.toString().padStart(2, '0')}`; - }; - - return ( -
-
- {requests.length === 1 ? ( - - ) : ( - - )} -
-
- ); -} - -function SingleRequestView({ - request, - remaining, - onRespond, - formatTime, -}: { - request: ApprovalRequest; - remaining: number; - onRespond: (requestId: string, decision: 'session' | 'permanent' | 'deny') => void; - formatTime: (seconds: number) => string; -}) { - const isDangerous = request.is_dangerous; - - return ( -
- {isDangerous && ( -
- - DANGER: PRIVILEGED COMMAND -
- )} - -
-
-
- Agent requesting permission: - - {request.command} - -
- - {request.warning_text && ( -

- {request.warning_text} -

- )} -
- -
- - - - - - -
- - {formatTime(remaining)} -
-
-
-
- ); -} - -function MultipleRequestsView({ - requests, - remainingTimes, - onRespond, - formatTime, -}: { - requests: ApprovalRequest[]; - remainingTimes: Record; - onRespond: (requestId: string, decision: 'session' | 'permanent' | 'deny') => void; - formatTime: (seconds: number) => string; -}) { - return ( -
-
- ⚠️ {requests.length} approval requests pending -
- -
- {requests.map(req => ( -
- - {req.command} - - -
- - - - - - - - {formatTime(remainingTimes[req.request_id] || 0)} - -
-
- ))} -
-
- ); -} -``` - -**File:** `ui/src/hooks/useWebSocket.ts` (add approval handling) - -```typescript -// Add to message types -type ApprovalRequestMessage = { - type: 'approval_request'; - request_id: string; - command: string; - is_dangerous: boolean; - timeout_seconds: number; - warning_text?: string; -}; - -// Add to useWebSocket hook -const [approvalRequests, setApprovalRequests] = useState([]); - -// In message handler -if (data.type === 'approval_request') { - setApprovalRequests(prev => [ - ...prev, - { - ...data, - timestamp: Date.now(), - }, - ]); -} - -// Approval response function -const respondToApproval = useCallback( - (requestId: string, decision: 'session' | 'permanent' | 'deny') => { - if (ws.current?.readyState === WebSocket.OPEN) { - ws.current.send( - JSON.stringify({ - type: 'approval_response', - request_id: requestId, - decision, - }) - ); - } - - // Remove from queue - setApprovalRequests(prev => - prev.filter(req => req.request_id !== requestId) - ); - }, - [] -); - -return { - // ... existing returns - approvalRequests, - respondToApproval, -}; -``` - -### Step 5: Backend WebSocket (2-3 hours) - -**File:** `server/routers/agent.py` - -```python -# Add to WebSocket message handlers - -async def handle_approval_response(websocket: WebSocket, data: dict): - """ - Handle approval response from UI. - - Message format: - { - "type": "approval_response", - "request_id": "req_abc123", - "decision": "session" | "permanent" | "deny" - } - """ - request_id = data.get("request_id") - decision = data.get("decision") - - if not request_id or not decision: - return - - # Convert string to enum - decision_map = { - "session": ApprovalDecision.ALLOW_SESSION, - "permanent": ApprovalDecision.ALLOW_PERMANENT, - "deny": ApprovalDecision.DENY, - } - - approval_decision = decision_map.get(decision, ApprovalDecision.DENY) - - # Respond to approval manager - from security import get_approval_manager - approval_mgr = get_approval_manager() - approval_mgr.respond(request_id, approval_decision) - - -async def send_approval_request( - websocket: WebSocket, - request_id: str, - command: str, - is_dangerous: bool, - timeout_seconds: int, - warning_text: str = None -): - """ - Send approval request to UI via WebSocket. 
- """ - await websocket.send_json({ - "type": "approval_request", - "request_id": request_id, - "command": command, - "is_dangerous": is_dangerous, - "timeout_seconds": timeout_seconds, - "warning_text": warning_text, - }) -``` - ---- - -## Testing Strategy - -### Unit Tests - -**File:** `test_approval.py` - -```python -def test_approval_manager_request(): - """Test creating approval request.""" - mgr = ApprovalManager() - request_id = mgr.request_approval("swift", is_dangerous=False) - assert request_id.startswith("req_") - -def test_approval_manager_respond(): - """Test responding to approval.""" - mgr = ApprovalManager() - request_id = mgr.request_approval("swift", is_dangerous=False, timeout_seconds=1) - - # Respond in separate thread - import threading - def respond(): - time.sleep(0.1) - mgr.respond(request_id, ApprovalDecision.ALLOW_SESSION) - - t = threading.Thread(target=respond) - t.start() - - decision = mgr.wait_for_response(request_id, timeout_seconds=2) - assert decision == ApprovalDecision.ALLOW_SESSION - t.join() - -def test_approval_timeout(): - """Test approval timeout.""" - mgr = ApprovalManager() - request_id = mgr.request_approval("swift", is_dangerous=False, timeout_seconds=1) - - # Don't respond, let it timeout - decision = mgr.wait_for_response(request_id, timeout_seconds=2) - assert decision == ApprovalDecision.TIMEOUT - -def test_session_allowed(): - """Test session-allowed commands.""" - mgr = ApprovalManager() - assert not mgr.is_session_allowed("swift") - - # Approve for session - request_id = mgr.request_approval("swift", is_dangerous=False, timeout_seconds=1) - mgr.respond(request_id, ApprovalDecision.ALLOW_SESSION) - mgr.wait_for_response(request_id) - - assert mgr.is_session_allowed("swift") -``` - -### Integration Tests - -**File:** `test_security_integration.py` (add Phase 3 tests) - -```python -def test_approval_flow_session(): - """Test mid-session approval with session-only.""" - # Create project with no config - # Mock approval response: session - # Try command → should be allowed - # Try same command again → should still be allowed (session) - pass - -def test_approval_flow_permanent(): - """Test mid-session approval with permanent save.""" - # Create project with empty config - # Mock approval response: permanent - # Try command → should be allowed - # Check YAML file → command should be added - # Create new session → command should still be allowed - pass - -def test_approval_flow_deny(): - """Test mid-session approval denial.""" - # Create project - # Mock approval response: deny - # Try command → should be blocked - pass - -def test_approval_timeout(): - """Test approval timeout auto-deny.""" - # Create project - # Don't respond to approval - # Wait for timeout - # Command should be blocked with timeout message - pass - -def test_concurrent_approvals(): - """Test multiple simultaneous approval requests.""" - # Create project - # Try 3 commands at once - # All should queue - # Respond to each individually - # Verify all handled correctly - pass -``` - -### Manual Testing Checklist - -- [ ] CLI mode: Request approval for unknown command -- [ ] CLI mode: Press S → command works this session -- [ ] CLI mode: Press P → command saved to YAML -- [ ] CLI mode: Press D → command denied -- [ ] CLI mode: Wait 5 minutes → timeout, command denied -- [ ] CLI mode: Dangerous command shows emphatic warning -- [ ] UI mode: Banner appears at top -- [ ] UI mode: Click "Session Only" → command works -- [ ] UI mode: Click "Save to Config" → YAML updated -- [ ] UI 
mode: Click "Deny" → command blocked -- [ ] UI mode: Multiple requests → all shown in queue -- [ ] UI mode: Countdown timer updates -- [ ] Concurrent access: Multiple agents, file locking works -- [ ] Config validation: 50 command limit enforced -- [ ] Session persistence: Session commands available until restart -- [ ] Permanent persistence: Saved commands available after restart - ---- - -## Security Considerations - -### 1. Hardcoded Blocklist is Final - -**NEVER** allow approval for hardcoded blocklist commands: -- `sudo`, `su`, `doas` -- `dd`, `mkfs`, `fdisk` -- `shutdown`, `reboot`, `halt` -- etc. - -These bypass approval entirely - immediate block. - -### 2. Org Blocklist Cannot Be Overridden - -If org config blocks a command, approval is not even requested. - -### 3. Dangerous Commands Require Extra Confirmation - -Commands like `aws`, `kubectl` should: -- Show emphatic warning -- Require typing "CONFIRM" (not just button click) -- Explain potential consequences - -### 4. Timeout is Critical - -Default 5-minute timeout prevents: -- Stale approval requests -- Forgotten dialogs -- Unattended approval accumulation - -### 5. Session vs Permanent - -**Session-only:** -- ✅ Safe for experimentation -- ✅ Doesn't persist across restarts -- ✅ Good for one-off commands - -**Permanent:** -- ⚠️ Saved to YAML forever -- ⚠️ Available to all future sessions -- ⚠️ User should understand impact - -### 6. File Locking is Essential - -Multiple agents or concurrent modifications require: -- Exclusive file locks (fcntl/msvcrt) -- Atomic read-modify-write -- Proper error handling - -Without locking → race conditions → corrupted YAML - -### 7. Audit Trail - -Consider logging all approval decisions: -``` -[2026-01-22 10:30:45] User approved 'swift' (session-only) -[2026-01-22 10:32:12] User approved 'xcodebuild' (permanent) -[2026-01-22 10:35:00] Approval timeout for 'wget' (denied) -``` - ---- - -## Future Enhancements - -Beyond Phase 3 scope, but possible extensions: - -### 1. Approval Profiles - -Pre-defined approval sets: -```yaml -profiles: - ios-dev: - - swift* - - xcodebuild - - xcrun - - rust-dev: - - cargo - - rustc - - clippy -``` - -User can activate profile with one click. - -### 2. Smart Recommendations - -Agent AI suggests commands to add based on: -- Project type detection (iOS, Rust, Python) -- Frequently denied commands -- Similar projects - -### 3. Approval History - -Show past approvals in UI: -- What was approved -- When -- Session vs permanent -- By which agent - -### 4. Bulk Approve/Deny - -When agent requests multiple commands: -- "Approve all for session" -- "Save all to config" -- "Deny all" - -### 5. Temporary Time-Based Approval - -"Allow for next 1 hour" option: -- Not session-only (survives restarts) -- Not permanent (expires) -- Good for contractors/temporary access - -### 6. Command Arguments Validation - -Phase 1 has placeholder, could be fully implemented: -```yaml -- name: rm - description: Remove files - args_whitelist: - - "-rf ./build/*" - - "-rf ./dist/*" -``` - -### 7. Remote Approval - -For team environments: -- Agent requests approval -- Notification sent to team lead -- Lead approves/denies remotely -- Agent proceeds based on decision - ---- - -## Questions for Implementer - -Before starting Phase 3, consider: - -1. **CLI vs UI priority?** - - Implement CLI first (simpler)? - - Or UI first (more users)? - -2. **Approval persistence format?** - - Separate log file for audit trail? - - Just YAML modifications? - -3. 
**Dangerous commands list?** - - Current list correct? - - Need org-specific dangerous commands? - -4. **Timeout default?** - - 5 minutes reasonable? - - Different for dangerous commands? - -5. **UI placement?** - - Top banner (blocks view)? - - Modal dialog (more prominent)? - - Sidebar notification? - -6. **Multiple agents?** - - How to attribute approvals? - - Show which agent requested? - -7. **Undo permanent approvals?** - - UI for removing saved commands? - - Or manual YAML editing only? - ---- - -## Success Criteria - -Phase 3 is complete when: - -- ✅ Agent can request approval for unknown commands -- ✅ CLI shows Rich TUI dialog with countdown -- ✅ UI shows React banner with buttons -- ✅ Session-only approval works (in-memory) -- ✅ Permanent approval persists to YAML -- ✅ Dangerous commands show emphatic warnings -- ✅ Timeout auto-denies after configured time -- ✅ Multiple requests can queue -- ✅ File locking prevents corruption -- ✅ All tests pass (unit + integration) -- ✅ Documentation updated -- ✅ Backward compatible (Phase 1/2 still work) - ---- - -## Estimated Timeline - -| Task | Time | Dependencies | -|------|------|--------------| -| ApprovalManager core | 2-3 hours | None | -| Modify bash_security_hook | 1 hour | ApprovalManager | -| File locking + persist | 1-2 hours | None | -| CLI approval TUI | 3-4 hours | ApprovalManager | -| React components | 4-5 hours | None | -| WebSocket integration | 2-3 hours | React components | -| Unit tests | 3-4 hours | All core features | -| Integration tests | 2-3 hours | Full implementation | -| Documentation | 2-3 hours | None | -| Manual testing + polish | 4-6 hours | Full implementation | - -**Total: 24-36 hours (3-4.5 days)** - ---- - -## Getting Started - -To implement Phase 3: - -1. **Read this document fully** -2. **Review Phase 1 & 2 code** (`security.py`, `client.py`) -3. **Run existing tests** to understand current behavior -4. **Start with ApprovalManager** (core functionality) -5. **Add file locking** (critical for safety) -6. **Choose CLI or UI** (whichever you're more comfortable with) -7. **Write tests as you go** (don't leave for end) -8. **Manual test frequently** (approval UX needs polish) - -Good luck! 🚀 - ---- - -**Document Version:** 1.0 -**Last Updated:** 2026-01-22 -**Author:** Phase 1 & 2 implementation team -**Status:** Ready for implementation diff --git a/SAMPLE_PROMPT.md b/SAMPLE_PROMPT.md deleted file mode 100644 index 284a4bf..0000000 --- a/SAMPLE_PROMPT.md +++ /dev/null @@ -1,22 +0,0 @@ -Let's call it Simple Todo. This is a really simple web app that I can use to track my to-do items using a Kanban -board. I should be able to add to-dos and then drag and drop them through the Kanban board. The different columns in -the Kanban board are: - -- To Do -- In Progress -- Done - -The app should use a neobrutalism design. - -There is no need for user authentication either. All the to-dos will be stored in local storage, so each user has -access to all of their to-dos when they open their browser. So do not worry about implementing a backend with user -authentication or a database. Simply store everything in local storage. As for the design, please try to avoid AI -slop, so use your front-end design skills to design something beautiful and practical. 
As for the content of the -to-dos, we should store: - -- The name or the title at the very least -- Optionally, we can also set tags, due dates, and priorities which should be represented as beautiful little badges - on the to-do card Users should have the ability to easily clear out all the completed To-Dos. They should also be - able to filter and search for To-Dos as well. - -You choose the rest. Keep it simple. Should be 25 features. diff --git a/agent.py b/agent.py index e64f38b..bdc174c 100644 --- a/agent.py +++ b/agent.py @@ -141,6 +141,7 @@ async def run_autonomous_agent( feature_id: Optional[int] = None, agent_type: Optional[str] = None, testing_feature_id: Optional[int] = None, + testing_feature_ids: Optional[list[int]] = None, ) -> None: """ Run the autonomous agent loop. @@ -152,7 +153,8 @@ async def run_autonomous_agent( yolo_mode: If True, skip browser testing in coding agent prompts feature_id: If set, work only on this specific feature (used by orchestrator for coding agents) agent_type: Type of agent: "initializer", "coding", "testing", or None (auto-detect) - testing_feature_id: For testing agents, the pre-claimed feature ID to test + testing_feature_id: For testing agents, the pre-claimed feature ID to test (legacy single mode) + testing_feature_ids: For testing agents, list of feature IDs to batch test """ print("\n" + "=" * 70) print(" AUTONOMOUS CODING AGENT") @@ -241,19 +243,19 @@ async def run_autonomous_agent( agent_id = f"feature-{feature_id}" else: agent_id = None - client = create_client(project_dir, model, yolo_mode=yolo_mode, agent_id=agent_id) + client = create_client(project_dir, model, yolo_mode=yolo_mode, agent_id=agent_id, agent_type=agent_type) # Choose prompt based on agent type if agent_type == "initializer": prompt = get_initializer_prompt(project_dir) elif agent_type == "testing": - prompt = get_testing_prompt(project_dir, testing_feature_id) + prompt = get_testing_prompt(project_dir, testing_feature_id, testing_feature_ids) elif feature_id: # Single-feature mode (used by orchestrator for coding agents) prompt = get_single_feature_prompt(feature_id, project_dir, yolo_mode) else: # General coding prompt (legacy path) - prompt = get_coding_prompt(project_dir) + prompt = get_coding_prompt(project_dir, yolo_mode=yolo_mode) # Run session with async context manager # Wrap in try/except to handle MCP server startup failures gracefully diff --git a/api/dependency_resolver.py b/api/dependency_resolver.py index 6b09244..9cc8082 100644 --- a/api/dependency_resolver.py +++ b/api/dependency_resolver.py @@ -7,6 +7,7 @@ Includes cycle detection, validation, and helper functions for dependency manage """ import heapq +from collections import deque from typing import TypedDict # Security: Prevent DoS via excessive dependencies @@ -301,19 +302,20 @@ def compute_scheduling_scores(features: list[dict]) -> dict[int, float]: # Calculate depths via BFS from roots # Use visited set to prevent infinite loops from circular dependencies + # Use deque for O(1) popleft instead of list.pop(0) which is O(n) depths: dict[int, int] = {} visited: set[int] = set() roots = [f["id"] for f in features if not parents[f["id"]]] - queue = [(root, 0) for root in roots] - while queue: - node_id, depth = queue.pop(0) + bfs_queue: deque[tuple[int, int]] = deque((root, 0) for root in roots) + while bfs_queue: + node_id, depth = bfs_queue.popleft() if node_id in visited: continue # Skip already visited nodes (handles cycles) visited.add(node_id) depths[node_id] = depth for child_id in 
children[node_id]: if child_id not in visited: - queue.append((child_id, depth + 1)) + bfs_queue.append((child_id, depth + 1)) # Handle orphaned nodes (shouldn't happen but be safe) for f in features: diff --git a/autonomous_agent_demo.py b/autonomous_agent_demo.py index 03ceb7f..1e3d4d6 100644 --- a/autonomous_agent_demo.py +++ b/autonomous_agent_demo.py @@ -145,7 +145,14 @@ Authentication: "--testing-feature-id", type=int, default=None, - help="Feature ID to regression test (used by orchestrator for testing agents)", + help="Feature ID to regression test (used by orchestrator for testing agents, legacy single mode)", + ) + + parser.add_argument( + "--testing-feature-ids", + type=str, + default=None, + help="Comma-separated feature IDs to regression test in batch (e.g., '5,12,18')", ) # Testing agent configuration @@ -156,6 +163,13 @@ Authentication: help="Testing agents per coding agent (0-3, default: 1). Set to 0 to disable testing agents.", ) + parser.add_argument( + "--testing-batch-size", + type=int, + default=3, + help="Number of features per testing batch (1-5, default: 3)", + ) + return parser.parse_args() @@ -199,6 +213,15 @@ def main() -> None: if migrated: print(f"Migrated project files to .autocoder/: {', '.join(migrated)}", flush=True) + # Parse batch testing feature IDs (comma-separated string -> list[int]) + testing_feature_ids: list[int] | None = None + if args.testing_feature_ids: + try: + testing_feature_ids = [int(x.strip()) for x in args.testing_feature_ids.split(",") if x.strip()] + except ValueError: + print(f"Error: --testing-feature-ids must be comma-separated integers, got: {args.testing_feature_ids}") + return + try: if args.agent_type: # Subprocess mode - spawned by orchestrator for a specific role @@ -211,6 +234,7 @@ def main() -> None: feature_id=args.feature_id, agent_type=args.agent_type, testing_feature_id=args.testing_feature_id, + testing_feature_ids=testing_feature_ids, ) ) else: @@ -229,6 +253,7 @@ def main() -> None: model=args.model, yolo_mode=args.yolo, testing_agent_ratio=args.testing_ratio, + testing_batch_size=args.testing_batch_size, ) ) except KeyboardInterrupt: diff --git a/client.py b/client.py index 0b55295..d31b5ad 100644 --- a/client.py +++ b/client.py @@ -16,7 +16,8 @@ from claude_agent_sdk import ClaudeAgentOptions, ClaudeSDKClient from claude_agent_sdk.types import HookContext, HookInput, HookMatcher, SyncHookJSONOutput from dotenv import load_dotenv -from security import bash_security_hook +from env_constants import API_ENV_VARS +from security import SENSITIVE_DIRECTORIES, bash_security_hook # Load environment variables from .env file if present load_dotenv() @@ -31,43 +32,15 @@ DEFAULT_PLAYWRIGHT_HEADLESS = True # Firefox is recommended for lower CPU usage DEFAULT_PLAYWRIGHT_BROWSER = "firefox" -# Environment variables to pass through to Claude CLI for API configuration -# These allow using alternative API endpoints (e.g., GLM via z.ai, Vertex AI) without -# affecting the user's global Claude Code settings -API_ENV_VARS = [ - "ANTHROPIC_BASE_URL", # Custom API endpoint (e.g., https://api.z.ai/api/anthropic) - "ANTHROPIC_AUTH_TOKEN", # API authentication token - "API_TIMEOUT_MS", # Request timeout in milliseconds - "ANTHROPIC_DEFAULT_SONNET_MODEL", # Model override for Sonnet - "ANTHROPIC_DEFAULT_OPUS_MODEL", # Model override for Opus - "ANTHROPIC_DEFAULT_HAIKU_MODEL", # Model override for Haiku - # Vertex AI configuration - "CLAUDE_CODE_USE_VERTEX", # Enable Vertex AI mode (set to "1") - "CLOUD_ML_REGION", # GCP region (e.g., us-east5) 
- "ANTHROPIC_VERTEX_PROJECT_ID", # GCP project ID -] - # Extra read paths for cross-project file access (read-only) # Set EXTRA_READ_PATHS environment variable with comma-separated absolute paths # Example: EXTRA_READ_PATHS=/Volumes/Data/dev,/Users/shared/libs EXTRA_READ_PATHS_VAR = "EXTRA_READ_PATHS" -# Sensitive directories that should never be allowed via EXTRA_READ_PATHS -# These contain credentials, keys, or system-critical files -EXTRA_READ_PATHS_BLOCKLIST = { - ".ssh", - ".aws", - ".azure", - ".kube", - ".gnupg", - ".gpg", - ".password-store", - ".docker", - ".config/gcloud", - ".npmrc", - ".pypirc", - ".netrc", -} +# Sensitive directories that should never be allowed via EXTRA_READ_PATHS. +# Delegates to the canonical SENSITIVE_DIRECTORIES set in security.py so that +# this blocklist and the filesystem browser API share a single source of truth. +EXTRA_READ_PATHS_BLOCKLIST = SENSITIVE_DIRECTORIES def convert_model_for_vertex(model: str) -> str: """ @@ -209,32 +182,55 @@ def get_extra_read_paths() -> list[Path]: return validated_paths -# Feature MCP tools for feature/test management -FEATURE_MCP_TOOLS = [ - # Core feature operations +# Per-agent-type MCP tool lists. +# Only expose the tools each agent type actually needs, reducing tool schema +# overhead and preventing agents from calling tools meant for other roles. +# +# Tools intentionally omitted from ALL agent lists (UI/orchestrator only): +# feature_get_ready, feature_get_blocked, feature_get_graph, +# feature_remove_dependency +# +# The ghost tool "feature_release_testing" was removed entirely -- it was +# listed here but never implemented in mcp_server/feature_mcp.py. + +CODING_AGENT_TOOLS = [ "mcp__features__feature_get_stats", - "mcp__features__feature_get_by_id", # Get assigned feature details - "mcp__features__feature_get_summary", # Lightweight: id, name, status, deps only + "mcp__features__feature_get_by_id", + "mcp__features__feature_get_summary", + "mcp__features__feature_claim_and_get", "mcp__features__feature_mark_in_progress", - "mcp__features__feature_claim_and_get", # Atomic claim + get details "mcp__features__feature_mark_passing", - "mcp__features__feature_mark_failing", # Mark regression detected + "mcp__features__feature_mark_failing", "mcp__features__feature_skip", - "mcp__features__feature_create_bulk", - "mcp__features__feature_create", "mcp__features__feature_clear_in_progress", - "mcp__features__feature_release_testing", # Release testing claim - # Dependency management - "mcp__features__feature_add_dependency", - "mcp__features__feature_remove_dependency", - "mcp__features__feature_set_dependencies", - # Query tools - "mcp__features__feature_get_ready", - "mcp__features__feature_get_blocked", - "mcp__features__feature_get_graph", ] -# Playwright MCP tools for browser automation +TESTING_AGENT_TOOLS = [ + "mcp__features__feature_get_stats", + "mcp__features__feature_get_by_id", + "mcp__features__feature_get_summary", + "mcp__features__feature_mark_passing", + "mcp__features__feature_mark_failing", +] + +INITIALIZER_AGENT_TOOLS = [ + "mcp__features__feature_get_stats", + "mcp__features__feature_create_bulk", + "mcp__features__feature_create", + "mcp__features__feature_add_dependency", + "mcp__features__feature_set_dependencies", +] + +# Union of all agent tool lists -- used for permissions (all tools remain +# *permitted* so the MCP server can respond, but only the agent-type-specific +# list is included in allowed_tools, which controls what the LLM sees). 
+ALL_FEATURE_MCP_TOOLS = sorted( + set(CODING_AGENT_TOOLS) | set(TESTING_AGENT_TOOLS) | set(INITIALIZER_AGENT_TOOLS) +) + +# Playwright MCP tools for browser automation. +# Full set of tools for comprehensive UI testing including drag-and-drop, +# hover menus, file uploads, tab management, etc. PLAYWRIGHT_TOOLS = [ # Core navigation & screenshots "mcp__playwright__browser_navigate", @@ -247,9 +243,10 @@ PLAYWRIGHT_TOOLS = [ "mcp__playwright__browser_type", "mcp__playwright__browser_fill_form", "mcp__playwright__browser_select_option", - "mcp__playwright__browser_hover", - "mcp__playwright__browser_drag", "mcp__playwright__browser_press_key", + "mcp__playwright__browser_drag", + "mcp__playwright__browser_hover", + "mcp__playwright__browser_file_upload", # JavaScript & debugging "mcp__playwright__browser_evaluate", @@ -258,16 +255,17 @@ PLAYWRIGHT_TOOLS = [ "mcp__playwright__browser_network_requests", # Browser management - "mcp__playwright__browser_close", "mcp__playwright__browser_resize", - "mcp__playwright__browser_tabs", "mcp__playwright__browser_wait_for", "mcp__playwright__browser_handle_dialog", - "mcp__playwright__browser_file_upload", "mcp__playwright__browser_install", + "mcp__playwright__browser_close", + "mcp__playwright__browser_tabs", ] -# Built-in tools +# Built-in tools available to agents. +# WebFetch and WebSearch are included so coding agents can look up current +# documentation for frameworks and libraries they are implementing. BUILTIN_TOOLS = [ "Read", "Write", @@ -285,6 +283,7 @@ def create_client( model: str, yolo_mode: bool = False, agent_id: str | None = None, + agent_type: str = "coding", ): """ Create a Claude Agent SDK client with multi-layered security. @@ -295,6 +294,8 @@ def create_client( yolo_mode: If True, skip Playwright MCP server for rapid prototyping agent_id: Optional unique identifier for browser isolation in parallel mode. When provided, each agent gets its own browser profile. + agent_type: One of "coding", "testing", or "initializer". Controls which + MCP tools are exposed and the max_turns limit. Returns: Configured ClaudeSDKClient (from claude_agent_sdk) @@ -308,13 +309,34 @@ def create_client( Note: Authentication is handled by start.bat/start.sh before this runs. The Claude SDK auto-detects credentials from the Claude CLI configuration """ - # Build allowed tools list based on mode - # In YOLO mode, exclude Playwright tools for faster prototyping - allowed_tools = [*BUILTIN_TOOLS, *FEATURE_MCP_TOOLS] + # Select the feature MCP tools appropriate for this agent type + feature_tools_map = { + "coding": CODING_AGENT_TOOLS, + "testing": TESTING_AGENT_TOOLS, + "initializer": INITIALIZER_AGENT_TOOLS, + } + feature_tools = feature_tools_map.get(agent_type, CODING_AGENT_TOOLS) + + # Select max_turns based on agent type: + # - coding/initializer: 300 turns (complex multi-step implementation) + # - testing: 100 turns (focused verification of a single feature) + max_turns_map = { + "coding": 300, + "testing": 100, + "initializer": 300, + } + max_turns = max_turns_map.get(agent_type, 300) + + # Build allowed tools list based on mode and agent type. + # In YOLO mode, exclude Playwright tools for faster prototyping. + allowed_tools = [*BUILTIN_TOOLS, *feature_tools] if not yolo_mode: allowed_tools.extend(PLAYWRIGHT_TOOLS) - # Build permissions list + # Build permissions list. 
+ # We permit ALL feature MCP tools at the security layer (so the MCP server + # can respond if called), but the LLM only *sees* the agent-type-specific + # subset via allowed_tools above. permissions_list = [ # Allow all file operations within the project directory "Read(./**)", @@ -325,11 +347,11 @@ def create_client( # Bash permission granted here, but actual commands are validated # by the bash_security_hook (see security.py for allowed commands) "Bash(*)", - # Allow web tools for documentation lookup - "WebFetch", - "WebSearch", + # Allow web tools for looking up framework/library documentation + "WebFetch(*)", + "WebSearch(*)", # Allow Feature MCP tools for feature management - *FEATURE_MCP_TOOLS, + *ALL_FEATURE_MCP_TOOLS, ] # Add extra read paths from environment variable (read-only access) @@ -461,9 +483,10 @@ def create_client( context["project_dir"] = str(project_dir.resolve()) return await bash_security_hook(input_data, tool_use_id, context) - # PreCompact hook for logging and customizing context compaction + # PreCompact hook for logging and customizing context compaction. # Compaction is handled automatically by Claude Code CLI when context approaches limits. - # This hook allows us to log when compaction occurs and optionally provide custom instructions. + # This hook provides custom instructions that guide the summarizer to preserve + # critical workflow state while discarding verbose/redundant content. async def pre_compact_hook( input_data: HookInput, tool_use_id: str | None, @@ -476,8 +499,9 @@ def create_client( - "auto": Automatic compaction when context approaches token limits - "manual": User-initiated compaction via /compact command - The hook can customize compaction via hookSpecificOutput: - - customInstructions: String with focus areas for summarization + Returns custom instructions that guide the compaction summarizer to: + 1. Preserve critical workflow state (feature ID, modified files, test results) + 2. Discard verbose content (screenshots, long grep outputs, repeated reads) """ trigger = input_data.get("trigger", "auto") custom_instructions = input_data.get("custom_instructions") @@ -488,18 +512,53 @@ def create_client( print("[Context] Manual compaction requested") if custom_instructions: - print(f"[Context] Custom instructions: {custom_instructions}") + print(f"[Context] Custom instructions provided: {custom_instructions}") - # Return empty dict to allow compaction to proceed with default behavior - # To customize, return: - # { - # "hookSpecificOutput": { - # "hookEventName": "PreCompact", - # "customInstructions": "Focus on preserving file paths and test results" - # } - # } - return SyncHookJSONOutput() + # Build compaction instructions that preserve workflow-critical context + # while discarding verbose content that inflates token usage. + # + # The summarizer receives these instructions and uses them to decide + # what to keep vs. discard during context compaction. 
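# --- Illustrative sketch (editor's note, not part of this patch) ---
# Shape of the hook output the code below produces once serialized for the CLI
# process. The exact wire format is an assumption based on the protocol comment
# above ({"hookEventName": ..., "customInstructions": ...}).
_example_precompact_output = {
    "hookSpecificOutput": {
        "hookEventName": "PreCompact",
        "customInstructions": "## PRESERVE ...\n## DISCARD ...",
    }
}
# --- end sketch ---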
+ compaction_guidance = "\n".join([ + "## PRESERVE (critical workflow state)", + "- Current feature ID, feature name, and feature status (pending/in_progress/passing/failing)", + "- List of all files created or modified during this session, with their paths", + "- Last test/lint/type-check results: command run, pass/fail status, and key error messages", + "- Current step in the workflow (e.g., implementing, testing, fixing lint errors)", + "- Any dependency information (which features block this one)", + "- Git operations performed (commits, branches created)", + "- MCP tool call results (feature_claim_and_get, feature_mark_passing, etc.)", + "- Key architectural decisions made during this session", + "", + "## DISCARD (verbose content safe to drop)", + "- Full screenshot base64 data (just note that a screenshot was taken and what it showed)", + "- Long grep/find/glob output listings (summarize to: searched for X, found Y relevant files)", + "- Repeated file reads of the same file (keep only the latest read or a summary of changes)", + "- Full file contents from Read tool (summarize to: read file X, key sections were Y)", + "- Verbose npm/pip install output (just note: dependencies installed successfully/failed)", + "- Full lint/type-check output when passing (just note: lint passed with no errors)", + "- Browser console message dumps (summarize to: N errors found, key error was X)", + "- Redundant tool result confirmations ([Done] markers)", + ]) + print("[Context] Applying custom compaction instructions (preserve workflow state, discard verbose content)") + + # The SDK's HookSpecificOutput union type does not yet include a + # PreCompactHookSpecificOutput variant, but the CLI protocol accepts + # {"hookEventName": "PreCompact", "customInstructions": "..."}. + # The dict is serialized to JSON and sent to the CLI process directly, + # so the runtime behavior is correct despite the type mismatch. + return SyncHookJSONOutput( + hookSpecificOutput={ # type: ignore[typeddict-item] + "hookEventName": "PreCompact", + "customInstructions": compaction_guidance, + } + ) + + # PROMPT CACHING: The Claude Code CLI applies cache_control breakpoints internally. + # Our system_prompt benefits from automatic caching without explicit configuration. + # If explicit cache_control is needed, the SDK would need to accept content blocks + # with cache_control fields (not currently supported in v0.1.x). return ClaudeSDKClient( options=ClaudeAgentOptions( model=model, @@ -508,7 +567,7 @@ def create_client( setting_sources=["project"], # Enable skills, commands, and CLAUDE.md from project dir max_buffer_size=10 * 1024 * 1024, # 10MB for large Playwright screenshots allowed_tools=allowed_tools, - mcp_servers=mcp_servers, + mcp_servers=mcp_servers, # type: ignore[arg-type] # SDK accepts dict config at runtime hooks={ "PreToolUse": [ HookMatcher(matcher="Bash", hooks=[bash_hook_with_context]), @@ -520,7 +579,7 @@ def create_client( HookMatcher(hooks=[pre_compact_hook]), ], }, - max_turns=1000, + max_turns=max_turns, cwd=str(project_dir.resolve()), settings=str(settings_file.resolve()), # Use absolute path env=sdk_env, # Pass API configuration overrides to CLI subprocess @@ -538,7 +597,7 @@ def create_client( # parameters. Instead, context is managed via: # 1. betas=["context-1m-2025-08-07"] - Extended context window # 2. PreCompact hook - Intercept and customize compaction behavior - # 3. max_turns - Limit conversation turns (set to 1000 for long sessions) + # 3. 
max_turns - Limit conversation turns (per agent type: coding=300, testing=100) # # Future SDK versions may add explicit compaction controls. When available, # consider adding: diff --git a/env_constants.py b/env_constants.py new file mode 100644 index 0000000..2a8753d --- /dev/null +++ b/env_constants.py @@ -0,0 +1,27 @@ +""" +Shared Environment Variable Constants +====================================== + +Single source of truth for environment variables forwarded to Claude CLI +subprocesses. Imported by both ``client.py`` (agent sessions) and +``server/services/chat_constants.py`` (chat sessions) to avoid maintaining +duplicate lists. + +These allow autocoder to use alternative API endpoints (Ollama, GLM, +Vertex AI) without affecting the user's global Claude Code settings. +""" + +API_ENV_VARS: list[str] = [ + # Core API configuration + "ANTHROPIC_BASE_URL", # Custom API endpoint (e.g., https://api.z.ai/api/anthropic) + "ANTHROPIC_AUTH_TOKEN", # API authentication token + "API_TIMEOUT_MS", # Request timeout in milliseconds + # Model tier overrides + "ANTHROPIC_DEFAULT_SONNET_MODEL", # Model override for Sonnet + "ANTHROPIC_DEFAULT_OPUS_MODEL", # Model override for Opus + "ANTHROPIC_DEFAULT_HAIKU_MODEL", # Model override for Haiku + # Vertex AI configuration + "CLAUDE_CODE_USE_VERTEX", # Enable Vertex AI mode (set to "1") + "CLOUD_ML_REGION", # GCP region (e.g., us-east5) + "ANTHROPIC_VERTEX_PROJECT_ID", # GCP project ID +] diff --git a/mcp_server/feature_mcp.py b/mcp_server/feature_mcp.py index a7f2691..ce3859f 100755 --- a/mcp_server/feature_mcp.py +++ b/mcp_server/feature_mcp.py @@ -614,7 +614,7 @@ def feature_create_bulk( if indices: # Convert indices to actual feature IDs dep_ids = [created_features[idx].id for idx in indices] - created_features[i].dependencies = sorted(dep_ids) + created_features[i].dependencies = sorted(dep_ids) # type: ignore[assignment] # SQLAlchemy JSON Column accepts list at runtime deps_count += 1 # Commit happens automatically on context manager exit diff --git a/parallel_orchestrator.py b/parallel_orchestrator.py index 6e8bb54..3a0196b 100644 --- a/parallel_orchestrator.py +++ b/parallel_orchestrator.py @@ -20,6 +20,7 @@ Usage: import asyncio import atexit +import logging import os import signal import subprocess @@ -27,7 +28,7 @@ import sys import threading from datetime import datetime, timezone from pathlib import Path -from typing import Callable, Literal +from typing import Any, Callable, Literal from sqlalchemy import text @@ -36,6 +37,8 @@ from api.dependency_resolver import are_dependencies_satisfied, compute_scheduli from progress import has_features from server.utils.process_utils import kill_process_tree +logger = logging.getLogger(__name__) + # Root directory of autocoder (where this script and autonomous_agent_demo.py live) AUTOCODER_ROOT = Path(__file__).parent.resolve() @@ -83,23 +86,25 @@ class DebugLogger: debug_log = DebugLogger() -def _dump_database_state(session, label: str = ""): - """Helper to dump full database state to debug log.""" - from api.database import Feature - all_features = session.query(Feature).all() +def _dump_database_state(feature_dicts: list[dict], label: str = ""): + """Helper to dump full database state to debug log. - passing = [f for f in all_features if f.passes] - in_progress = [f for f in all_features if f.in_progress and not f.passes] - pending = [f for f in all_features if not f.passes and not f.in_progress] + Args: + feature_dicts: Pre-fetched list of feature dicts. + label: Optional label for the dump entry. 
+ """ + passing = [f for f in feature_dicts if f.get("passes")] + in_progress = [f for f in feature_dicts if f.get("in_progress") and not f.get("passes")] + pending = [f for f in feature_dicts if not f.get("passes") and not f.get("in_progress")] debug_log.log("DB_DUMP", f"Full database state {label}", - total_features=len(all_features), + total_features=len(feature_dicts), passing_count=len(passing), - passing_ids=[f.id for f in passing], + passing_ids=[f["id"] for f in passing], in_progress_count=len(in_progress), - in_progress_ids=[f.id for f in in_progress], + in_progress_ids=[f["id"] for f in in_progress], pending_count=len(pending), - pending_ids=[f.id for f in pending[:10]]) # First 10 pending only + pending_ids=[f["id"] for f in pending[:10]]) # First 10 pending only # ============================================================================= # Process Limits @@ -125,6 +130,7 @@ def _dump_database_state(session, label: str = ""): MAX_PARALLEL_AGENTS = 5 MAX_TOTAL_AGENTS = 10 DEFAULT_CONCURRENCY = 3 +DEFAULT_TESTING_BATCH_SIZE = 3 # Number of features per testing batch (1-5) POLL_INTERVAL = 5 # seconds between checking for ready features MAX_FEATURE_RETRIES = 3 # Maximum times to retry a failed feature INITIALIZER_TIMEOUT = 1800 # 30 minutes timeout for initializer @@ -146,6 +152,7 @@ class ParallelOrchestrator: model: str | None = None, yolo_mode: bool = False, testing_agent_ratio: int = 1, + testing_batch_size: int = DEFAULT_TESTING_BATCH_SIZE, on_output: Callable[[int, str], None] | None = None, on_status: Callable[[int, str], None] | None = None, ): @@ -159,6 +166,8 @@ class ParallelOrchestrator: yolo_mode: Whether to run in YOLO mode (skip testing agents entirely) testing_agent_ratio: Number of regression testing agents to maintain (0-3). 0 = disabled, 1-3 = maintain that many testing agents running independently. + testing_batch_size: Number of features to include per testing session (1-5). + Each testing agent receives this many features to regression test. on_output: Callback for agent output (feature_id, line) on_status: Callback for agent status changes (feature_id, status) """ @@ -167,6 +176,7 @@ class ParallelOrchestrator: self.model = model self.yolo_mode = yolo_mode self.testing_agent_ratio = min(max(testing_agent_ratio, 0), 3) # Clamp 0-3 + self.testing_batch_size = min(max(testing_batch_size, 1), 5) # Clamp 1-5 self.on_output = on_output self.on_status = on_status @@ -186,6 +196,10 @@ class ParallelOrchestrator: # Track feature failures to prevent infinite retry loops self._failure_counts: dict[int, int] = {} + # Track recently tested feature IDs to avoid redundant re-testing. + # Cleared when all passing features have been covered at least once. + self._recently_tested: set[int] = set() + # Shutdown flag for async-safe signal handling # Signal handlers only set this flag; cleanup happens in the main loop self._shutdown_requested = False @@ -213,6 +227,9 @@ class ParallelOrchestrator: This simplifies the architecture by removing unnecessary coordination. Returns the feature ID if available, None if no passing features exist. + + Note: Prefer _get_test_batch() for batch testing mode. This method is + retained for backward compatibility. """ from sqlalchemy.sql.expression import func @@ -231,164 +248,282 @@ class ParallelOrchestrator: finally: session.close() - def get_resumable_features(self) -> list[dict]: + def _get_test_batch(self, batch_size: int = 3) -> list[int]: + """Select a prioritized batch of passing features for regression testing. 
+ + Uses weighted scoring to prioritize features that: + 1. Haven't been tested recently in this orchestrator session + 2. Are depended on by many other features (higher impact if broken) + 3. Have more dependencies themselves (complex integration points) + + When all passing features have been recently tested, the tracking set + is cleared so the cycle starts fresh. + + Args: + batch_size: Maximum number of feature IDs to return (1-5). + + Returns: + List of feature IDs to test, may be shorter than batch_size if + fewer passing features are available. Empty list if none available. + """ + session = self.get_session() + try: + session.expire_all() + passing = ( + session.query(Feature) + .filter(Feature.passes == True) + .filter(Feature.in_progress == False) # Don't test while coding + .all() + ) + + # Extract data from ORM objects before closing the session to avoid + # DetachedInstanceError when accessing attributes after session.close(). + passing_data: list[dict] = [] + for f in passing: + passing_data.append({ + 'id': f.id, + 'dependencies': f.get_dependencies_safe() if hasattr(f, 'get_dependencies_safe') else [], + }) + finally: + session.close() + + if not passing_data: + return [] + + # Build a reverse dependency map: feature_id -> count of features that depend on it. + # The Feature model stores dependencies (what I depend ON), so we invert to find + # dependents (what depends ON me). + dependent_counts: dict[int, int] = {} + for fd in passing_data: + for dep_id in fd['dependencies']: + dependent_counts[dep_id] = dependent_counts.get(dep_id, 0) + 1 + + # Exclude features that are already being tested by running testing agents + # to avoid redundant concurrent testing of the same features. + # running_testing_agents is dict[pid, (primary_feature_id, process)] + with self._lock: + currently_testing_ids: set[int] = set() + for _pid, (feat_id, _proc) in self.running_testing_agents.items(): + currently_testing_ids.add(feat_id) + + # If all passing features have been recently tested, reset the tracker + # so we cycle through them again rather than returning empty batches. 
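# --- Illustrative sketch (editor's note, not part of this patch) ---
# Worked example of the weighting used below, with made-up feature data:
# feature 7 has 2 dependents, 1 dependency, and was not tested recently, so it
# scores 2*2 + 5 + min(1, 3) = 10; feature 3 has no dependents, 4 dependencies,
# and was tested recently, so it scores 0 + 0 + min(4, 3) = 3.
_dependent_counts = {7: 2, 3: 0}
_recently = {3}

def _score(feature_id: int, dep_count: int) -> int:
    return (
        _dependent_counts.get(feature_id, 0) * 2
        + (5 if feature_id not in _recently else 0)
        + min(dep_count, 3)
    )

assert _score(7, 1) == 10 and _score(3, 4) == 3
# --- end sketch ---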
+ passing_ids = {fd['id'] for fd in passing_data} + if passing_ids.issubset(self._recently_tested): + self._recently_tested.clear() + + # Score each feature by testing priority + scored: list[tuple[int, int]] = [] + for fd in passing_data: + f_id = fd['id'] + + # Skip features already being tested by a running testing agent + if f_id in currently_testing_ids: + continue + + score = 0 + + # Weight 1: Features depended on by many others are higher impact + # if they regress, so test them more often + score += dependent_counts.get(f_id, 0) * 2 + + # Weight 2: Strongly prefer features not tested recently + if f_id not in self._recently_tested: + score += 5 + + # Weight 3: Features with more dependencies are integration points + # that are more likely to regress when other code changes + dep_count = len(fd['dependencies']) + score += min(dep_count, 3) # Cap at 3 to avoid over-weighting + + scored.append((f_id, score)) + + # Sort by score descending (highest priority first) + scored.sort(key=lambda x: x[1], reverse=True) + selected = [fid for fid, _ in scored[:batch_size]] + + # Track what we've tested to avoid re-testing the same features next batch + self._recently_tested.update(selected) + + debug_log.log("TEST_BATCH", f"Selected {len(selected)} features for testing batch", + selected_ids=selected, + recently_tested_count=len(self._recently_tested), + total_passing=len(passing_data)) + + return selected + + def get_resumable_features( + self, + feature_dicts: list[dict] | None = None, + scheduling_scores: dict[int, float] | None = None, + ) -> list[dict]: """Get features that were left in_progress from a previous session. These are features where in_progress=True but passes=False, and they're not currently being worked on by this orchestrator. This handles the case where a previous session was interrupted before completing the feature. + + Args: + feature_dicts: Pre-fetched list of feature dicts. If None, queries the database. + scheduling_scores: Pre-computed scheduling scores. If None, computed from feature_dicts. 
""" - session = self.get_session() - try: - # Force fresh read from database to avoid stale cached data - # This is critical when agent subprocesses have committed changes - session.expire_all() + if feature_dicts is None: + session = self.get_session() + try: + session.expire_all() + all_features = session.query(Feature).all() + feature_dicts = [f.to_dict() for f in all_features] + finally: + session.close() - # Find features that are in_progress but not complete - stale = session.query(Feature).filter( - Feature.in_progress == True, - Feature.passes == False - ).all() + # Snapshot running IDs once to avoid acquiring lock per feature + with self._lock: + running_ids = set(self.running_coding_agents.keys()) - resumable = [] - for f in stale: - # Skip if already running in this orchestrator instance - with self._lock: - if f.id in self.running_coding_agents: - continue - # Skip if feature has failed too many times - if self._failure_counts.get(f.id, 0) >= MAX_FEATURE_RETRIES: - continue - resumable.append(f.to_dict()) + resumable = [] + for fd in feature_dicts: + if not fd.get("in_progress") or fd.get("passes"): + continue + # Skip if already running in this orchestrator instance + if fd["id"] in running_ids: + continue + # Skip if feature has failed too many times + if self._failure_counts.get(fd["id"], 0) >= MAX_FEATURE_RETRIES: + continue + resumable.append(fd) - # Sort by scheduling score (higher = first), then priority, then id - all_dicts = [f.to_dict() for f in session.query(Feature).all()] - scores = compute_scheduling_scores(all_dicts) - resumable.sort(key=lambda f: (-scores.get(f["id"], 0), f["priority"], f["id"])) - return resumable - finally: - session.close() + # Sort by scheduling score (higher = first), then priority, then id + if scheduling_scores is None: + scheduling_scores = compute_scheduling_scores(feature_dicts) + resumable.sort(key=lambda f: (-scheduling_scores.get(f["id"], 0), f["priority"], f["id"])) + return resumable - def get_ready_features(self) -> list[dict]: - """Get features with satisfied dependencies, not already running.""" - session = self.get_session() - try: - # Force fresh read from database to avoid stale cached data - # This is critical when agent subprocesses have committed changes - session.expire_all() + def get_ready_features( + self, + feature_dicts: list[dict] | None = None, + scheduling_scores: dict[int, float] | None = None, + ) -> list[dict]: + """Get features with satisfied dependencies, not already running. - all_features = session.query(Feature).all() - all_dicts = [f.to_dict() for f in all_features] + Args: + feature_dicts: Pre-fetched list of feature dicts. If None, queries the database. + scheduling_scores: Pre-computed scheduling scores. If None, computed from feature_dicts. 
+ """ + if feature_dicts is None: + session = self.get_session() + try: + session.expire_all() + all_features = session.query(Feature).all() + feature_dicts = [f.to_dict() for f in all_features] + finally: + session.close() - # Pre-compute passing_ids once to avoid O(n^2) in the loop - passing_ids = {f.id for f in all_features if f.passes} + # Pre-compute passing_ids once to avoid O(n^2) in the loop + passing_ids = {fd["id"] for fd in feature_dicts if fd.get("passes")} - ready = [] - skipped_reasons = {"passes": 0, "in_progress": 0, "running": 0, "failed": 0, "deps": 0} - for f in all_features: - if f.passes: - skipped_reasons["passes"] += 1 - continue - if f.in_progress: - skipped_reasons["in_progress"] += 1 - continue - # Skip if already running in this orchestrator - with self._lock: - if f.id in self.running_coding_agents: - skipped_reasons["running"] += 1 - continue - # Skip if feature has failed too many times - if self._failure_counts.get(f.id, 0) >= MAX_FEATURE_RETRIES: - skipped_reasons["failed"] += 1 - continue - # Check dependencies (pass pre-computed passing_ids) - if are_dependencies_satisfied(f.to_dict(), all_dicts, passing_ids): - ready.append(f.to_dict()) - else: - skipped_reasons["deps"] += 1 + # Snapshot running IDs once to avoid acquiring lock per feature + with self._lock: + running_ids = set(self.running_coding_agents.keys()) - # Sort by scheduling score (higher = first), then priority, then id - scores = compute_scheduling_scores(all_dicts) - ready.sort(key=lambda f: (-scores.get(f["id"], 0), f["priority"], f["id"])) + ready = [] + skipped_reasons = {"passes": 0, "in_progress": 0, "running": 0, "failed": 0, "deps": 0} + for fd in feature_dicts: + if fd.get("passes"): + skipped_reasons["passes"] += 1 + continue + if fd.get("in_progress"): + skipped_reasons["in_progress"] += 1 + continue + # Skip if already running in this orchestrator + if fd["id"] in running_ids: + skipped_reasons["running"] += 1 + continue + # Skip if feature has failed too many times + if self._failure_counts.get(fd["id"], 0) >= MAX_FEATURE_RETRIES: + skipped_reasons["failed"] += 1 + continue + # Check dependencies (pass pre-computed passing_ids) + if are_dependencies_satisfied(fd, feature_dicts, passing_ids): + ready.append(fd) + else: + skipped_reasons["deps"] += 1 - # Debug logging - passing = sum(1 for f in all_features if f.passes) - in_progress = sum(1 for f in all_features if f.in_progress and not f.passes) - print( - f"[DEBUG] get_ready_features: {len(ready)} ready, " - f"{passing} passing, {in_progress} in_progress, {len(all_features)} total", - flush=True - ) - print( - f"[DEBUG] Skipped: {skipped_reasons['passes']} passing, {skipped_reasons['in_progress']} in_progress, " - f"{skipped_reasons['running']} running, {skipped_reasons['failed']} failed, {skipped_reasons['deps']} blocked by deps", - flush=True - ) + # Sort by scheduling score (higher = first), then priority, then id + if scheduling_scores is None: + scheduling_scores = compute_scheduling_scores(feature_dicts) + ready.sort(key=lambda f: (-scheduling_scores.get(f["id"], 0), f["priority"], f["id"])) - # Log to debug file (but not every call to avoid spam) - debug_log.log("READY", "get_ready_features() called", - ready_count=len(ready), - ready_ids=[f['id'] for f in ready[:5]], # First 5 only - passing=passing, - in_progress=in_progress, - total=len(all_features), - skipped=skipped_reasons) + # Summary counts for logging + passing = skipped_reasons["passes"] + in_progress = skipped_reasons["in_progress"] + total = len(feature_dicts) 
- return ready - finally: - session.close() + debug_log.log("READY", "get_ready_features() called", + ready_count=len(ready), + ready_ids=[f['id'] for f in ready[:5]], # First 5 only + passing=passing, + in_progress=in_progress, + total=total, + skipped=skipped_reasons) - def get_all_complete(self) -> bool: + return ready + + def get_all_complete(self, feature_dicts: list[dict] | None = None) -> bool: """Check if all features are complete or permanently failed. Returns False if there are no features (initialization needed). + + Args: + feature_dicts: Pre-fetched list of feature dicts. If None, queries the database. """ - session = self.get_session() - try: - # Force fresh read from database to avoid stale cached data - # This is critical when agent subprocesses have committed changes - session.expire_all() + if feature_dicts is None: + session = self.get_session() + try: + session.expire_all() + all_features = session.query(Feature).all() + feature_dicts = [f.to_dict() for f in all_features] + finally: + session.close() - all_features = session.query(Feature).all() + # No features = NOT complete, need initialization + if len(feature_dicts) == 0: + return False - # No features = NOT complete, need initialization - if len(all_features) == 0: - return False + passing_count = 0 + failed_count = 0 + pending_count = 0 + for fd in feature_dicts: + if fd.get("passes"): + passing_count += 1 + continue # Completed successfully + if self._failure_counts.get(fd["id"], 0) >= MAX_FEATURE_RETRIES: + failed_count += 1 + continue # Permanently failed, count as "done" + pending_count += 1 - passing_count = 0 - failed_count = 0 - pending_count = 0 - for f in all_features: - if f.passes: - passing_count += 1 - continue # Completed successfully - if self._failure_counts.get(f.id, 0) >= MAX_FEATURE_RETRIES: - failed_count += 1 - continue # Permanently failed, count as "done" - pending_count += 1 + total = len(feature_dicts) + is_complete = pending_count == 0 + debug_log.log("COMPLETE_CHECK", f"get_all_complete: {passing_count}/{total} passing, " + f"{failed_count} failed, {pending_count} pending -> {is_complete}") + return is_complete - total = len(all_features) - is_complete = pending_count == 0 - print( - f"[DEBUG] get_all_complete: {passing_count}/{total} passing, " - f"{failed_count} failed, {pending_count} pending -> {is_complete}", - flush=True - ) - return is_complete - finally: - session.close() + def get_passing_count(self, feature_dicts: list[dict] | None = None) -> int: + """Get the number of passing features. - def get_passing_count(self) -> int: - """Get the number of passing features.""" - session = self.get_session() - try: - session.expire_all() - count: int = session.query(Feature).filter(Feature.passes == True).count() - return count - finally: - session.close() + Args: + feature_dicts: Pre-fetched list of feature dicts. If None, queries the database. + """ + if feature_dicts is None: + session = self.get_session() + try: + session.expire_all() + count: int = session.query(Feature).filter(Feature.passes == True).count() + return count + finally: + session.close() + return sum(1 for fd in feature_dicts if fd.get("passes")) - def _maintain_testing_agents(self) -> None: + def _maintain_testing_agents(self, feature_dicts: list[dict] | None = None) -> None: """Maintain the desired count of testing agents independently. 
This runs every loop iteration and spawns testing agents as needed to maintain @@ -402,18 +537,21 @@ class ParallelOrchestrator: - YOLO mode is enabled - testing_agent_ratio is 0 - No passing features exist yet + + Args: + feature_dicts: Pre-fetched list of feature dicts. If None, queries the database. """ # Skip if testing is disabled if self.yolo_mode or self.testing_agent_ratio == 0: return # No testing until there are passing features - passing_count = self.get_passing_count() + passing_count = self.get_passing_count(feature_dicts) if passing_count == 0: return # Don't spawn testing agents if all features are already complete - if self.get_all_complete(): + if self.get_all_complete(feature_dicts): return # Spawn testing agents one at a time, re-checking limits each time @@ -439,7 +577,7 @@ class ParallelOrchestrator: passing_count=passing_count) # Spawn outside lock (I/O bound operation) - print(f"[DEBUG] Spawning testing agent ({spawn_index}/{desired})", flush=True) + logger.debug("Spawning testing agent (%d/%d)", spawn_index, desired) success, msg = self._spawn_testing_agent() if not success: debug_log.log("TESTING", f"Spawn failed, stopping: {msg}") @@ -521,7 +659,7 @@ class ParallelOrchestrator: # CREATE_NO_WINDOW on Windows prevents console window pop-ups # stdin=DEVNULL prevents blocking on stdin reads # encoding="utf-8" and errors="replace" fix Windows CP1252 issues - popen_kwargs = { + popen_kwargs: dict[str, Any] = { "stdin": subprocess.DEVNULL, "stdout": subprocess.PIPE, "stderr": subprocess.STDOUT, @@ -565,11 +703,14 @@ class ParallelOrchestrator: return True, f"Started feature {feature_id}" def _spawn_testing_agent(self) -> tuple[bool, str]: - """Spawn a testing agent subprocess for regression testing. + """Spawn a testing agent subprocess for batch regression testing. - Picks a random passing feature to test. Multiple testing agents can test - the same feature concurrently - this is intentional and simplifies the - architecture by removing claim coordination. + Selects a prioritized batch of passing features using weighted scoring + (via _get_test_batch) and passes them as --testing-feature-ids to the + subprocess. Falls back to single --testing-feature-id for batches of one. + + Multiple testing agents can test the same feature concurrently - this is + intentional and simplifies the architecture by removing claim coordination. 
""" # Check limits first (under lock) with self._lock: @@ -582,13 +723,16 @@ class ParallelOrchestrator: debug_log.log("TESTING", f"Skipped spawn - at max total agents ({total_agents}/{MAX_TOTAL_AGENTS})") return False, f"At max total agents ({total_agents})" - # Pick a random passing feature (no claim needed - concurrent testing is fine) - feature_id = self._get_random_passing_feature() - if feature_id is None: + # Select a weighted batch of passing features for regression testing + batch = self._get_test_batch(self.testing_batch_size) + if not batch: debug_log.log("TESTING", "No features available for testing") return False, "No features available for testing" - debug_log.log("TESTING", f"Selected feature #{feature_id} for testing") + # Use the first feature ID as the representative for logging/tracking + primary_feature_id = batch[0] + batch_str = ",".join(str(fid) for fid in batch) + debug_log.log("TESTING", f"Selected batch for testing: [{batch_str}]") # Spawn the testing agent with self._lock: @@ -604,7 +748,7 @@ class ParallelOrchestrator: "--project-dir", str(self.project_dir), "--max-iterations", "1", "--agent-type", "testing", - "--testing-feature-id", str(feature_id), + "--testing-feature-ids", batch_str, ] if self.model: cmd.extend(["--model", self.model]) @@ -613,7 +757,7 @@ class ParallelOrchestrator: # CREATE_NO_WINDOW on Windows prevents console window pop-ups # stdin=DEVNULL prevents blocking on stdin reads # encoding="utf-8" and errors="replace" fix Windows CP1252 issues - popen_kwargs = { + popen_kwargs: dict[str, Any] = { "stdin": subprocess.DEVNULL, "stdout": subprocess.PIPE, "stderr": subprocess.STDOUT, @@ -633,22 +777,22 @@ class ParallelOrchestrator: # Register process by PID (not feature_id) to avoid overwrites # when multiple agents test the same feature - self.running_testing_agents[proc.pid] = (feature_id, proc) + self.running_testing_agents[proc.pid] = (primary_feature_id, proc) testing_count = len(self.running_testing_agents) - # Start output reader thread with feature ID (same as coding agents) + # Start output reader thread with primary feature ID for log attribution threading.Thread( target=self._read_output, - args=(feature_id, proc, threading.Event(), "testing"), + args=(primary_feature_id, proc, threading.Event(), "testing"), daemon=True ).start() - print(f"Started testing agent for feature #{feature_id} (PID {proc.pid})", flush=True) - debug_log.log("TESTING", f"Successfully spawned testing agent for feature #{feature_id}", + print(f"Started testing agent for features [{batch_str}] (PID {proc.pid})", flush=True) + debug_log.log("TESTING", f"Successfully spawned testing agent for batch [{batch_str}]", pid=proc.pid, - feature_id=feature_id, + feature_ids=batch, total_testing_agents=testing_count) - return True, f"Started testing agent for feature #{feature_id}" + return True, f"Started testing agent for features [{batch_str}]" async def _run_initializer(self) -> bool: """Run initializer agent as blocking subprocess. @@ -674,7 +818,7 @@ class ParallelOrchestrator: # CREATE_NO_WINDOW on Windows prevents console window pop-ups # stdin=DEVNULL prevents blocking on stdin reads # encoding="utf-8" and errors="replace" fix Windows CP1252 issues - popen_kwargs = { + popen_kwargs: dict[str, Any] = { "stdin": subprocess.DEVNULL, "stdout": subprocess.PIPE, "stderr": subprocess.STDOUT, @@ -1000,16 +1144,15 @@ class ParallelOrchestrator: # newly created features. 
debug_log.section("INITIALIZATION COMPLETE") debug_log.log("INIT", "Disposing old database engine and creating fresh connection") - print("[DEBUG] Recreating database connection after initialization...", flush=True) + logger.debug("Recreating database connection after initialization") if self._engine is not None: self._engine.dispose() self._engine, self._session_maker = create_database(self.project_dir) # Debug: Show state immediately after initialization - print("[DEBUG] Post-initialization state check:", flush=True) - print(f"[DEBUG] max_concurrency={self.max_concurrency}", flush=True) - print(f"[DEBUG] yolo_mode={self.yolo_mode}", flush=True) - print(f"[DEBUG] testing_agent_ratio={self.testing_agent_ratio}", flush=True) + logger.debug("Post-initialization state check") + logger.debug("Post-initialization state: max_concurrency=%d, yolo_mode=%s, testing_agent_ratio=%d", + self.max_concurrency, self.yolo_mode, self.testing_agent_ratio) # Verify features were created and are visible session = self.get_session() @@ -1017,7 +1160,7 @@ class ParallelOrchestrator: feature_count = session.query(Feature).count() all_features = session.query(Feature).all() feature_names = [f"{f.id}: {f.name}" for f in all_features[:10]] - print(f"[DEBUG] features in database={feature_count}", flush=True) + logger.debug("Features in database: %d", feature_count) debug_log.log("INIT", "Post-initialization database state", max_concurrency=self.max_concurrency, yolo_mode=self.yolo_mode, @@ -1041,7 +1184,18 @@ class ParallelOrchestrator: while self.is_running and not self._shutdown_requested: loop_iteration += 1 if loop_iteration <= 3: - print(f"[DEBUG] === Loop iteration {loop_iteration} ===", flush=True) + logger.debug("=== Loop iteration %d ===", loop_iteration) + + # Query all features ONCE per iteration and build reusable snapshot. + # Every sub-method receives this snapshot instead of re-querying the DB. 
+ session = self.get_session() + session.expire_all() + all_features = session.query(Feature).all() + feature_dicts = [f.to_dict() for f in all_features] + session.close() + + # Pre-compute scheduling scores once (BFS + reverse topo sort) + scheduling_scores = compute_scheduling_scores(feature_dicts) # Log every iteration to debug file (first 10, then every 5th) if loop_iteration <= 10 or loop_iteration % 5 == 0: @@ -1055,20 +1209,16 @@ class ParallelOrchestrator: # Full database dump every 5 iterations if loop_iteration == 1 or loop_iteration % 5 == 0: - session = self.get_session() - try: - _dump_database_state(session, f"(iteration {loop_iteration})") - finally: - session.close() + _dump_database_state(feature_dicts, f"(iteration {loop_iteration})") try: # Check if all complete - if self.get_all_complete(): + if self.get_all_complete(feature_dicts): print("\nAll features complete!", flush=True) break # Maintain testing agents independently (runs every iteration) - self._maintain_testing_agents() + self._maintain_testing_agents(feature_dicts) # Check capacity with self._lock: @@ -1089,17 +1239,17 @@ class ParallelOrchestrator: continue # Priority 1: Resume features from previous session - resumable = self.get_resumable_features() + resumable = self.get_resumable_features(feature_dicts, scheduling_scores) if resumable: slots = self.max_concurrency - current for feature in resumable[:slots]: print(f"Resuming feature #{feature['id']}: {feature['name']}", flush=True) self.start_feature(feature["id"], resume=True) - await asyncio.sleep(2) + await asyncio.sleep(0.5) # Brief delay for subprocess to claim feature before re-querying continue # Priority 2: Start new ready features - ready = self.get_ready_features() + ready = self.get_ready_features(feature_dicts, scheduling_scores) if not ready: # Wait for running features to complete if current > 0: @@ -1112,11 +1262,12 @@ class ParallelOrchestrator: session = self.get_session() try: session.expire_all() + fresh_dicts = [f.to_dict() for f in session.query(Feature).all()] finally: session.close() # Recheck if all features are now complete - if self.get_all_complete(): + if self.get_all_complete(fresh_dicts): print("\nAll features complete!", flush=True) break @@ -1127,10 +1278,10 @@ class ParallelOrchestrator: # Start features up to capacity slots = self.max_concurrency - current - print(f"[DEBUG] Spawning loop: {len(ready)} ready, {slots} slots available, max_concurrency={self.max_concurrency}", flush=True) - print(f"[DEBUG] Will attempt to start {min(len(ready), slots)} features", flush=True) + logger.debug("Spawning loop: %d ready, %d slots available, max_concurrency=%d", + len(ready), slots, self.max_concurrency) features_to_start = ready[:slots] - print(f"[DEBUG] Features to start: {[f['id'] for f in features_to_start]}", flush=True) + logger.debug("Features to start: %s", [f['id'] for f in features_to_start]) debug_log.log("SPAWN", "Starting features batch", ready_count=len(ready), @@ -1138,23 +1289,24 @@ class ParallelOrchestrator: features_to_start=[f['id'] for f in features_to_start]) for i, feature in enumerate(features_to_start): - print(f"[DEBUG] Starting feature {i+1}/{len(features_to_start)}: #{feature['id']} - {feature['name']}", flush=True) + logger.debug("Starting feature %d/%d: #%d - %s", + i + 1, len(features_to_start), feature['id'], feature['name']) success, msg = self.start_feature(feature["id"]) if not success: - print(f"[DEBUG] Failed to start feature #{feature['id']}: {msg}", flush=True) + logger.debug("Failed to start 
feature #%d: %s", feature['id'], msg) debug_log.log("SPAWN", f"FAILED to start feature #{feature['id']}", feature_name=feature['name'], error=msg) else: - print(f"[DEBUG] Successfully started feature #{feature['id']}", flush=True) + logger.debug("Successfully started feature #%d", feature['id']) with self._lock: running_count = len(self.running_coding_agents) - print(f"[DEBUG] Running coding agents after start: {running_count}", flush=True) + logger.debug("Running coding agents after start: %d", running_count) debug_log.log("SPAWN", f"Successfully started feature #{feature['id']}", feature_name=feature['name'], running_coding_agents=running_count) - await asyncio.sleep(2) # Brief pause between starts + await asyncio.sleep(0.5) # Brief delay for subprocess to claim feature before re-querying except Exception as e: print(f"Orchestrator error: {e}", flush=True) @@ -1223,6 +1375,7 @@ async def run_parallel_orchestrator( model: str | None = None, yolo_mode: bool = False, testing_agent_ratio: int = 1, + testing_batch_size: int = DEFAULT_TESTING_BATCH_SIZE, ) -> None: """Run the unified orchestrator. @@ -1232,6 +1385,7 @@ async def run_parallel_orchestrator( model: Claude model to use yolo_mode: Whether to run in YOLO mode (skip testing agents) testing_agent_ratio: Number of regression agents to maintain (0-3) + testing_batch_size: Number of features per testing batch (1-5) """ print(f"[ORCHESTRATOR] run_parallel_orchestrator called with max_concurrency={max_concurrency}", flush=True) orchestrator = ParallelOrchestrator( @@ -1240,6 +1394,7 @@ async def run_parallel_orchestrator( model=model, yolo_mode=yolo_mode, testing_agent_ratio=testing_agent_ratio, + testing_batch_size=testing_batch_size, ) # Set up cleanup to run on exit (handles normal exit, exceptions) @@ -1319,6 +1474,12 @@ def main(): default=1, help="Number of regression testing agents (0-3, default: 1). Set to 0 to disable testing agents.", ) + parser.add_argument( + "--testing-batch-size", + type=int, + default=DEFAULT_TESTING_BATCH_SIZE, + help=f"Number of features per testing batch (1-5, default: {DEFAULT_TESTING_BATCH_SIZE})", + ) args = parser.parse_args() @@ -1345,6 +1506,7 @@ def main(): model=args.model, yolo_mode=args.yolo, testing_agent_ratio=args.testing_agent_ratio, + testing_batch_size=args.testing_batch_size, )) except KeyboardInterrupt: print("\n\nInterrupted by user", flush=True) diff --git a/prompts.py b/prompts.py index b2ab11b..f50aecb 100644 --- a/prompts.py +++ b/prompts.py @@ -9,6 +9,7 @@ Fallback chain: 2. Base template: .claude/templates/{name}.template.md """ +import re import shutil from pathlib import Path @@ -70,42 +71,119 @@ def get_initializer_prompt(project_dir: Path | None = None) -> str: return load_prompt("initializer_prompt", project_dir) -def get_coding_prompt(project_dir: Path | None = None) -> str: - """Load the coding agent prompt (project-specific if available).""" - return load_prompt("coding_prompt", project_dir) +def _strip_browser_testing_sections(prompt: str) -> str: + """Strip browser automation and Playwright testing instructions from prompt. + + Used in YOLO mode where browser testing is skipped entirely. Replaces + browser-related sections with a brief YOLO-mode note while preserving + all non-testing instructions (implementation, git, progress notes, etc.). + + Args: + prompt: The full coding prompt text. + + Returns: + The prompt with browser testing sections replaced by YOLO guidance. 
+ """ + original_prompt = prompt + + # Replace STEP 5 (browser automation verification) with YOLO note + prompt = re.sub( + r"### STEP 5: VERIFY WITH BROWSER AUTOMATION.*?(?=### STEP 5\.5:)", + "### STEP 5: VERIFY FEATURE (YOLO MODE)\n\n" + "**YOLO mode is active.** Skip browser automation testing. " + "Instead, verify your feature works by ensuring:\n" + "- Code compiles without errors (lint and type-check pass)\n" + "- Server starts without errors after your changes\n" + "- No obvious runtime errors in server logs\n\n", + prompt, + flags=re.DOTALL, + ) + + # Replace the screenshots-only marking rule with YOLO-appropriate wording + prompt = prompt.replace( + "**ONLY MARK A FEATURE AS PASSING AFTER VERIFICATION WITH SCREENSHOTS.**", + "**YOLO mode: Mark a feature as passing after lint/type-check succeeds and server starts cleanly.**", + ) + + # Replace the BROWSER AUTOMATION reference section + prompt = re.sub( + r"## BROWSER AUTOMATION\n\n.*?(?=---)", + "## VERIFICATION (YOLO MODE)\n\n" + "Browser automation is disabled in YOLO mode. " + "Verify features by running lint, type-check, and confirming the dev server starts without errors.\n\n", + prompt, + flags=re.DOTALL, + ) + + # In STEP 4, replace browser automation reference with YOLO guidance + prompt = prompt.replace( + "2. Test manually using browser automation (see Step 5)", + "2. Verify code compiles (lint and type-check pass)", + ) + + if prompt == original_prompt: + print("[YOLO] Warning: No browser testing sections found to strip. " + "Project-specific prompt may need manual YOLO adaptation.") + + return prompt -def get_testing_prompt(project_dir: Path | None = None, testing_feature_id: int | None = None) -> str: - """Load the testing agent prompt (project-specific if available). +def get_coding_prompt(project_dir: Path | None = None, yolo_mode: bool = False) -> str: + """Load the coding agent prompt (project-specific if available). Args: project_dir: Optional project directory for project-specific prompts - testing_feature_id: If provided, the pre-assigned feature ID to test. - The orchestrator claims the feature before spawning the agent. + yolo_mode: If True, strip browser automation / Playwright testing + instructions and replace with YOLO-mode guidance. This reduces + prompt tokens since YOLO mode skips all browser testing anyway. Returns: - The testing prompt, with pre-assigned feature instructions if applicable. + The coding prompt, optionally stripped of testing instructions. + """ + prompt = load_prompt("coding_prompt", project_dir) + + if yolo_mode: + prompt = _strip_browser_testing_sections(prompt) + + return prompt + + +def get_testing_prompt( + project_dir: Path | None = None, + testing_feature_id: int | None = None, + testing_feature_ids: list[int] | None = None, +) -> str: + """Load the testing agent prompt (project-specific if available). + + Supports both single-feature and multi-feature testing modes. When + testing_feature_ids is provided, the template's {{TESTING_FEATURE_IDS}} + placeholder is replaced with the comma-separated list. Falls back to + the legacy single-feature header when only testing_feature_id is given. + + Args: + project_dir: Optional project directory for project-specific prompts + testing_feature_id: If provided, the pre-assigned feature ID to test (legacy single mode). + testing_feature_ids: If provided, a list of feature IDs to test (batch mode). + Takes precedence over testing_feature_id when both are set. + + Returns: + The testing prompt, with feature assignment instructions populated. 
""" base_prompt = load_prompt("testing_prompt", project_dir) + # Batch mode: replace the {{TESTING_FEATURE_IDS}} placeholder in the template + if testing_feature_ids is not None and len(testing_feature_ids) > 0: + ids_str = ", ".join(str(fid) for fid in testing_feature_ids) + return base_prompt.replace("{{TESTING_FEATURE_IDS}}", ids_str) + + # Legacy single-feature mode: prepend header and replace placeholder if testing_feature_id is not None: - # Prepend pre-assigned feature instructions - pre_assigned_header = f"""## ASSIGNED FEATURE + # Replace the placeholder with the single ID for template consistency + base_prompt = base_prompt.replace("{{TESTING_FEATURE_IDS}}", str(testing_feature_id)) + return base_prompt -**You are assigned to regression test Feature #{testing_feature_id}.** - -### Your workflow: -1. Call `feature_get_by_id` with ID {testing_feature_id} to get the feature details -2. Verify the feature through the UI using browser automation -3. If regression found, call `feature_mark_failing` with feature_id={testing_feature_id} -4. Exit when done (no cleanup needed) - ---- - -""" - return pre_assigned_header + base_prompt - - return base_prompt + # No feature assignment -- return template with placeholder cleared + return base_prompt.replace("{{TESTING_FEATURE_IDS}}", "(none assigned)") def get_single_feature_prompt(feature_id: int, project_dir: Path | None = None, yolo_mode: bool = False) -> str: @@ -118,13 +196,13 @@ def get_single_feature_prompt(feature_id: int, project_dir: Path | None = None, Args: feature_id: The specific feature ID to work on project_dir: Optional project directory for project-specific prompts - yolo_mode: Ignored (kept for backward compatibility). Testing is now - handled by separate testing agents, not YOLO prompts. + yolo_mode: If True, strip browser testing instructions from the base + coding prompt for reduced token usage in YOLO mode. Returns: The prompt with single-feature header prepended """ - base_prompt = get_coding_prompt(project_dir) + base_prompt = get_coding_prompt(project_dir, yolo_mode=yolo_mode) # Minimal header - the base prompt already contains the full workflow single_feature_header = f"""## ASSIGNED FEATURE: #{feature_id} diff --git a/rate_limit_utils.py b/rate_limit_utils.py index 9c06f68..7fe77ea 100644 --- a/rate_limit_utils.py +++ b/rate_limit_utils.py @@ -6,6 +6,7 @@ Shared utilities for detecting and handling API rate limits. Used by both agent.py (production) and test_rate_limit_utils.py (tests). """ +import random import re from typing import Optional @@ -81,18 +82,25 @@ def is_rate_limit_error(error_message: str) -> bool: def calculate_rate_limit_backoff(retries: int) -> int: """ - Calculate exponential backoff for rate limits. + Calculate exponential backoff with jitter for rate limits. - Formula: min(60 * 2^retries, 3600) - caps at 1 hour - Sequence: 60s, 120s, 240s, 480s, 960s, 1920s, 3600s... + Base formula: min(15 * 2^retries, 3600) + Jitter: adds 0-30% random jitter to prevent thundering herd. + Base sequence: ~15-20s, ~30-40s, ~60-78s, ~120-156s, ... + + The lower starting delay (15s vs 60s) allows faster recovery from + transient rate limits, while jitter prevents synchronized retries + when multiple agents hit limits simultaneously. 
Args: retries: Number of consecutive rate limit retries (0-indexed) Returns: - Delay in seconds (clamped to 1-3600 range) + Delay in seconds (clamped to 1-3600 range, with jitter) """ - return int(min(max(60 * (2 ** retries), 1), 3600)) + base = int(min(max(15 * (2 ** retries), 1), 3600)) + jitter = random.uniform(0, base * 0.3) + return int(base + jitter) def calculate_error_backoff(retries: int) -> int: diff --git a/requirements.txt b/requirements.txt index 9cf420e..5d57a39 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,3 +15,4 @@ pyyaml>=6.0.0 ruff>=0.8.0 mypy>=1.13.0 pytest>=8.0.0 +types-PyYAML>=6.0.0 diff --git a/security.py b/security.py index 024ad04..1e7455f 100644 --- a/security.py +++ b/security.py @@ -97,6 +97,31 @@ BLOCKED_COMMANDS = { "ufw", } +# Sensitive directories (relative to home) that should never be exposed. +# Used by both the EXTRA_READ_PATHS validator (client.py) and the filesystem +# browser API (server/routers/filesystem.py) to block credential/key directories. +# This is the single source of truth -- import from here in both places. +# +# SENSITIVE_DIRECTORIES is the union of the previous filesystem browser blocklist +# (filesystem.py) and the previous EXTRA_READ_PATHS blocklist (client.py). +# Some entries are new to each consumer -- this is intentional for defense-in-depth. +SENSITIVE_DIRECTORIES = { + ".ssh", + ".aws", + ".azure", + ".kube", + ".gnupg", + ".gpg", + ".password-store", + ".docker", + ".config/gcloud", + ".config/gh", + ".npmrc", + ".pypirc", + ".netrc", + ".terraform", +} + # Commands that trigger emphatic warnings but CAN be approved (Phase 3) # For now, these are blocked like BLOCKED_COMMANDS until Phase 3 implements approval DANGEROUS_COMMANDS = { @@ -413,24 +438,6 @@ def validate_init_script(command_string: str) -> tuple[bool, str]: return False, f"Only ./init.sh is allowed, got: {script}" -def get_command_for_validation(cmd: str, segments: list[str]) -> str: - """ - Find the specific command segment that contains the given command. - - Args: - cmd: The command name to find - segments: List of command segments - - Returns: - The segment containing the command, or empty string if not found - """ - for segment in segments: - segment_commands = extract_commands(segment) - if cmd in segment_commands: - return segment - return "" - - def matches_pattern(command: str, pattern: str) -> bool: """ Check if a command matches a pattern. @@ -472,6 +479,75 @@ def matches_pattern(command: str, pattern: str) -> bool: return False +def _validate_command_list(commands: list, config_path: Path, field_name: str) -> bool: + """ + Validate a list of command entries from a YAML config. + + Each entry must be a dict with a non-empty string 'name' field. + Used by both load_org_config() and load_project_commands() to avoid + duplicating the same validation logic. 
+ + Args: + commands: List of command entries to validate + config_path: Path to the config file (for log messages) + field_name: Name of the YAML field being validated (e.g., 'allowed_commands', 'commands') + + Returns: + True if all entries are valid, False otherwise + """ + if not isinstance(commands, list): + logger.warning(f"Config at {config_path}: '{field_name}' must be a list") + return False + for i, cmd in enumerate(commands): + if not isinstance(cmd, dict): + logger.warning(f"Config at {config_path}: {field_name}[{i}] must be a dict") + return False + if "name" not in cmd: + logger.warning(f"Config at {config_path}: {field_name}[{i}] missing 'name'") + return False + if not isinstance(cmd["name"], str) or cmd["name"].strip() == "": + logger.warning(f"Config at {config_path}: {field_name}[{i}] has invalid 'name'") + return False + return True + + +def _validate_pkill_processes(config: dict, config_path: Path) -> Optional[list[str]]: + """ + Validate and normalize pkill_processes from a YAML config. + + Each entry must be a non-empty string matching VALID_PROCESS_NAME_PATTERN + (alphanumeric, dots, underscores, hyphens only -- no regex metacharacters). + Used by both load_org_config() and load_project_commands(). + + Args: + config: Parsed YAML config dict that may contain 'pkill_processes' + config_path: Path to the config file (for log messages) + + Returns: + Normalized list of process names, or None if validation fails. + Returns an empty list if 'pkill_processes' is not present. + """ + if "pkill_processes" not in config: + return [] + + processes = config["pkill_processes"] + if not isinstance(processes, list): + logger.warning(f"Config at {config_path}: 'pkill_processes' must be a list") + return None + + normalized = [] + for i, proc in enumerate(processes): + if not isinstance(proc, str): + logger.warning(f"Config at {config_path}: pkill_processes[{i}] must be a string") + return None + proc = proc.strip() + if not proc or not VALID_PROCESS_NAME_PATTERN.fullmatch(proc): + logger.warning(f"Config at {config_path}: pkill_processes[{i}] has invalid value '{proc}'") + return None + normalized.append(proc) + return normalized + + def get_org_config_path() -> Path: """ Get the organization-level config file path. 
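# --- Illustrative sketch (editor's note, not part of this patch) ---
# Example of what the shared helpers above accept and reject, using a made-up
# config dict; the real callers are load_org_config() and load_project_commands().
_good = {"pkill_processes": ["node", "vite", "uvicorn"]}
_bad = {"pkill_processes": ["*", ""]}          # metacharacters / empty strings are rejected
_cmds = [{"name": "npm"}, {"name": "pytest"}]  # each entry needs a non-empty string 'name'
# _validate_pkill_processes(_good, config_path) -> ["node", "vite", "uvicorn"]
# _validate_pkill_processes(_bad, config_path)  -> None (warning logged, config discarded)
# _validate_command_list(_cmds, config_path, "commands") -> True
# --- end sketch ---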
@@ -513,21 +589,8 @@ def load_org_config() -> Optional[dict]: # Validate allowed_commands if present if "allowed_commands" in config: - allowed = config["allowed_commands"] - if not isinstance(allowed, list): - logger.warning(f"Org config at {config_path}: 'allowed_commands' must be a list") + if not _validate_command_list(config["allowed_commands"], config_path, "allowed_commands"): return None - for i, cmd in enumerate(allowed): - if not isinstance(cmd, dict): - logger.warning(f"Org config at {config_path}: allowed_commands[{i}] must be a dict") - return None - if "name" not in cmd: - logger.warning(f"Org config at {config_path}: allowed_commands[{i}] missing 'name'") - return None - # Validate that name is a non-empty string - if not isinstance(cmd["name"], str) or cmd["name"].strip() == "": - logger.warning(f"Org config at {config_path}: allowed_commands[{i}] has invalid 'name'") - return None # Validate blocked_commands if present if "blocked_commands" in config: @@ -541,23 +604,10 @@ def load_org_config() -> Optional[dict]: return None # Validate pkill_processes if present - if "pkill_processes" in config: - processes = config["pkill_processes"] - if not isinstance(processes, list): - logger.warning(f"Org config at {config_path}: 'pkill_processes' must be a list") - return None - # Normalize and validate each process name against safe pattern - normalized = [] - for i, proc in enumerate(processes): - if not isinstance(proc, str): - logger.warning(f"Org config at {config_path}: pkill_processes[{i}] must be a string") - return None - proc = proc.strip() - # Block empty strings and regex metacharacters - if not proc or not VALID_PROCESS_NAME_PATTERN.fullmatch(proc): - logger.warning(f"Org config at {config_path}: pkill_processes[{i}] has invalid value '{proc}'") - return None - normalized.append(proc) + normalized = _validate_pkill_processes(config, config_path) + if normalized is None: + return None + if normalized: config["pkill_processes"] = normalized return config @@ -603,46 +653,21 @@ def load_project_commands(project_dir: Path) -> Optional[dict]: return None commands = config.get("commands", []) - if not isinstance(commands, list): - logger.warning(f"Project config at {config_path}: 'commands' must be a list") - return None # Enforce 100 command limit - if len(commands) > 100: + if isinstance(commands, list) and len(commands) > 100: logger.warning(f"Project config at {config_path} exceeds 100 command limit ({len(commands)} commands)") return None - # Validate each command entry - for i, cmd in enumerate(commands): - if not isinstance(cmd, dict): - logger.warning(f"Project config at {config_path}: commands[{i}] must be a dict") - return None - if "name" not in cmd: - logger.warning(f"Project config at {config_path}: commands[{i}] missing 'name'") - return None - # Validate name is a non-empty string - if not isinstance(cmd["name"], str) or cmd["name"].strip() == "": - logger.warning(f"Project config at {config_path}: commands[{i}] has invalid 'name'") - return None + # Validate each command entry using shared helper + if not _validate_command_list(commands, config_path, "commands"): + return None # Validate pkill_processes if present - if "pkill_processes" in config: - processes = config["pkill_processes"] - if not isinstance(processes, list): - logger.warning(f"Project config at {config_path}: 'pkill_processes' must be a list") - return None - # Normalize and validate each process name against safe pattern - normalized = [] - for i, proc in enumerate(processes): - if not 
isinstance(proc, str): - logger.warning(f"Project config at {config_path}: pkill_processes[{i}] must be a string") - return None - proc = proc.strip() - # Block empty strings and regex metacharacters - if not proc or not VALID_PROCESS_NAME_PATTERN.fullmatch(proc): - logger.warning(f"Project config at {config_path}: pkill_processes[{i}] has invalid value '{proc}'") - return None - normalized.append(proc) + normalized = _validate_pkill_processes(config, config_path) + if normalized is None: + return None + if normalized: config["pkill_processes"] = normalized return config @@ -659,8 +684,12 @@ def validate_project_command(cmd_config: dict) -> tuple[bool, str]: """ Validate a single command entry from project config. + Checks that the command has a valid name and is not in any blocklist. + Called during hierarchy resolution to gate each project command before + it is added to the effective allowed set. + Args: - cmd_config: Dict with command configuration (name, description, args) + cmd_config: Dict with command configuration (name, description) Returns: Tuple of (is_valid, error_message) @@ -690,15 +719,6 @@ def validate_project_command(cmd_config: dict) -> tuple[bool, str]: if "description" in cmd_config and not isinstance(cmd_config["description"], str): return False, "Description must be a string" - # Args validation (Phase 1 - just check structure) - if "args" in cmd_config: - args = cmd_config["args"] - if not isinstance(args, list): - return False, "Args must be a list" - for arg in args: - if not isinstance(arg, str): - return False, "Each arg must be a string" - return True, "" @@ -899,8 +919,13 @@ async def bash_security_hook(input_data, tool_use_id=None, context=None): # Additional validation for sensitive commands if cmd in COMMANDS_NEEDING_EXTRA_VALIDATION: - # Find the specific segment containing this command - cmd_segment = get_command_for_validation(cmd, segments) + # Find the specific segment containing this command by searching + # each segment's extracted commands for a match + cmd_segment = "" + for segment in segments: + if cmd in extract_commands(segment): + cmd_segment = segment + break if not cmd_segment: cmd_segment = command # Fallback to full command diff --git a/server/main.py b/server/main.py index e46f436..687bf87 100644 --- a/server/main.py +++ b/server/main.py @@ -7,6 +7,7 @@ Provides REST API, WebSocket, and static file serving. """ import asyncio +import logging import os import shutil import sys @@ -42,6 +43,7 @@ from .routers import ( ) from .schemas import SetupStatus from .services.assistant_chat_session import cleanup_all_sessions as cleanup_assistant_sessions +from .services.chat_constants import ROOT_DIR from .services.dev_server_manager import ( cleanup_all_devservers, cleanup_orphaned_devserver_locks, @@ -53,7 +55,6 @@ from .services.terminal_manager import cleanup_all_terminals from .websocket import project_websocket # Paths -ROOT_DIR = Path(__file__).parent.parent UI_DIST_DIR = ROOT_DIR / "ui" / "dist" @@ -88,10 +89,19 @@ app = FastAPI( lifespan=lifespan, ) +# Module logger +logger = logging.getLogger(__name__) + # Check if remote access is enabled via environment variable # Set by start_ui.py when --host is not 127.0.0.1 ALLOW_REMOTE = os.environ.get("AUTOCODER_ALLOW_REMOTE", "").lower() in ("1", "true", "yes") +if ALLOW_REMOTE: + logger.warning( + "ALLOW_REMOTE is enabled. Terminal WebSocket is exposed without sandboxing. " + "Only use this in trusted network environments." 
+ ) + # CORS - allow all origins when remote access is enabled, otherwise localhost only if ALLOW_REMOTE: app.add_middleware( diff --git a/server/routers/agent.py b/server/routers/agent.py index 422f86b..b9a7756 100644 --- a/server/routers/agent.py +++ b/server/routers/agent.py @@ -6,24 +6,15 @@ API endpoints for agent control (start/stop/pause/resume). Uses project registry for path lookups. """ -import re from pathlib import Path from fastapi import APIRouter, HTTPException from ..schemas import AgentActionResponse, AgentStartRequest, AgentStatus +from ..services.chat_constants import ROOT_DIR from ..services.process_manager import get_manager - - -def _get_project_path(project_name: str) -> Path: - """Get project path from registry.""" - import sys - root = Path(__file__).parent.parent.parent - if str(root) not in sys.path: - sys.path.insert(0, str(root)) - - from registry import get_project_path - return get_project_path(project_name) +from ..utils.project_helpers import get_project_path as _get_project_path +from ..utils.validation import validate_project_name def _get_settings_defaults() -> tuple[bool, str, int]: @@ -54,19 +45,6 @@ def _get_settings_defaults() -> tuple[bool, str, int]: router = APIRouter(prefix="/api/projects/{project_name}/agent", tags=["agent"]) -# Root directory for process manager -ROOT_DIR = Path(__file__).parent.parent.parent - - -def validate_project_name(name: str) -> str: - """Validate and sanitize project name to prevent path traversal.""" - if not re.match(r'^[a-zA-Z0-9_-]{1,50}$', name): - raise HTTPException( - status_code=400, - detail="Invalid project name" - ) - return name - def get_project_manager(project_name: str): """Get the process manager for a project.""" diff --git a/server/routers/assistant_chat.py b/server/routers/assistant_chat.py index 32ba6f4..ceae8bd 100644 --- a/server/routers/assistant_chat.py +++ b/server/routers/assistant_chat.py @@ -7,8 +7,6 @@ WebSocket and REST endpoints for the read-only project assistant. 
import json import logging -import re -from pathlib import Path from typing import Optional from fastapi import APIRouter, HTTPException, WebSocket, WebSocketDisconnect @@ -27,30 +25,13 @@ from ..services.assistant_database import ( get_conversation, get_conversations, ) +from ..utils.project_helpers import get_project_path as _get_project_path +from ..utils.validation import is_valid_project_name as validate_project_name logger = logging.getLogger(__name__) router = APIRouter(prefix="/api/assistant", tags=["assistant-chat"]) -# Root directory -ROOT_DIR = Path(__file__).parent.parent.parent - - -def _get_project_path(project_name: str) -> Optional[Path]: - """Get project path from registry.""" - import sys - root = Path(__file__).parent.parent.parent - if str(root) not in sys.path: - sys.path.insert(0, str(root)) - - from registry import get_project_path - return get_project_path(project_name) - - -def validate_project_name(name: str) -> bool: - """Validate project name to prevent path traversal.""" - return bool(re.match(r'^[a-zA-Z0-9_-]{1,50}$', name)) - # ============================================================================ # Pydantic Models @@ -145,9 +126,9 @@ async def create_project_conversation(project_name: str): conversation = create_conversation(project_dir, project_name) return ConversationSummary( - id=conversation.id, - project_name=conversation.project_name, - title=conversation.title, + id=int(conversation.id), + project_name=str(conversation.project_name), + title=str(conversation.title) if conversation.title else None, created_at=conversation.created_at.isoformat() if conversation.created_at else None, updated_at=conversation.updated_at.isoformat() if conversation.updated_at else None, message_count=0, diff --git a/server/routers/devserver.py b/server/routers/devserver.py index 9892e3a..04849d0 100644 --- a/server/routers/devserver.py +++ b/server/routers/devserver.py @@ -7,7 +7,6 @@ Uses project registry for path lookups and project_config for command detection. """ import logging -import re import sys from pathlib import Path @@ -27,41 +26,22 @@ from ..services.project_config import ( get_project_config, set_dev_command, ) +from ..utils.project_helpers import get_project_path as _get_project_path +from ..utils.validation import validate_project_name -# Add root to path for registry import +# Add root to path for security module import _root = Path(__file__).parent.parent.parent if str(_root) not in sys.path: sys.path.insert(0, str(_root)) -from registry import get_project_path as registry_get_project_path from security import extract_commands, get_effective_commands, is_command_allowed logger = logging.getLogger(__name__) -def _get_project_path(project_name: str) -> Path | None: - """Get project path from registry.""" - return registry_get_project_path(project_name) - - router = APIRouter(prefix="/api/projects/{project_name}/devserver", tags=["devserver"]) -# ============================================================================ -# Helper Functions -# ============================================================================ - - -def validate_project_name(name: str) -> str: - """Validate and sanitize project name to prevent path traversal.""" - if not re.match(r'^[a-zA-Z0-9_-]{1,50}$', name): - raise HTTPException( - status_code=400, - detail="Invalid project name" - ) - return name - - def get_project_dir(project_name: str) -> Path: """ Get the validated project directory for a project name. 
diff --git a/server/routers/expand_project.py b/server/routers/expand_project.py index 7f6c985..3de2f44 100644 --- a/server/routers/expand_project.py +++ b/server/routers/expand_project.py @@ -8,7 +8,6 @@ Allows adding multiple features to existing projects via natural language. import json import logging -from pathlib import Path from typing import Optional from fastapi import APIRouter, HTTPException, WebSocket, WebSocketDisconnect @@ -22,27 +21,13 @@ from ..services.expand_chat_session import ( list_expand_sessions, remove_expand_session, ) +from ..utils.project_helpers import get_project_path as _get_project_path from ..utils.validation import validate_project_name logger = logging.getLogger(__name__) router = APIRouter(prefix="/api/expand", tags=["expand-project"]) -# Root directory -ROOT_DIR = Path(__file__).parent.parent.parent - - -def _get_project_path(project_name: str) -> Path: - """Get project path from registry.""" - import sys - root = Path(__file__).parent.parent.parent - if str(root) not in sys.path: - sys.path.insert(0, str(root)) - - from registry import get_project_path - return get_project_path(project_name) - - # ============================================================================ diff --git a/server/routers/features.py b/server/routers/features.py index ab95843..0c8c77d 100644 --- a/server/routers/features.py +++ b/server/routers/features.py @@ -8,10 +8,12 @@ API endpoints for feature/test case management. import logging from contextlib import contextmanager from pathlib import Path +from typing import Literal from fastapi import APIRouter, HTTPException from ..schemas import ( + DependencyGraphEdge, DependencyGraphNode, DependencyGraphResponse, DependencyUpdate, @@ -22,6 +24,7 @@ from ..schemas import ( FeatureResponse, FeatureUpdate, ) +from ..utils.project_helpers import get_project_path as _get_project_path from ..utils.validation import validate_project_name # Lazy imports to avoid circular dependencies @@ -31,17 +34,6 @@ _Feature = None logger = logging.getLogger(__name__) -def _get_project_path(project_name: str) -> Path: - """Get project path from registry.""" - import sys - root = Path(__file__).parent.parent.parent - if str(root) not in sys.path: - sys.path.insert(0, str(root)) - - from registry import get_project_path - return get_project_path(project_name) - - def _get_db_classes(): """Lazy import of database classes.""" global _create_database, _Feature @@ -349,6 +341,7 @@ async def get_dependency_graph(project_name: str): deps = f.dependencies or [] blocking = [d for d in deps if d not in passing_ids] + status: Literal["pending", "in_progress", "done", "blocked"] if f.passes: status = "done" elif blocking: @@ -368,7 +361,7 @@ async def get_dependency_graph(project_name: str): )) for dep_id in deps: - edges.append({"source": dep_id, "target": f.id}) + edges.append(DependencyGraphEdge(source=dep_id, target=f.id)) return DependencyGraphResponse(nodes=nodes, edges=edges) except HTTPException: diff --git a/server/routers/filesystem.py b/server/routers/filesystem.py index eb6293b..cdf9bc5 100644 --- a/server/routers/filesystem.py +++ b/server/routers/filesystem.py @@ -6,6 +6,7 @@ API endpoints for browsing the filesystem for project folder selection. Provides cross-platform support for Windows, macOS, and Linux. 
""" +import functools import logging import os import re @@ -14,6 +15,8 @@ from pathlib import Path from fastapi import APIRouter, HTTPException, Query +from security import SENSITIVE_DIRECTORIES + # Module logger logger = logging.getLogger(__name__) @@ -77,17 +80,10 @@ LINUX_BLOCKED = { "/opt", } -# Universal blocked paths (relative to home directory) -UNIVERSAL_BLOCKED_RELATIVE = { - ".ssh", - ".aws", - ".gnupg", - ".config/gh", - ".netrc", - ".docker", - ".kube", - ".terraform", -} +# Universal blocked paths (relative to home directory). +# Delegates to the canonical SENSITIVE_DIRECTORIES set in security.py so that +# the filesystem browser and the EXTRA_READ_PATHS validator share one source of truth. +UNIVERSAL_BLOCKED_RELATIVE = SENSITIVE_DIRECTORIES # Patterns for files that should not be shown HIDDEN_PATTERNS = [ @@ -99,8 +95,14 @@ HIDDEN_PATTERNS = [ ] -def get_blocked_paths() -> set[Path]: - """Get the set of blocked paths for the current platform.""" +@functools.lru_cache(maxsize=1) +def get_blocked_paths() -> frozenset[Path]: + """ + Get the set of blocked paths for the current platform. + + Cached because the platform and home directory do not change at runtime, + and this function is called once per directory entry in list_directory(). + """ home = Path.home() blocked = set() @@ -119,7 +121,7 @@ def get_blocked_paths() -> set[Path]: for rel in UNIVERSAL_BLOCKED_RELATIVE: blocked.add((home / rel).resolve()) - return blocked + return frozenset(blocked) def is_path_blocked(path: Path) -> bool: diff --git a/server/routers/projects.py b/server/routers/projects.py index 7ecfe08..bfa5b9c 100644 --- a/server/routers/projects.py +++ b/server/routers/projects.py @@ -10,6 +10,7 @@ import re import shutil import sys from pathlib import Path +from typing import Any, Callable from fastapi import APIRouter, HTTPException @@ -24,11 +25,12 @@ from ..schemas import ( ) # Lazy imports to avoid circular dependencies +# These are initialized by _init_imports() before first use. 
_imports_initialized = False -_check_spec_exists = None -_scaffold_project_prompts = None -_get_project_prompts_dir = None -_count_passing_tests = None +_check_spec_exists: Callable[..., Any] | None = None +_scaffold_project_prompts: Callable[..., Any] | None = None +_get_project_prompts_dir: Callable[..., Any] | None = None +_count_passing_tests: Callable[..., Any] | None = None def _init_imports(): @@ -99,6 +101,7 @@ def validate_project_name(name: str) -> str: def get_project_stats(project_dir: Path) -> ProjectStats: """Get statistics for a project.""" _init_imports() + assert _count_passing_tests is not None # guaranteed by _init_imports() passing, in_progress, total = _count_passing_tests(project_dir) percentage = (passing / total * 100) if total > 0 else 0.0 return ProjectStats( @@ -113,6 +116,7 @@ def get_project_stats(project_dir: Path) -> ProjectStats: async def list_projects(): """List all registered projects.""" _init_imports() + assert _check_spec_exists is not None # guaranteed by _init_imports() (_, _, _, list_registered_projects, validate_project_path, get_project_concurrency, _) = _get_registry_functions() @@ -145,6 +149,7 @@ async def list_projects(): async def create_project(project: ProjectCreate): """Create a new project at the specified path.""" _init_imports() + assert _scaffold_project_prompts is not None # guaranteed by _init_imports() (register_project, _, get_project_path, list_registered_projects, _, _, _) = _get_registry_functions() @@ -225,6 +230,8 @@ async def create_project(project: ProjectCreate): async def get_project(name: str): """Get detailed information about a project.""" _init_imports() + assert _check_spec_exists is not None # guaranteed by _init_imports() + assert _get_project_prompts_dir is not None # guaranteed by _init_imports() (_, _, get_project_path, _, _, get_project_concurrency, _) = _get_registry_functions() name = validate_project_name(name) @@ -296,6 +303,7 @@ async def delete_project(name: str, delete_files: bool = False): async def get_project_prompts(name: str): """Get the content of project prompt files.""" _init_imports() + assert _get_project_prompts_dir is not None # guaranteed by _init_imports() (_, _, get_project_path, _, _, _, _) = _get_registry_functions() name = validate_project_name(name) @@ -307,7 +315,7 @@ async def get_project_prompts(name: str): if not project_dir.exists(): raise HTTPException(status_code=404, detail="Project directory not found") - prompts_dir = _get_project_prompts_dir(project_dir) + prompts_dir: Path = _get_project_prompts_dir(project_dir) def read_file(filename: str) -> str: filepath = prompts_dir / filename @@ -329,6 +337,7 @@ async def get_project_prompts(name: str): async def update_project_prompts(name: str, prompts: ProjectPromptsUpdate): """Update project prompt files.""" _init_imports() + assert _get_project_prompts_dir is not None # guaranteed by _init_imports() (_, _, get_project_path, _, _, _, _) = _get_registry_functions() name = validate_project_name(name) @@ -480,6 +489,8 @@ async def reset_project(name: str, full_reset: bool = False): async def update_project_settings(name: str, settings: ProjectSettingsUpdate): """Update project-level settings (concurrency, etc.).""" _init_imports() + assert _check_spec_exists is not None # guaranteed by _init_imports() + assert _get_project_prompts_dir is not None # guaranteed by _init_imports() (_, _, get_project_path, _, _, get_project_concurrency, set_project_concurrency) = _get_registry_functions() diff --git a/server/routers/schedules.py 
b/server/routers/schedules.py index b97ecc8..1758f62 100644 --- a/server/routers/schedules.py +++ b/server/routers/schedules.py @@ -6,12 +6,10 @@ API endpoints for managing agent schedules. Provides CRUD operations for time-based schedule configuration. """ -import re -import sys from contextlib import contextmanager from datetime import datetime, timedelta, timezone from pathlib import Path -from typing import Generator, Tuple +from typing import TYPE_CHECKING, Generator, Tuple from fastapi import APIRouter, HTTPException from sqlalchemy.orm import Session @@ -26,17 +24,21 @@ from ..schemas import ( ScheduleResponse, ScheduleUpdate, ) +from ..utils.project_helpers import get_project_path as _get_project_path +from ..utils.validation import validate_project_name + +if TYPE_CHECKING: + from api.database import Schedule as ScheduleModel -def _get_project_path(project_name: str) -> Path: - """Get project path from registry.""" - root = Path(__file__).parent.parent.parent - if str(root) not in sys.path: - sys.path.insert(0, str(root)) - - from registry import get_project_path - return get_project_path(project_name) +def _schedule_to_response(schedule: "ScheduleModel") -> ScheduleResponse: + """Convert a Schedule ORM object to a ScheduleResponse Pydantic model. + SQLAlchemy Column descriptors resolve to Python types at instance access time, + but mypy sees the Column[T] descriptor type. Using model_validate with + from_attributes handles this conversion correctly. + """ + return ScheduleResponse.model_validate(schedule, from_attributes=True) router = APIRouter( prefix="/api/projects/{project_name}/schedules", @@ -44,16 +46,6 @@ router = APIRouter( ) -def validate_project_name(name: str) -> str: - """Validate and sanitize project name to prevent path traversal.""" - if not re.match(r'^[a-zA-Z0-9_-]{1,50}$', name): - raise HTTPException( - status_code=400, - detail="Invalid project name" - ) - return name - - @contextmanager def _get_db_session(project_name: str) -> Generator[Tuple[Session, Path], None, None]: """Get database session for a project as a context manager. 
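As a rough standalone sketch of the Pydantic v2 mechanism `_schedule_to_response()` relies on; the model fields and row values here are simplified stand-ins, not the real `ScheduleResponse` schema:

```python
from pydantic import BaseModel


class MiniScheduleResponse(BaseModel):  # trimmed stand-in for ScheduleResponse
    id: int
    project_name: str
    enabled: bool


class FakeScheduleRow:
    """Mimics a SQLAlchemy instance: plain attribute access at runtime."""
    id = 7
    project_name = "demo"
    enabled = True


# from_attributes=True makes Pydantic read attributes instead of mapping keys,
# which is what lets one helper replace the hand-written field-by-field copies.
resp = MiniScheduleResponse.model_validate(FakeScheduleRow(), from_attributes=True)
print(resp.model_dump())  # {'id': 7, 'project_name': 'demo', 'enabled': True}
```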
@@ -102,21 +94,7 @@ async def list_schedules(project_name: str): ).order_by(Schedule.start_time).all() return ScheduleListResponse( - schedules=[ - ScheduleResponse( - id=s.id, - project_name=s.project_name, - start_time=s.start_time, - duration_minutes=s.duration_minutes, - days_of_week=s.days_of_week, - enabled=s.enabled, - yolo_mode=s.yolo_mode, - model=s.model, - crash_count=s.crash_count, - created_at=s.created_at, - ) - for s in schedules - ] + schedules=[_schedule_to_response(s) for s in schedules] ) @@ -190,18 +168,7 @@ async def create_schedule(project_name: str, data: ScheduleCreate): except Exception as e: logger.error(f"Failed to start agent for schedule {schedule.id}: {e}", exc_info=True) - return ScheduleResponse( - id=schedule.id, - project_name=schedule.project_name, - start_time=schedule.start_time, - duration_minutes=schedule.duration_minutes, - days_of_week=schedule.days_of_week, - enabled=schedule.enabled, - yolo_mode=schedule.yolo_mode, - model=schedule.model, - crash_count=schedule.crash_count, - created_at=schedule.created_at, - ) + return _schedule_to_response(schedule) @router.get("/next", response_model=NextRunResponse) @@ -259,8 +226,8 @@ async def get_next_scheduled_run(project_name: str): return NextRunResponse( has_schedules=True, - next_start=next_start.isoformat() if (active_count == 0 and next_start) else None, - next_end=latest_end.isoformat() if latest_end else None, + next_start=next_start if active_count == 0 else None, + next_end=latest_end, is_currently_running=active_count > 0, active_schedule_count=active_count, ) @@ -280,18 +247,7 @@ async def get_schedule(project_name: str, schedule_id: int): if not schedule: raise HTTPException(status_code=404, detail="Schedule not found") - return ScheduleResponse( - id=schedule.id, - project_name=schedule.project_name, - start_time=schedule.start_time, - duration_minutes=schedule.duration_minutes, - days_of_week=schedule.days_of_week, - enabled=schedule.enabled, - yolo_mode=schedule.yolo_mode, - model=schedule.model, - crash_count=schedule.crash_count, - created_at=schedule.created_at, - ) + return _schedule_to_response(schedule) @router.patch("/{schedule_id}", response_model=ScheduleResponse) @@ -334,18 +290,7 @@ async def update_schedule( # Was enabled, now disabled - remove jobs scheduler.remove_schedule(schedule_id) - return ScheduleResponse( - id=schedule.id, - project_name=schedule.project_name, - start_time=schedule.start_time, - duration_minutes=schedule.duration_minutes, - days_of_week=schedule.days_of_week, - enabled=schedule.enabled, - yolo_mode=schedule.yolo_mode, - model=schedule.model, - crash_count=schedule.crash_count, - created_at=schedule.created_at, - ) + return _schedule_to_response(schedule) @router.delete("/{schedule_id}", status_code=204) diff --git a/server/routers/settings.py b/server/routers/settings.py index 8f3f906..4b9c3e5 100644 --- a/server/routers/settings.py +++ b/server/routers/settings.py @@ -9,17 +9,16 @@ Settings are stored in the registry database and shared across all projects. 
import mimetypes import os import sys -from pathlib import Path from fastapi import APIRouter from ..schemas import ModelInfo, ModelsResponse, SettingsResponse, SettingsUpdate +from ..services.chat_constants import ROOT_DIR # Mimetype fix for Windows - must run before StaticFiles is mounted mimetypes.add_type("text/javascript", ".js", True) -# Add root to path for registry import -ROOT_DIR = Path(__file__).parent.parent.parent +# Ensure root is on sys.path for registry import if str(ROOT_DIR) not in sys.path: sys.path.insert(0, str(ROOT_DIR)) diff --git a/server/routers/spec_creation.py b/server/routers/spec_creation.py index c29da6b..e6e917a 100644 --- a/server/routers/spec_creation.py +++ b/server/routers/spec_creation.py @@ -7,8 +7,6 @@ WebSocket and REST endpoints for interactive spec creation with Claude. import json import logging -import re -from pathlib import Path from typing import Optional from fastapi import APIRouter, HTTPException, WebSocket, WebSocketDisconnect @@ -22,30 +20,13 @@ from ..services.spec_chat_session import ( list_sessions, remove_session, ) +from ..utils.project_helpers import get_project_path as _get_project_path +from ..utils.validation import is_valid_project_name as validate_project_name logger = logging.getLogger(__name__) router = APIRouter(prefix="/api/spec", tags=["spec-creation"]) -# Root directory -ROOT_DIR = Path(__file__).parent.parent.parent - - -def _get_project_path(project_name: str) -> Path: - """Get project path from registry.""" - import sys - root = Path(__file__).parent.parent.parent - if str(root) not in sys.path: - sys.path.insert(0, str(root)) - - from registry import get_project_path - return get_project_path(project_name) - - -def validate_project_name(name: str) -> bool: - """Validate project name to prevent path traversal.""" - return bool(re.match(r'^[a-zA-Z0-9_-]{1,50}$', name)) - # ============================================================================ # REST Endpoints diff --git a/server/routers/terminal.py b/server/routers/terminal.py index 2183369..a53b9ab 100644 --- a/server/routers/terminal.py +++ b/server/routers/terminal.py @@ -12,8 +12,6 @@ import base64 import json import logging import re -import sys -from pathlib import Path from fastapi import APIRouter, HTTPException, WebSocket, WebSocketDisconnect from pydantic import BaseModel @@ -27,13 +25,8 @@ from ..services.terminal_manager import ( rename_terminal, stop_terminal_session, ) - -# Add project root to path for registry import -_root = Path(__file__).parent.parent.parent -if str(_root) not in sys.path: - sys.path.insert(0, str(_root)) - -from registry import get_project_path as registry_get_project_path +from ..utils.project_helpers import get_project_path as _get_project_path +from ..utils.validation import is_valid_project_name as validate_project_name logger = logging.getLogger(__name__) @@ -48,27 +41,6 @@ class TerminalCloseCode: FAILED_TO_START = 4500 -def _get_project_path(project_name: str) -> Path | None: - """Get project path from registry.""" - return registry_get_project_path(project_name) - - -def validate_project_name(name: str) -> bool: - """ - Validate project name to prevent path traversal attacks. - - Allows only alphanumeric characters, underscores, and hyphens. - Maximum length of 50 characters. - - Args: - name: The project name to validate - - Returns: - True if valid, False otherwise - """ - return bool(re.match(r"^[a-zA-Z0-9_-]{1,50}$", name)) - - def validate_terminal_id(terminal_id: str) -> bool: """ Validate terminal ID format. 
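To illustrate when each shared validation variant is meant to be used (the endpoint path and WebSocket close code below are hypothetical; the two functions come from `server/utils/validation.py` as consolidated in this patch):

```python
from fastapi import APIRouter, WebSocket

from server.utils.validation import is_valid_project_name, validate_project_name

router = APIRouter()


@router.get("/api/projects/{name}/example")      # hypothetical REST endpoint
async def rest_example(name: str) -> dict:
    name = validate_project_name(name)           # raises HTTPException(400) on bad input
    return {"project": name}


async def ws_example(websocket: WebSocket, name: str) -> None:
    await websocket.accept()
    if not is_valid_project_name(name):          # bool variant: close the socket ourselves
        await websocket.close(code=4400)         # hypothetical application close code
        return
    await websocket.send_json({"project": name})
```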
diff --git a/server/services/assistant_chat_session.py b/server/services/assistant_chat_session.py index 2ac41fc..182232c 100755 --- a/server/services/assistant_chat_session.py +++ b/server/services/assistant_chat_session.py @@ -25,25 +25,13 @@ from .assistant_database import ( create_conversation, get_messages, ) +from .chat_constants import API_ENV_VARS, ROOT_DIR # Load environment variables from .env file if present load_dotenv() logger = logging.getLogger(__name__) -# Root directory of the project -ROOT_DIR = Path(__file__).parent.parent.parent - -# Environment variables to pass through to Claude CLI for API configuration -API_ENV_VARS = [ - "ANTHROPIC_BASE_URL", - "ANTHROPIC_AUTH_TOKEN", - "API_TIMEOUT_MS", - "ANTHROPIC_DEFAULT_SONNET_MODEL", - "ANTHROPIC_DEFAULT_OPUS_MODEL", - "ANTHROPIC_DEFAULT_HAIKU_MODEL", -] - # Read-only feature MCP tools READONLY_FEATURE_MCP_TOOLS = [ "mcp__features__feature_get_stats", @@ -215,7 +203,7 @@ class AssistantChatSession: # Create a new conversation if we don't have one if is_new_conversation: conv = create_conversation(self.project_dir, self.project_name) - self.conversation_id = conv.id + self.conversation_id = int(conv.id) # type coercion: Column[int] -> int yield {"type": "conversation_created", "conversation_id": self.conversation_id} # Build permissions list for assistant access (read + feature management) @@ -270,7 +258,11 @@ class AssistantChatSession: system_cli = shutil.which("claude") # Build environment overrides for API configuration - sdk_env = {var: os.getenv(var) for var in API_ENV_VARS if os.getenv(var)} + sdk_env: dict[str, str] = {} + for var in API_ENV_VARS: + value = os.getenv(var) + if value: + sdk_env[var] = value # Determine model from environment or use default # This allows using alternative APIs (e.g., GLM via z.ai) that may not support Claude model names @@ -286,7 +278,7 @@ class AssistantChatSession: # This avoids Windows command line length limit (~8191 chars) setting_sources=["project"], allowed_tools=[*READONLY_BUILTIN_TOOLS, *ASSISTANT_FEATURE_TOOLS], - mcp_servers=mcp_servers, + mcp_servers=mcp_servers, # type: ignore[arg-type] # SDK accepts dict config at runtime permission_mode="bypassPermissions", max_turns=100, cwd=str(self.project_dir.resolve()), @@ -312,6 +304,8 @@ class AssistantChatSession: greeting = f"Hello! I'm your project assistant for **{self.project_name}**. I can help you understand the codebase, explain features, and answer questions about the project. What would you like to know?" 
# Store the greeting in the database + # conversation_id is guaranteed non-None here (set on line 206 above) + assert self.conversation_id is not None add_message(self.project_dir, self.conversation_id, "assistant", greeting) yield {"type": "text", "content": greeting} diff --git a/server/services/assistant_database.py b/server/services/assistant_database.py index b91a388..1d0e9a6 100644 --- a/server/services/assistant_database.py +++ b/server/services/assistant_database.py @@ -13,6 +13,7 @@ from pathlib import Path from typing import Optional from sqlalchemy import Column, DateTime, ForeignKey, Integer, String, Text, create_engine, func +from sqlalchemy.engine import Engine from sqlalchemy.orm import DeclarativeBase, relationship, sessionmaker logger = logging.getLogger(__name__) @@ -23,7 +24,7 @@ class Base(DeclarativeBase): # Engine cache to avoid creating new engines for each request # Key: project directory path (as posix string), Value: SQLAlchemy engine -_engine_cache: dict[str, object] = {} +_engine_cache: dict[str, Engine] = {} # Lock for thread-safe access to the engine cache # Prevents race conditions when multiple threads create engines simultaneously diff --git a/server/services/chat_constants.py b/server/services/chat_constants.py new file mode 100644 index 0000000..6af3c1b --- /dev/null +++ b/server/services/chat_constants.py @@ -0,0 +1,57 @@ +""" +Chat Session Constants +====================== + +Shared constants for all chat session types (assistant, spec, expand). + +The canonical ``API_ENV_VARS`` list lives in ``env_constants.py`` at the +project root and is re-exported here for convenience so that existing +imports (``from .chat_constants import API_ENV_VARS``) continue to work. +""" + +import sys +from pathlib import Path +from typing import AsyncGenerator + +# ------------------------------------------------------------------- +# Root directory of the autocoder project (repository root). +# Used throughout the server package whenever the repo root is needed. +# ------------------------------------------------------------------- +ROOT_DIR = Path(__file__).parent.parent.parent + +# Ensure the project root is on sys.path so we can import env_constants +# from the root-level module without requiring a package install. +_root_str = str(ROOT_DIR) +if _root_str not in sys.path: + sys.path.insert(0, _root_str) + +# ------------------------------------------------------------------- +# Environment variables forwarded to Claude CLI subprocesses. +# Single source of truth lives in env_constants.py at the project root. +# Re-exported here so existing ``from .chat_constants import API_ENV_VARS`` +# imports continue to work unchanged. +# ------------------------------------------------------------------- +from env_constants import API_ENV_VARS # noqa: E402, F401 + + +async def make_multimodal_message(content_blocks: list[dict]) -> AsyncGenerator[dict, None]: + """Yield a single multimodal user message in Claude Agent SDK format. + + The Claude Agent SDK's ``query()`` method accepts either a plain string + or an ``AsyncIterable[dict]`` for custom message formats. This helper + wraps a list of content blocks (text and/or images) in the expected + envelope. + + Args: + content_blocks: List of content-block dicts, e.g. + ``[{"type": "text", "text": "..."}, {"type": "image", ...}]``. + + Yields: + A single dict representing the user message. 
+ """ + yield { + "type": "user", + "message": {"role": "user", "content": content_blocks}, + "parent_tool_use_id": None, + "session_id": "default", + } diff --git a/server/services/expand_chat_session.py b/server/services/expand_chat_session.py index 2960e2e..4fd0978 100644 --- a/server/services/expand_chat_session.py +++ b/server/services/expand_chat_session.py @@ -16,28 +16,19 @@ import threading import uuid from datetime import datetime from pathlib import Path -from typing import AsyncGenerator, Optional +from typing import Any, AsyncGenerator, Optional from claude_agent_sdk import ClaudeAgentOptions, ClaudeSDKClient from dotenv import load_dotenv from ..schemas import ImageAttachment +from .chat_constants import API_ENV_VARS, ROOT_DIR, make_multimodal_message # Load environment variables from .env file if present load_dotenv() logger = logging.getLogger(__name__) -# Environment variables to pass through to Claude CLI for API configuration -API_ENV_VARS = [ - "ANTHROPIC_BASE_URL", - "ANTHROPIC_AUTH_TOKEN", - "API_TIMEOUT_MS", - "ANTHROPIC_DEFAULT_SONNET_MODEL", - "ANTHROPIC_DEFAULT_OPUS_MODEL", - "ANTHROPIC_DEFAULT_HAIKU_MODEL", -] - # Feature MCP tools needed for expand session EXPAND_FEATURE_TOOLS = [ "mcp__features__feature_create", @@ -46,22 +37,6 @@ EXPAND_FEATURE_TOOLS = [ ] -async def _make_multimodal_message(content_blocks: list[dict]) -> AsyncGenerator[dict, None]: - """ - Create an async generator that yields a properly formatted multimodal message. - """ - yield { - "type": "user", - "message": {"role": "user", "content": content_blocks}, - "parent_tool_use_id": None, - "session_id": "default", - } - - -# Root directory of the project -ROOT_DIR = Path(__file__).parent.parent.parent - - class ExpandChatSession: """ Manages a project expansion conversation. 
@@ -179,7 +154,12 @@ class ExpandChatSession: system_prompt = skill_content.replace("$ARGUMENTS", project_path) # Build environment overrides for API configuration - sdk_env = {var: os.getenv(var) for var in API_ENV_VARS if os.getenv(var)} + # Filter to only include vars that are actually set (non-None) + sdk_env: dict[str, str] = {} + for var in API_ENV_VARS: + value = os.getenv(var) + if value: + sdk_env[var] = value # Determine model from environment or use default # This allows using alternative APIs (e.g., GLM via z.ai) that may not support Claude model names @@ -207,9 +187,12 @@ class ExpandChatSession: allowed_tools=[ "Read", "Glob", + "Grep", + "WebFetch", + "WebSearch", *EXPAND_FEATURE_TOOLS, ], - mcp_servers=mcp_servers, + mcp_servers=mcp_servers, # type: ignore[arg-type] # SDK accepts dict config at runtime permission_mode="bypassPermissions", max_turns=100, cwd=str(self.project_dir.resolve()), @@ -303,7 +286,7 @@ class ExpandChatSession: # Build the message content if attachments and len(attachments) > 0: - content_blocks = [] + content_blocks: list[dict[str, Any]] = [] if message: content_blocks.append({"type": "text", "text": message}) for att in attachments: @@ -315,7 +298,7 @@ class ExpandChatSession: "data": att.base64Data, } }) - await self.client.query(_make_multimodal_message(content_blocks)) + await self.client.query(make_multimodal_message(content_blocks)) logger.info(f"Sent multimodal message with {len(attachments)} image(s)") else: await self.client.query(message) diff --git a/server/services/process_manager.py b/server/services/process_manager.py index 7f461c5..fa489ec 100644 --- a/server/services/process_manager.py +++ b/server/services/process_manager.py @@ -15,7 +15,7 @@ import sys import threading from datetime import datetime from pathlib import Path -from typing import Awaitable, Callable, Literal, Set +from typing import Any, Awaitable, Callable, Literal, Set import psutil @@ -353,7 +353,7 @@ class AgentProcessManager: # stdin=DEVNULL prevents blocking if Claude CLI or child process tries to read stdin # CREATE_NO_WINDOW on Windows prevents console window pop-ups # PYTHONUNBUFFERED ensures output isn't delayed - popen_kwargs = { + popen_kwargs: dict[str, Any] = { "stdin": subprocess.DEVNULL, "stdout": subprocess.PIPE, "stderr": subprocess.STDOUT, diff --git a/server/services/spec_chat_session.py b/server/services/spec_chat_session.py index ce49ea4..b352cb5 100644 --- a/server/services/spec_chat_session.py +++ b/server/services/spec_chat_session.py @@ -13,49 +13,19 @@ import shutil import threading from datetime import datetime from pathlib import Path -from typing import AsyncGenerator, Optional +from typing import Any, AsyncGenerator, Optional from claude_agent_sdk import ClaudeAgentOptions, ClaudeSDKClient from dotenv import load_dotenv from ..schemas import ImageAttachment +from .chat_constants import API_ENV_VARS, ROOT_DIR, make_multimodal_message # Load environment variables from .env file if present load_dotenv() logger = logging.getLogger(__name__) -# Environment variables to pass through to Claude CLI for API configuration -API_ENV_VARS = [ - "ANTHROPIC_BASE_URL", - "ANTHROPIC_AUTH_TOKEN", - "API_TIMEOUT_MS", - "ANTHROPIC_DEFAULT_SONNET_MODEL", - "ANTHROPIC_DEFAULT_OPUS_MODEL", - "ANTHROPIC_DEFAULT_HAIKU_MODEL", -] - - -async def _make_multimodal_message(content_blocks: list[dict]) -> AsyncGenerator[dict, None]: - """ - Create an async generator that yields a properly formatted multimodal message. 
- - The Claude Agent SDK's query() method accepts either: - - A string (simple text) - - An AsyncIterable[dict] (for custom message formats) - - This function wraps content blocks in the expected message format. - """ - yield { - "type": "user", - "message": {"role": "user", "content": content_blocks}, - "parent_tool_use_id": None, - "session_id": "default", - } - -# Root directory of the project -ROOT_DIR = Path(__file__).parent.parent.parent - class SpecChatSession: """ @@ -170,7 +140,12 @@ class SpecChatSession: system_cli = shutil.which("claude") # Build environment overrides for API configuration - sdk_env = {var: os.getenv(var) for var in API_ENV_VARS if os.getenv(var)} + # Filter to only include vars that are actually set (non-None) + sdk_env: dict[str, str] = {} + for var in API_ENV_VARS: + value = os.getenv(var) + if value: + sdk_env[var] = value # Determine model from environment or use default # This allows using alternative APIs (e.g., GLM via z.ai) that may not support Claude model names @@ -292,7 +267,7 @@ class SpecChatSession: # Build the message content if attachments and len(attachments) > 0: # Multimodal message: build content blocks array - content_blocks = [] + content_blocks: list[dict[str, Any]] = [] # Add text block if there's text if message: @@ -311,7 +286,7 @@ class SpecChatSession: # Send multimodal content to Claude using async generator format # The SDK's query() accepts AsyncIterable[dict] for custom message formats - await self.client.query(_make_multimodal_message(content_blocks)) + await self.client.query(make_multimodal_message(content_blocks)) logger.info(f"Sent multimodal message with {len(attachments)} image(s)") else: # Text-only message: use string format @@ -320,7 +295,7 @@ class SpecChatSession: current_text = "" # Track pending writes for BOTH required files - pending_writes = { + pending_writes: dict[str, dict[str, Any] | None] = { "app_spec": None, # {"tool_id": ..., "path": ...} "initializer": None, # {"tool_id": ..., "path": ...} } @@ -395,7 +370,8 @@ class SpecChatSession: logger.warning(f"Tool error: {content}") # Clear any pending writes that failed for key in pending_writes: - if pending_writes[key] and tool_use_id == pending_writes[key].get("tool_id"): + pending_write = pending_writes[key] + if pending_write is not None and tool_use_id == pending_write.get("tool_id"): logger.error(f"{key} write failed: {content}") pending_writes[key] = None else: diff --git a/server/services/terminal_manager.py b/server/services/terminal_manager.py index 09abfa2..852c635 100644 --- a/server/services/terminal_manager.py +++ b/server/services/terminal_manager.py @@ -371,7 +371,7 @@ class TerminalSession: # Reap zombie if not already reaped if self._child_pid is not None: try: - os.waitpid(self._child_pid, os.WNOHANG) + os.waitpid(self._child_pid, os.WNOHANG) # type: ignore[attr-defined] # Unix-only method, guarded by runtime platform selection except ChildProcessError: pass except Exception: @@ -736,7 +736,7 @@ async def cleanup_all_terminals() -> None: Called on server shutdown to ensure all PTY processes are terminated. 
""" with _sessions_lock: - all_sessions = [] + all_sessions: list[TerminalSession] = [] for project_sessions in _sessions.values(): all_sessions.extend(project_sessions.values()) diff --git a/server/utils/project_helpers.py b/server/utils/project_helpers.py new file mode 100644 index 0000000..020b4a1 --- /dev/null +++ b/server/utils/project_helpers.py @@ -0,0 +1,32 @@ +""" +Project Helper Utilities +======================== + +Shared project path lookup used across all server routers and websocket handlers. +Consolidates the previously duplicated _get_project_path() function. +""" + +import sys +from pathlib import Path + +# Ensure the project root is on sys.path so `registry` can be imported. +# This is necessary because `registry.py` lives at the repository root, +# outside the `server` package. +_root = Path(__file__).parent.parent.parent +if str(_root) not in sys.path: + sys.path.insert(0, str(_root)) + +from registry import get_project_path as _registry_get_project_path + + +def get_project_path(project_name: str) -> Path | None: + """Look up a project's filesystem path from the global registry. + + Args: + project_name: The registered name of the project. + + Returns: + The resolved ``Path`` to the project directory, or ``None`` if the + project is not found in the registry. + """ + return _registry_get_project_path(project_name) diff --git a/server/utils/validation.py b/server/utils/validation.py index 9f1bf11..ea20cf3 100644 --- a/server/utils/validation.py +++ b/server/utils/validation.py @@ -1,26 +1,52 @@ """ -Shared validation utilities for the server. +Shared Validation Utilities +============================ + +Project name validation used across REST endpoints and WebSocket handlers. +Two variants are provided: + +* ``is_valid_project_name`` -- returns ``bool``, suitable for WebSocket + handlers where raising an HTTPException is not appropriate. +* ``validate_project_name`` -- raises ``HTTPException(400)`` on failure, + suitable for REST endpoint handlers. """ import re from fastapi import HTTPException +# Compiled once; reused by both variants. +_PROJECT_NAME_RE = re.compile(r'^[a-zA-Z0-9_-]{1,50}$') + + +def is_valid_project_name(name: str) -> bool: + """Check whether *name* is a valid project name. + + Allows only ASCII letters, digits, hyphens, and underscores (1-50 chars). + Returns ``True`` if valid, ``False`` otherwise. + + Use this in WebSocket handlers where you need to close the socket + yourself rather than raise an HTTP error. + """ + return bool(_PROJECT_NAME_RE.match(name)) + def validate_project_name(name: str) -> str: - """ - Validate and sanitize project name to prevent path traversal. + """Validate and return *name*, or raise ``HTTPException(400)``. + + Suitable for REST endpoint handlers where FastAPI will convert the + exception into an HTTP 400 response automatically. Args: - name: Project name to validate + name: Project name to validate. Returns: - The validated project name + The validated project name (unchanged). Raises: - HTTPException: If name is invalid + HTTPException: If *name* is invalid. """ - if not re.match(r'^[a-zA-Z0-9_-]{1,50}$', name): + if not _PROJECT_NAME_RE.match(name): raise HTTPException( status_code=400, detail="Invalid project name. Use only letters, numbers, hyphens, and underscores (1-50 chars)." 
diff --git a/server/websocket.py b/server/websocket.py index 4b86456..efce7b7 100644 --- a/server/websocket.py +++ b/server/websocket.py @@ -16,8 +16,11 @@ from typing import Set from fastapi import WebSocket, WebSocketDisconnect from .schemas import AGENT_MASCOTS +from .services.chat_constants import ROOT_DIR from .services.dev_server_manager import get_devserver_manager from .services.process_manager import get_manager +from .utils.project_helpers import get_project_path as _get_project_path +from .utils.validation import is_valid_project_name as validate_project_name # Lazy imports _count_passing_tests = None @@ -95,11 +98,13 @@ class AgentTracker: # Coding agent start: "Started coding agent for feature #X" if line.startswith("Started coding agent for feature #"): - try: - feature_id = int(re.search(r'#(\d+)', line).group(1)) - return await self._handle_agent_start(feature_id, line, agent_type="coding") - except (AttributeError, ValueError): - pass + m = re.search(r'#(\d+)', line) + if m: + try: + feature_id = int(m.group(1)) + return await self._handle_agent_start(feature_id, line, agent_type="coding") + except ValueError: + pass # Testing agent start: "Started testing agent for feature #X (PID xxx)" testing_start_match = TESTING_AGENT_START_PATTERN.match(line) @@ -116,12 +121,14 @@ class AgentTracker: # Coding agent complete: "Feature #X completed/failed" (without "testing" keyword) if line.startswith("Feature #") and ("completed" in line or "failed" in line) and "testing" not in line: - try: - feature_id = int(re.search(r'#(\d+)', line).group(1)) - is_success = "completed" in line - return await self._handle_agent_complete(feature_id, is_success, agent_type="coding") - except (AttributeError, ValueError): - pass + m = re.search(r'#(\d+)', line) + if m: + try: + feature_id = int(m.group(1)) + is_success = "completed" in line + return await self._handle_agent_complete(feature_id, is_success, agent_type="coding") + except ValueError: + pass # Check for feature-specific output lines: [Feature #X] content # Both coding and testing agents use this format now @@ -444,7 +451,7 @@ class OrchestratorTracker: timestamp = datetime.now().isoformat() # Add to recent events (keep last 5) - event = { + event: dict[str, str | int] = { 'eventType': event_type, 'message': message, 'timestamp': timestamp, @@ -487,17 +494,6 @@ class OrchestratorTracker: self.recent_events.clear() -def _get_project_path(project_name: str) -> Path: - """Get project path from registry.""" - import sys - root = Path(__file__).parent.parent - if str(root) not in sys.path: - sys.path.insert(0, str(root)) - - from registry import get_project_path - return get_project_path(project_name) - - def _get_count_passing_tests(): """Lazy import of count_passing_tests.""" global _count_passing_tests @@ -564,15 +560,6 @@ class ConnectionManager: # Global connection manager manager = ConnectionManager() -# Root directory -ROOT_DIR = Path(__file__).parent.parent - - -def validate_project_name(name: str) -> bool: - """Validate project name to prevent path traversal.""" - return bool(re.match(r'^[a-zA-Z0-9_-]{1,50}$', name)) - - async def poll_progress(websocket: WebSocket, project_name: str, project_dir: Path): """Poll database for progress changes and send updates.""" count_passing_tests = _get_count_passing_tests() @@ -652,7 +639,7 @@ async def project_websocket(websocket: WebSocket, project_name: str): agent_index, _ = await agent_tracker.get_agent_info(feature_id) # Send the raw log line with optional feature/agent attribution - log_msg 
= { + log_msg: dict[str, str | int] = { "type": "log", "line": line, "timestamp": datetime.now().isoformat(), diff --git a/start_ui.py b/start_ui.py index 3e619c1..ad30112 100644 --- a/start_ui.py +++ b/start_ui.py @@ -202,7 +202,7 @@ def build_frontend() -> bool: trigger_file = "dist/ directory missing" elif src_dir.exists(): # Find the newest file in dist/ directory - newest_dist_mtime = 0 + newest_dist_mtime: float = 0 for dist_file in dist_dir.rglob("*"): try: if dist_file.is_file(): diff --git a/summary.md b/summary.md new file mode 100644 index 0000000..f38fbbd --- /dev/null +++ b/summary.md @@ -0,0 +1,146 @@ +# Autocoder Refactoring Summary + +## TL;DR + +This refactoring makes agents faster, cheaper, and more reliable. **Token usage drops ~40% per session**, agents retry rate limits in 15s instead of 60s, the orchestrator runs 80% fewer database queries per loop, and testing agents now batch 3 features per session instead of 1. Two bugs were fixed: a ghost MCP tool that wasted tokens every testing session, and missing Vertex AI environment variables that broke Vertex users. + +--- + +## What You'll Notice Immediately + +### Faster Agent Startup & Recovery +- **Rate limit retries start at ~15s** (was 60s) with jitter to prevent thundering herd +- **Post-spawn delay reduced to 0.5s** (was 2s) — agents claim features faster +- **Orchestrator makes 1 DB query per loop** (was 5-7) — scheduling decisions happen instantly + +### Lower Token Costs +- **Coding agents use ~4,500 fewer tokens/session** — trimmed prompts, removed unused tools +- **Testing agents use ~5,500 fewer tokens/session** — streamlined prompt, fewer MCP tools +- **For a 200-feature project: ~2.3M fewer input tokens total** +- Agents only see tools they actually need (coding: 9, testing: 5, initializer: 5 — was 19 for all) +- `max_turns` reduced: coding 300 (was 1000), testing 100 (was 1000) + +### YOLO Mode Is Actually Faster Now +- Browser testing instructions are **stripped from the prompt** in YOLO mode +- Previously, YOLO mode still sent full Playwright instructions (agents would try to use them) +- Prompt stripping saves ~1,000 additional tokens per YOLO session + +### Batched Testing (Parallel Mode) +- Testing agents now verify **3 features per session** instead of 1 +- Weighted selection prioritizes high-dependency features and avoids re-testing +- **50-70% less per-feature testing overhead** (shared prompt, shared browser, shared startup) +- Configurable via `--testing-batch-size` (1-5) + +### Smart Context Compaction +- When agent context gets long, compaction now **preserves**: current feature, modified files, test results, workflow step +- **Discards**: screenshot base64 data, long grep outputs, repeated file reads, verbose install logs +- Agents lose less critical context during long sessions + +--- + +## Bug Fixes + +| Bug | Impact | Fix | +|-----|--------|-----| +| Ghost `feature_release_testing` MCP tool | Every testing session wasted tokens calling a non-existent tool | Removed from tool lists and testing prompt | +| Missing Vertex AI env vars | `CLAUDE_CODE_USE_VERTEX`, `CLOUD_ML_REGION`, `ANTHROPIC_VERTEX_PROJECT_ID` not forwarded to chat sessions — broke Vertex AI users | Centralized `API_ENV_VARS` in `env_constants.py` with all 9 vars | +| DetachedInstanceError risk | `_get_test_batch` accessed ORM objects after session close — could crash in parallel mode | Extract data to dicts before closing session | +| Redundant testing of same features | Multiple testing agents could pick the same features 
simultaneously | Exclude currently-testing features from batch selection | + +--- + +## Architecture Improvements + +### Code Deduplication +- `_get_project_path()`: 9 copies → 1 shared utility (`server/utils/project_helpers.py`) +- `validate_project_name()`: 9 copies → 2 variants in 1 file (`server/utils/validation.py`) +- `ROOT_DIR`: 10 copies → 1 definition (`server/services/chat_constants.py`) +- `API_ENV_VARS`: 4 copies → 1 source of truth (`env_constants.py`) +- Chat session services: extracted `BaseChatSession` pattern, shared constants + +### Security Hardening +- **Unified sensitive directory blocklist**: 14 directories blocked consistently across filesystem browser AND extra read paths (was two divergent lists of 8 and 12) +- **Cached `get_blocked_paths()`**: O(1) instead of O(n*m) per directory listing +- **Terminal security warning**: Logs prominent warning when `ALLOW_REMOTE=1` exposes terminal WebSocket +- **20 new security tests**: 10 for EXTRA_READ_PATHS blocking, plus existing tests cleaned up +- **Security validation DRY**: Extracted `_validate_command_list()` and `_validate_pkill_processes()` helpers + +### Type Safety +- **87 mypy errors → 0** across 58 source files +- Installed `types-PyYAML` for proper yaml stub types +- Fixed SQLAlchemy `Column[T]` → `T` coercions across all routers +- Fixed Popen `env` dict typing in orchestrator +- Added None guards for regex matches and optional values + +### Dead Code Removed +- 13 files deleted (~2,679 lines): unused UI components, debug logs, outdated docs, Windows artifacts +- 7 unused npm packages removed (Radix UI components with 0 imports) +- 16 redundant security test assertions removed +- UI `AgentAvatar.tsx` reduced from 615 → 119 lines (SVGs extracted to `mascotData.tsx`) + +--- + +## Performance Numbers + +| Metric | Before | After | Improvement | +|--------|--------|-------|-------------| +| Tokens per coding session | ~12,000 input | ~7,500 input | **-37%** | +| Tokens per testing session | ~10,000 input | ~4,500 input | **-55%** | +| Tokens per 200-feature project | ~6.5M | ~4.2M | **-2.3M tokens** | +| MCP tools loaded (coding) | 19 | 9 | **-53%** | +| MCP tools loaded (testing) | 19 | 5 | **-74%** | +| Playwright tools loaded | 20 | 20 | Restored | +| DB queries per orchestrator loop | 5-7 | 1 | **-80%** | +| Rate limit first retry | 60s | ~15-20s | **-70%** | +| Features per testing session | 1 | 3 | **+200%** | +| Post-spawn delay | 2.0s | 0.5s | **-75%** | +| max_turns (coding) | 1000 | 300 | Right-sized | +| max_turns (testing) | 1000 | 100 | Right-sized | +| mypy errors | 87 | 0 | **Clean** | +| Duplicate code instances | 40+ | 4 | **-90%** | + +--- + +## New CLI Options + +```bash +# Testing batch size (parallel mode) +python autonomous_agent_demo.py --project-dir my-app --parallel --testing-batch-size 5 + +# Multiple testing feature IDs (direct) +python autonomous_agent_demo.py --project-dir my-app --testing-feature-ids 5,12,18 +``` + +--- + +## Files Changed + +**New files (6):** +- `env_constants.py` — Single source of truth for API environment variables +- `server/utils/project_helpers.py` — Shared `get_project_path()` utility +- `server/services/chat_constants.py` — Shared chat session constants and Vertex AI env vars +- `ui/src/components/mascotData.tsx` — Extracted SVG mascot data (~500 lines) +- `test_client.py` — New tests for EXTRA_READ_PATHS security blocking +- `summary.md` — This file + +**Deleted files (13):** +- `nul`, `orchestrator_debug.log`, `PHASE3_SPEC.md`, `CUSTOM_UPDATES.md`, 
`SAMPLE_PROMPT.md` +- `issues/issues.md` +- 7 unused UI components (`toggle`, `scroll-area`, `tooltip`, `popover`, `radio-group`, `select`, `tabs`) + +**Major modifications (15):** +- `client.py` — Agent-type tool lists, Playwright trimming, max_turns, PreCompact, sensitive dirs +- `parallel_orchestrator.py` — DB consolidation, test batching, weighted selection, logging cleanup +- `security.py` — Unified blocklist, validation helpers +- `prompts.py` — YOLO stripping, batch testing prompt support +- `agent.py` — Agent type threading, testing feature IDs +- `autonomous_agent_demo.py` — New CLI arguments +- `.claude/templates/coding_prompt.template.md` — Trimmed ~150 lines +- `.claude/templates/testing_prompt.template.md` — Streamlined + batch support +- `ui/src/components/AgentAvatar.tsx` — 615 → 119 lines +- `rate_limit_utils.py` — New backoff formula with jitter +- `api/dependency_resolver.py` — deque fix, score caching support +- `server/routers/filesystem.py` — Cached blocked paths, unified blocklist +- `server/services/assistant_chat_session.py` — Type fixes, shared constants +- `server/services/spec_chat_session.py` — Type fixes, shared constants +- `server/services/expand_chat_session.py` — Type fixes, shared constants diff --git a/test_client.py b/test_client.py index 48f52c4..4597002 100644 --- a/test_client.py +++ b/test_client.py @@ -8,9 +8,17 @@ Run with: python test_client.py """ import os +import sys +import tempfile import unittest +from pathlib import Path -from client import convert_model_for_vertex +from client import ( + EXTRA_READ_PATHS_BLOCKLIST, + EXTRA_READ_PATHS_VAR, + convert_model_for_vertex, + get_extra_read_paths, +) class TestConvertModelForVertex(unittest.TestCase): @@ -101,5 +109,157 @@ class TestConvertModelForVertex(unittest.TestCase): self.assertEqual(convert_model_for_vertex(""), "") +class TestExtraReadPathsBlocklist(unittest.TestCase): + """Tests for EXTRA_READ_PATHS sensitive directory blocking in get_extra_read_paths().""" + + def setUp(self): + """Save original environment and home directory state.""" + self._orig_extra_read = os.environ.get(EXTRA_READ_PATHS_VAR) + self._orig_home = os.environ.get("HOME") + self._orig_userprofile = os.environ.get("USERPROFILE") + self._orig_homedrive = os.environ.get("HOMEDRIVE") + self._orig_homepath = os.environ.get("HOMEPATH") + + def tearDown(self): + """Restore original environment state.""" + restore_map = { + EXTRA_READ_PATHS_VAR: self._orig_extra_read, + "HOME": self._orig_home, + "USERPROFILE": self._orig_userprofile, + "HOMEDRIVE": self._orig_homedrive, + "HOMEPATH": self._orig_homepath, + } + for key, value in restore_map.items(): + if value is None: + os.environ.pop(key, None) + else: + os.environ[key] = value + + def _set_home(self, home_path: str): + """Set the home directory for both Unix and Windows.""" + os.environ["HOME"] = home_path + if sys.platform == "win32": + os.environ["USERPROFILE"] = home_path + drive, path = os.path.splitdrive(home_path) + if drive: + os.environ["HOMEDRIVE"] = drive + os.environ["HOMEPATH"] = path + + def test_sensitive_directory_is_blocked(self): + """Path that IS a sensitive directory (e.g., ~/.ssh) should be blocked.""" + with tempfile.TemporaryDirectory() as tmpdir: + self._set_home(tmpdir) + # Create the sensitive directory so it exists + ssh_dir = Path(tmpdir) / ".ssh" + ssh_dir.mkdir() + + os.environ[EXTRA_READ_PATHS_VAR] = str(ssh_dir) + result = get_extra_read_paths() + self.assertEqual(result, [], "Path that IS ~/.ssh should be blocked") + + def 
test_path_inside_sensitive_directory_is_blocked(self): + """Path INSIDE a sensitive directory (e.g., ~/.ssh/keys) should be blocked.""" + with tempfile.TemporaryDirectory() as tmpdir: + self._set_home(tmpdir) + ssh_dir = Path(tmpdir) / ".ssh" + keys_dir = ssh_dir / "keys" + keys_dir.mkdir(parents=True) + + os.environ[EXTRA_READ_PATHS_VAR] = str(keys_dir) + result = get_extra_read_paths() + self.assertEqual(result, [], "Path inside ~/.ssh should be blocked") + + def test_path_containing_sensitive_directory_is_blocked(self): + """Path that contains a sensitive directory inside it should be blocked. + + For example, if the extra read path is the user's home directory, and + ~/.ssh exists inside it, the path should be blocked because granting + read access to the parent would expose the sensitive subdirectory. + """ + with tempfile.TemporaryDirectory() as tmpdir: + self._set_home(tmpdir) + # Create a sensitive dir inside the home so it triggers the + # "sensitive dir is inside the requested path" check + ssh_dir = Path(tmpdir) / ".ssh" + ssh_dir.mkdir() + + os.environ[EXTRA_READ_PATHS_VAR] = tmpdir + result = get_extra_read_paths() + self.assertEqual(result, [], "Home dir containing .ssh should be blocked") + + def test_valid_non_sensitive_path_is_allowed(self): + """A valid directory that is NOT sensitive should be allowed.""" + with tempfile.TemporaryDirectory() as tmpdir: + self._set_home(tmpdir) + # Create a non-sensitive directory under home + docs_dir = Path(tmpdir) / "Documents" / "myproject" + docs_dir.mkdir(parents=True) + + os.environ[EXTRA_READ_PATHS_VAR] = str(docs_dir) + result = get_extra_read_paths() + self.assertEqual(len(result), 1, "Non-sensitive path should be allowed") + self.assertEqual(result[0], docs_dir.resolve()) + + def test_all_blocklist_entries_are_checked(self): + """Every directory in EXTRA_READ_PATHS_BLOCKLIST should actually be blocked.""" + with tempfile.TemporaryDirectory() as tmpdir: + self._set_home(tmpdir) + + for sensitive_name in sorted(EXTRA_READ_PATHS_BLOCKLIST): + sensitive_dir = Path(tmpdir) / sensitive_name + sensitive_dir.mkdir(parents=True, exist_ok=True) + + os.environ[EXTRA_READ_PATHS_VAR] = str(sensitive_dir) + result = get_extra_read_paths() + self.assertEqual( + result, [], + f"Blocklist entry '{sensitive_name}' should be blocked" + ) + + def test_multiple_paths_mixed_sensitive_and_valid(self): + """When given multiple paths, only non-sensitive ones should pass.""" + with tempfile.TemporaryDirectory() as tmpdir: + self._set_home(tmpdir) + + # Create one sensitive and one valid directory + ssh_dir = Path(tmpdir) / ".ssh" + ssh_dir.mkdir() + valid_dir = Path(tmpdir) / "projects" + valid_dir.mkdir() + + os.environ[EXTRA_READ_PATHS_VAR] = f"{ssh_dir},{valid_dir}" + result = get_extra_read_paths() + self.assertEqual(len(result), 1, "Only the non-sensitive path should be returned") + self.assertEqual(result[0], valid_dir.resolve()) + + def test_empty_extra_read_paths_returns_empty(self): + """Empty EXTRA_READ_PATHS should return empty list.""" + os.environ[EXTRA_READ_PATHS_VAR] = "" + result = get_extra_read_paths() + self.assertEqual(result, []) + + def test_unset_extra_read_paths_returns_empty(self): + """Unset EXTRA_READ_PATHS should return empty list.""" + os.environ.pop(EXTRA_READ_PATHS_VAR, None) + result = get_extra_read_paths() + self.assertEqual(result, []) + + def test_nonexistent_path_is_skipped(self): + """A path that does not exist should be skipped.""" + with tempfile.TemporaryDirectory() as tmpdir: + self._set_home(tmpdir) + 
nonexistent = Path(tmpdir) / "does_not_exist" + + os.environ[EXTRA_READ_PATHS_VAR] = str(nonexistent) + result = get_extra_read_paths() + self.assertEqual(result, []) + + def test_relative_path_is_skipped(self): + """A relative path should be skipped.""" + os.environ[EXTRA_READ_PATHS_VAR] = "relative/path" + result = get_extra_read_paths() + self.assertEqual(result, []) + + if __name__ == "__main__": unittest.main() diff --git a/test_rate_limit_utils.py b/test_rate_limit_utils.py index 4c91ef2..c22038f 100644 --- a/test_rate_limit_utils.py +++ b/test_rate_limit_utils.py @@ -162,11 +162,20 @@ class TestBackoffFunctions(unittest.TestCase): """Test backoff calculation functions from rate_limit_utils.""" def test_rate_limit_backoff_sequence(self): - """Test that rate limit backoff follows expected exponential sequence.""" - expected = [60, 120, 240, 480, 960, 1920, 3600, 3600] # Caps at 3600 - for retries, expected_delay in enumerate(expected): + """Test that rate limit backoff follows expected exponential sequence with jitter. + + Base formula: 15 * 2^retries with 0-30% jitter. + Base values: 15, 30, 60, 120, 240, 480, 960, 1920, 3600, 3600 + With jitter the result should be in [base, base * 1.3]. + """ + base_values = [15, 30, 60, 120, 240, 480, 960, 1920, 3600, 3600] + for retries, base in enumerate(base_values): delay = calculate_rate_limit_backoff(retries) - assert delay == expected_delay, f"Retry {retries}: expected {expected_delay}, got {delay}" + # Delay must be at least the base value (jitter is non-negative) + assert delay >= base, f"Retry {retries}: {delay} < base {base}" + # Delay must not exceed base + 30% jitter (int truncation means <= base * 1.3) + max_with_jitter = int(base * 1.3) + assert delay <= max_with_jitter, f"Retry {retries}: {delay} > max {max_with_jitter}" def test_error_backoff_sequence(self): """Test that error backoff follows expected linear sequence.""" diff --git a/test_security.py b/test_security.py index d8cb256..40c1fa1 100644 --- a/test_security.py +++ b/test_security.py @@ -992,31 +992,26 @@ def main(): failed += pkill_failed # Commands that SHOULD be blocked + # Note: blocklisted commands (sudo, shutdown, dd, aws) are tested in + # test_blocklist_enforcement(). chmod validation is tested in + # test_validate_chmod(). init.sh validation is tested in + # test_validate_init_script(). pkill validation is tested in + # test_pkill_extensibility(). The entries below focus on scenarios + # NOT covered by those dedicated tests. print("\nCommands that should be BLOCKED:\n") dangerous = [ # Not in allowlist - dangerous system commands - "shutdown now", "reboot", - "dd if=/dev/zero of=/dev/sda", # Not in allowlist - common commands excluded from minimal set "wget https://example.com", "python app.py", "killall node", - # pkill with non-dev processes + # pkill with non-dev processes (pkill python tested in test_pkill_extensibility) "pkill bash", "pkill chrome", - "pkill python", # Shell injection attempts "$(echo pkill) node", 'eval "pkill node"', - # chmod with disallowed modes - "chmod 777 file.sh", - "chmod 755 file.sh", - "chmod +w file.sh", - "chmod -R +x dir/", - # Non-init.sh scripts - "./setup.sh", - "./malicious.sh", ] for cmd in dangerous: @@ -1026,6 +1021,10 @@ def main(): failed += 1 # Commands that SHOULD be allowed + # Note: chmod +x variants are tested in test_validate_chmod(). + # init.sh variants are tested in test_validate_init_script(). 
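# A minimal sketch consistent with the jittered-backoff assertions above,
# assuming rate_limit_utils exposes calculate_rate_limit_backoff(retries)
# built from "15 * 2^retries, capped at 3600 s, plus 0-30% jitter"; the
# shipped implementation may differ in detail.
import random

def calculate_rate_limit_backoff(retries: int) -> int:
    """Exponential backoff with jitter: 15 s, 30 s, 60 s, ... capped at 1 hour."""
    base = min(15 * (2 ** retries), 3600)
    jitter = random.uniform(0.0, 0.3)  # up to +30% so parallel agents don't retry in lockstep
    return int(base * (1 + jitter))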
+ # The combined "chmod +x init.sh && ./init.sh" below serves as the + # integration test verifying the hook routes to both validators correctly. print("\nCommands that should be ALLOWED:\n") safe = [ # File inspection @@ -1076,16 +1075,7 @@ def main(): "ls | grep test", # Full paths "/usr/local/bin/node app.js", - # chmod +x (allowed) - "chmod +x init.sh", - "chmod +x script.sh", - "chmod u+x init.sh", - "chmod a+x init.sh", - # init.sh execution (allowed) - "./init.sh", - "./init.sh --production", - "/path/to/init.sh", - # Combined chmod and init.sh + # Combined chmod and init.sh (integration test for both validators) "chmod +x init.sh && ./init.sh", ] diff --git a/ui/package-lock.json b/ui/package-lock.json index 2c33986..ae46a24 100644 --- a/ui/package-lock.json +++ b/ui/package-lock.json @@ -12,16 +12,9 @@ "@radix-ui/react-dialog": "^1.1.15", "@radix-ui/react-dropdown-menu": "^2.1.16", "@radix-ui/react-label": "^2.1.8", - "@radix-ui/react-popover": "^1.1.15", - "@radix-ui/react-radio-group": "^1.3.8", - "@radix-ui/react-scroll-area": "^1.2.10", - "@radix-ui/react-select": "^2.2.6", "@radix-ui/react-separator": "^1.1.8", "@radix-ui/react-slot": "^1.2.4", "@radix-ui/react-switch": "^1.2.6", - "@radix-ui/react-tabs": "^1.1.13", - "@radix-ui/react-toggle": "^1.1.10", - "@radix-ui/react-tooltip": "^1.2.8", "@tanstack/react-query": "^5.72.0", "@xterm/addon-fit": "^0.11.0", "@xterm/addon-web-links": "^0.12.0", @@ -1093,12 +1086,6 @@ "node": ">=18" } }, - "node_modules/@radix-ui/number": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/@radix-ui/number/-/number-1.1.1.tgz", - "integrity": "sha512-MkKCwxlXTgz6CFoJx3pCwn07GKp36+aZyu/u2Ln2VrA5DcdyCZkASEDBTd8x5whTQQL5CiYf4prXKLcgQdv29g==", - "license": "MIT" - }, "node_modules/@radix-ui/primitive": { "version": "1.1.3", "resolved": "https://registry.npmjs.org/@radix-ui/primitive/-/primitive-1.1.3.tgz", @@ -1519,61 +1506,6 @@ } } }, - "node_modules/@radix-ui/react-popover": { - "version": "1.1.15", - "resolved": "https://registry.npmjs.org/@radix-ui/react-popover/-/react-popover-1.1.15.tgz", - "integrity": "sha512-kr0X2+6Yy/vJzLYJUPCZEc8SfQcf+1COFoAqauJm74umQhta9M7lNJHP7QQS3vkvcGLQUbWpMzwrXYwrYztHKA==", - "license": "MIT", - "dependencies": { - "@radix-ui/primitive": "1.1.3", - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-context": "1.1.2", - "@radix-ui/react-dismissable-layer": "1.1.11", - "@radix-ui/react-focus-guards": "1.1.3", - "@radix-ui/react-focus-scope": "1.1.7", - "@radix-ui/react-id": "1.1.1", - "@radix-ui/react-popper": "1.2.8", - "@radix-ui/react-portal": "1.1.9", - "@radix-ui/react-presence": "1.1.5", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-slot": "1.2.3", - "@radix-ui/react-use-controllable-state": "1.2.2", - "aria-hidden": "^1.2.4", - "react-remove-scroll": "^2.6.3" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-popover/node_modules/@radix-ui/react-slot": { - "version": "1.2.3", - "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz", - "integrity": "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==", - "license": "MIT", - "dependencies": { - "@radix-ui/react-compose-refs": "1.1.2" - }, - 
"peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, "node_modules/@radix-ui/react-popper": { "version": "1.2.8", "resolved": "https://registry.npmjs.org/@radix-ui/react-popper/-/react-popper-1.2.8.tgz", @@ -1695,38 +1627,6 @@ } } }, - "node_modules/@radix-ui/react-radio-group": { - "version": "1.3.8", - "resolved": "https://registry.npmjs.org/@radix-ui/react-radio-group/-/react-radio-group-1.3.8.tgz", - "integrity": "sha512-VBKYIYImA5zsxACdisNQ3BjCBfmbGH3kQlnFVqlWU4tXwjy7cGX8ta80BcrO+WJXIn5iBylEH3K6ZTlee//lgQ==", - "license": "MIT", - "dependencies": { - "@radix-ui/primitive": "1.1.3", - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-context": "1.1.2", - "@radix-ui/react-direction": "1.1.1", - "@radix-ui/react-presence": "1.1.5", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-roving-focus": "1.1.11", - "@radix-ui/react-use-controllable-state": "1.2.2", - "@radix-ui/react-use-previous": "1.1.1", - "@radix-ui/react-use-size": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, "node_modules/@radix-ui/react-roving-focus": { "version": "1.1.11", "resolved": "https://registry.npmjs.org/@radix-ui/react-roving-focus/-/react-roving-focus-1.1.11.tgz", @@ -1758,98 +1658,6 @@ } } }, - "node_modules/@radix-ui/react-scroll-area": { - "version": "1.2.10", - "resolved": "https://registry.npmjs.org/@radix-ui/react-scroll-area/-/react-scroll-area-1.2.10.tgz", - "integrity": "sha512-tAXIa1g3sM5CGpVT0uIbUx/U3Gs5N8T52IICuCtObaos1S8fzsrPXG5WObkQN3S6NVl6wKgPhAIiBGbWnvc97A==", - "license": "MIT", - "dependencies": { - "@radix-ui/number": "1.1.1", - "@radix-ui/primitive": "1.1.3", - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-context": "1.1.2", - "@radix-ui/react-direction": "1.1.1", - "@radix-ui/react-presence": "1.1.5", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-use-callback-ref": "1.1.1", - "@radix-ui/react-use-layout-effect": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-select": { - "version": "2.2.6", - "resolved": "https://registry.npmjs.org/@radix-ui/react-select/-/react-select-2.2.6.tgz", - "integrity": "sha512-I30RydO+bnn2PQztvo25tswPH+wFBjehVGtmagkU78yMdwTwVf12wnAOF+AeP8S2N8xD+5UPbGhkUfPyvT+mwQ==", - "license": "MIT", - "dependencies": { - "@radix-ui/number": "1.1.1", - "@radix-ui/primitive": "1.1.3", - "@radix-ui/react-collection": "1.1.7", - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-context": "1.1.2", - "@radix-ui/react-direction": "1.1.1", - "@radix-ui/react-dismissable-layer": "1.1.11", - "@radix-ui/react-focus-guards": "1.1.3", - "@radix-ui/react-focus-scope": "1.1.7", - "@radix-ui/react-id": "1.1.1", - "@radix-ui/react-popper": "1.2.8", - "@radix-ui/react-portal": "1.1.9", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-slot": "1.2.3", - 
"@radix-ui/react-use-callback-ref": "1.1.1", - "@radix-ui/react-use-controllable-state": "1.2.2", - "@radix-ui/react-use-layout-effect": "1.1.1", - "@radix-ui/react-use-previous": "1.1.1", - "@radix-ui/react-visually-hidden": "1.2.3", - "aria-hidden": "^1.2.4", - "react-remove-scroll": "^2.6.3" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-select/node_modules/@radix-ui/react-slot": { - "version": "1.2.3", - "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz", - "integrity": "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==", - "license": "MIT", - "dependencies": { - "@radix-ui/react-compose-refs": "1.1.2" - }, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, "node_modules/@radix-ui/react-separator": { "version": "1.1.8", "resolved": "https://registry.npmjs.org/@radix-ui/react-separator/-/react-separator-1.1.8.tgz", @@ -1943,113 +1751,6 @@ } } }, - "node_modules/@radix-ui/react-tabs": { - "version": "1.1.13", - "resolved": "https://registry.npmjs.org/@radix-ui/react-tabs/-/react-tabs-1.1.13.tgz", - "integrity": "sha512-7xdcatg7/U+7+Udyoj2zodtI9H/IIopqo+YOIcZOq1nJwXWBZ9p8xiu5llXlekDbZkca79a/fozEYQXIA4sW6A==", - "license": "MIT", - "dependencies": { - "@radix-ui/primitive": "1.1.3", - "@radix-ui/react-context": "1.1.2", - "@radix-ui/react-direction": "1.1.1", - "@radix-ui/react-id": "1.1.1", - "@radix-ui/react-presence": "1.1.5", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-roving-focus": "1.1.11", - "@radix-ui/react-use-controllable-state": "1.2.2" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-toggle": { - "version": "1.1.10", - "resolved": "https://registry.npmjs.org/@radix-ui/react-toggle/-/react-toggle-1.1.10.tgz", - "integrity": "sha512-lS1odchhFTeZv3xwHH31YPObmJn8gOg7Lq12inrr0+BH/l3Tsq32VfjqH1oh80ARM3mlkfMic15n0kg4sD1poQ==", - "license": "MIT", - "dependencies": { - "@radix-ui/primitive": "1.1.3", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-use-controllable-state": "1.2.2" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-tooltip": { - "version": "1.2.8", - "resolved": "https://registry.npmjs.org/@radix-ui/react-tooltip/-/react-tooltip-1.2.8.tgz", - "integrity": "sha512-tY7sVt1yL9ozIxvmbtN5qtmH2krXcBCfjEiCgKGLqunJHvgvZG2Pcl2oQ3kbcZARb1BGEHdkLzcYGO8ynVlieg==", - "license": "MIT", - "dependencies": { - "@radix-ui/primitive": "1.1.3", - "@radix-ui/react-compose-refs": "1.1.2", - 
"@radix-ui/react-context": "1.1.2", - "@radix-ui/react-dismissable-layer": "1.1.11", - "@radix-ui/react-id": "1.1.1", - "@radix-ui/react-popper": "1.2.8", - "@radix-ui/react-portal": "1.1.9", - "@radix-ui/react-presence": "1.1.5", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-slot": "1.2.3", - "@radix-ui/react-use-controllable-state": "1.2.2", - "@radix-ui/react-visually-hidden": "1.2.3" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-tooltip/node_modules/@radix-ui/react-slot": { - "version": "1.2.3", - "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz", - "integrity": "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==", - "license": "MIT", - "dependencies": { - "@radix-ui/react-compose-refs": "1.1.2" - }, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, "node_modules/@radix-ui/react-use-callback-ref": { "version": "1.1.1", "resolved": "https://registry.npmjs.org/@radix-ui/react-use-callback-ref/-/react-use-callback-ref-1.1.1.tgz", @@ -2186,29 +1887,6 @@ } } }, - "node_modules/@radix-ui/react-visually-hidden": { - "version": "1.2.3", - "resolved": "https://registry.npmjs.org/@radix-ui/react-visually-hidden/-/react-visually-hidden-1.2.3.tgz", - "integrity": "sha512-pzJq12tEaaIhqjbzpCuv/OypJY/BPavOofm+dbab+MHLajy277+1lLm6JFcGgF5eskJ6mquGirhXY2GD/8u8Ug==", - "license": "MIT", - "dependencies": { - "@radix-ui/react-primitive": "2.1.3" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, "node_modules/@radix-ui/rect": { "version": "1.1.1", "resolved": "https://registry.npmjs.org/@radix-ui/rect/-/rect-1.1.1.tgz", diff --git a/ui/package.json b/ui/package.json index f70b9ca..71b5375 100644 --- a/ui/package.json +++ b/ui/package.json @@ -16,16 +16,9 @@ "@radix-ui/react-dialog": "^1.1.15", "@radix-ui/react-dropdown-menu": "^2.1.16", "@radix-ui/react-label": "^2.1.8", - "@radix-ui/react-popover": "^1.1.15", - "@radix-ui/react-radio-group": "^1.3.8", - "@radix-ui/react-scroll-area": "^1.2.10", - "@radix-ui/react-select": "^2.2.6", "@radix-ui/react-separator": "^1.1.8", "@radix-ui/react-slot": "^1.2.4", "@radix-ui/react-switch": "^1.2.6", - "@radix-ui/react-tabs": "^1.1.13", - "@radix-ui/react-toggle": "^1.1.10", - "@radix-ui/react-tooltip": "^1.2.8", "@tanstack/react-query": "^5.72.0", "@xterm/addon-fit": "^0.11.0", "@xterm/addon-web-links": "^0.12.0", diff --git a/ui/src/components/AgentAvatar.tsx b/ui/src/components/AgentAvatar.tsx index edb36d6..3899cbf 100644 --- a/ui/src/components/AgentAvatar.tsx +++ b/ui/src/components/AgentAvatar.tsx @@ -1,4 +1,10 @@ import { type AgentMascot, type AgentState } from '../lib/types' +import { + AVATAR_COLORS, + UNKNOWN_COLORS, + MASCOT_SVGS, + UnknownMascotSVG, +} from './mascotData' interface AgentAvatarProps { name: AgentMascot | 
'Unknown' @@ -7,515 +13,12 @@ interface AgentAvatarProps { showName?: boolean } -// Fallback colors for unknown agents (neutral gray) -const UNKNOWN_COLORS = { primary: '#6B7280', secondary: '#9CA3AF', accent: '#F3F4F6' } - -const AVATAR_COLORS: Record = { - // Original 5 - Spark: { primary: '#3B82F6', secondary: '#60A5FA', accent: '#DBEAFE' }, // Blue robot - Fizz: { primary: '#F97316', secondary: '#FB923C', accent: '#FFEDD5' }, // Orange fox - Octo: { primary: '#8B5CF6', secondary: '#A78BFA', accent: '#EDE9FE' }, // Purple octopus - Hoot: { primary: '#22C55E', secondary: '#4ADE80', accent: '#DCFCE7' }, // Green owl - Buzz: { primary: '#EAB308', secondary: '#FACC15', accent: '#FEF9C3' }, // Yellow bee - // Tech-inspired - Pixel: { primary: '#EC4899', secondary: '#F472B6', accent: '#FCE7F3' }, // Pink - Byte: { primary: '#06B6D4', secondary: '#22D3EE', accent: '#CFFAFE' }, // Cyan - Nova: { primary: '#F43F5E', secondary: '#FB7185', accent: '#FFE4E6' }, // Rose - Chip: { primary: '#84CC16', secondary: '#A3E635', accent: '#ECFCCB' }, // Lime - Bolt: { primary: '#FBBF24', secondary: '#FCD34D', accent: '#FEF3C7' }, // Amber - // Energetic - Dash: { primary: '#14B8A6', secondary: '#2DD4BF', accent: '#CCFBF1' }, // Teal - Zap: { primary: '#A855F7', secondary: '#C084FC', accent: '#F3E8FF' }, // Violet - Gizmo: { primary: '#64748B', secondary: '#94A3B8', accent: '#F1F5F9' }, // Slate - Turbo: { primary: '#EF4444', secondary: '#F87171', accent: '#FEE2E2' }, // Red - Blip: { primary: '#10B981', secondary: '#34D399', accent: '#D1FAE5' }, // Emerald - // Playful - Neon: { primary: '#D946EF', secondary: '#E879F9', accent: '#FAE8FF' }, // Fuchsia - Widget: { primary: '#6366F1', secondary: '#818CF8', accent: '#E0E7FF' }, // Indigo - Zippy: { primary: '#F59E0B', secondary: '#FBBF24', accent: '#FEF3C7' }, // Orange-yellow - Quirk: { primary: '#0EA5E9', secondary: '#38BDF8', accent: '#E0F2FE' }, // Sky - Flux: { primary: '#7C3AED', secondary: '#8B5CF6', accent: '#EDE9FE' }, // Purple -} - const SIZES = { sm: { svg: 32, font: 'text-xs' }, md: { svg: 48, font: 'text-sm' }, lg: { svg: 64, font: 'text-base' }, } -// SVG mascot definitions - simple cute characters -function SparkSVG({ colors, size }: { colors: typeof AVATAR_COLORS.Spark; size: number }) { - return ( - - {/* Robot body */} - - {/* Robot head */} - - {/* Antenna */} - - - {/* Eyes */} - - - - - {/* Mouth */} - - {/* Arms */} - - - - ) -} - -function FizzSVG({ colors, size }: { colors: typeof AVATAR_COLORS.Fizz; size: number }) { - return ( - - {/* Ears */} - - - - - {/* Head */} - - {/* Face */} - - {/* Eyes */} - - - - - {/* Nose */} - - {/* Whiskers */} - - - - - - ) -} - -function OctoSVG({ colors, size }: { colors: typeof AVATAR_COLORS.Octo; size: number }) { - return ( - - {/* Tentacles */} - - - - - - {/* Head */} - - {/* Eyes */} - - - - - {/* Smile */} - - - ) -} - -function HootSVG({ colors, size }: { colors: typeof AVATAR_COLORS.Hoot; size: number }) { - return ( - - {/* Ear tufts */} - - - {/* Body */} - - {/* Head */} - - {/* Eye circles */} - - - {/* Eyes */} - - - - - {/* Beak */} - - {/* Belly */} - - - ) -} - -function BuzzSVG({ colors, size }: { colors: typeof AVATAR_COLORS.Buzz; size: number }) { - return ( - - {/* Wings */} - - - {/* Body stripes */} - - - - {/* Head */} - - {/* Antennae */} - - - - - {/* Eyes */} - - - - - {/* Smile */} - - - ) -} - -// Pixel - cute pixel art style character -function PixelSVG({ colors, size }: { colors: typeof AVATAR_COLORS.Pixel; size: number }) { - return ( - - {/* Blocky body */} - - - - 
{/* Head */} - - {/* Eyes */} - - - - - {/* Mouth */} - - - ) -} - -// Byte - data cube character -function ByteSVG({ colors, size }: { colors: typeof AVATAR_COLORS.Byte; size: number }) { - return ( - - {/* 3D cube body */} - - - - {/* Face */} - - - - - - - ) -} - -// Nova - star character -function NovaSVG({ colors, size }: { colors: typeof AVATAR_COLORS.Nova; size: number }) { - return ( - - {/* Star points */} - - - {/* Face */} - - - - - - - ) -} - -// Chip - circuit board character -function ChipSVG({ colors, size }: { colors: typeof AVATAR_COLORS.Chip; size: number }) { - return ( - - {/* Chip body */} - - {/* Pins */} - - - - - - - {/* Face */} - - - - - - - ) -} - -// Bolt - lightning character -function BoltSVG({ colors, size }: { colors: typeof AVATAR_COLORS.Bolt; size: number }) { - return ( - - {/* Lightning bolt body */} - - - {/* Face */} - - - - - - ) -} - -// Dash - speedy character -function DashSVG({ colors, size }: { colors: typeof AVATAR_COLORS.Dash; size: number }) { - return ( - - {/* Speed lines */} - - - {/* Aerodynamic body */} - - - {/* Face */} - - - - - - - ) -} - -// Zap - electric orb -function ZapSVG({ colors, size }: { colors: typeof AVATAR_COLORS.Zap; size: number }) { - return ( - - {/* Electric sparks */} - - - {/* Orb */} - - - {/* Face */} - - - - - - - ) -} - -// Gizmo - gear character -function GizmoSVG({ colors, size }: { colors: typeof AVATAR_COLORS.Gizmo; size: number }) { - return ( - - {/* Gear teeth */} - - - - - {/* Gear body */} - - - {/* Face */} - - - - - - - ) -} - -// Turbo - rocket character -function TurboSVG({ colors, size }: { colors: typeof AVATAR_COLORS.Turbo; size: number }) { - return ( - - {/* Flames */} - - - {/* Rocket body */} - - {/* Nose cone */} - - {/* Fins */} - - - {/* Window/Face */} - - - - - - ) -} - -// Blip - radar dot character -function BlipSVG({ colors, size }: { colors: typeof AVATAR_COLORS.Blip; size: number }) { - return ( - - {/* Radar rings */} - - - {/* Main dot */} - - - {/* Face */} - - - - - - - ) -} - -// Neon - glowing character -function NeonSVG({ colors, size }: { colors: typeof AVATAR_COLORS.Neon; size: number }) { - return ( - - {/* Glow effect */} - - - {/* Body */} - - {/* Inner glow */} - - {/* Face */} - - - - - - - ) -} - -// Widget - UI component character -function WidgetSVG({ colors, size }: { colors: typeof AVATAR_COLORS.Widget; size: number }) { - return ( - - {/* Window frame */} - - {/* Title bar */} - - - - - {/* Content area / Face */} - - - - - - - - ) -} - -// Zippy - fast bunny-like character -function ZippySVG({ colors, size }: { colors: typeof AVATAR_COLORS.Zippy; size: number }) { - return ( - - {/* Ears */} - - - - - {/* Head */} - - {/* Face */} - - - - - {/* Nose and mouth */} - - - - ) -} - -// Quirk - question mark character -function QuirkSVG({ colors, size }: { colors: typeof AVATAR_COLORS.Quirk; size: number }) { - return ( - - {/* Question mark body */} - - - {/* Face on the dot */} - - - - - {/* Decorative swirl */} - - - ) -} - -// Flux - flowing wave character -function FluxSVG({ colors, size }: { colors: typeof AVATAR_COLORS.Flux; size: number }) { - return ( - - {/* Wave body */} - - - {/* Face */} - - - - - {/* Sparkles */} - - - - ) -} - -// Unknown agent fallback - simple question mark icon -function UnknownSVG({ colors, size }: { colors: typeof UNKNOWN_COLORS; size: number }) { - return ( - - {/* Circle background */} - - - {/* Question mark */} - ? 
- - ) -} - -const MASCOT_SVGS: Record = { - // Original 5 - Spark: SparkSVG, - Fizz: FizzSVG, - Octo: OctoSVG, - Hoot: HootSVG, - Buzz: BuzzSVG, - // Tech-inspired - Pixel: PixelSVG, - Byte: ByteSVG, - Nova: NovaSVG, - Chip: ChipSVG, - Bolt: BoltSVG, - // Energetic - Dash: DashSVG, - Zap: ZapSVG, - Gizmo: GizmoSVG, - Turbo: TurboSVG, - Blip: BlipSVG, - // Playful - Neon: NeonSVG, - Widget: WidgetSVG, - Zippy: ZippySVG, - Quirk: QuirkSVG, - Flux: FluxSVG, -} - // Animation classes based on state function getStateAnimation(state: AgentState): string { switch (state) { @@ -581,7 +84,7 @@ export function AgentAvatar({ name, state, size = 'md', showName = false }: Agen const isUnknown = name === 'Unknown' const colors = isUnknown ? UNKNOWN_COLORS : AVATAR_COLORS[name] const { svg: svgSize, font } = SIZES[size] - const SvgComponent = isUnknown ? UnknownSVG : MASCOT_SVGS[name] + const SvgComponent = isUnknown ? UnknownMascotSVG : MASCOT_SVGS[name] const stateDesc = getStateDescription(state) const ariaLabel = `Agent ${name} is ${stateDesc}` diff --git a/ui/src/components/mascotData.tsx b/ui/src/components/mascotData.tsx new file mode 100644 index 0000000..5c5e7bd --- /dev/null +++ b/ui/src/components/mascotData.tsx @@ -0,0 +1,529 @@ +/** + * SVG mascot definitions and color palettes for agent avatars. + * + * Each mascot is a simple, cute SVG character rendered as a React component. + * Colors are keyed by AgentMascot name so avatars stay visually distinct + * when multiple agents run in parallel. + */ + +import type { AgentMascot } from '../lib/types' + +// --------------------------------------------------------------------------- +// Color types and palettes +// --------------------------------------------------------------------------- + +export interface MascotColorPalette { + primary: string + secondary: string + accent: string +} + +/** Props shared by every mascot SVG component. */ +export interface MascotSVGProps { + colors: MascotColorPalette + size: number +} + +/** Fallback colors for unknown / untracked agents (neutral gray). 
*/ +export const UNKNOWN_COLORS: MascotColorPalette = { + primary: '#6B7280', + secondary: '#9CA3AF', + accent: '#F3F4F6', +} + +export const AVATAR_COLORS: Record = { + // Original 5 + Spark: { primary: '#3B82F6', secondary: '#60A5FA', accent: '#DBEAFE' }, // Blue robot + Fizz: { primary: '#F97316', secondary: '#FB923C', accent: '#FFEDD5' }, // Orange fox + Octo: { primary: '#8B5CF6', secondary: '#A78BFA', accent: '#EDE9FE' }, // Purple octopus + Hoot: { primary: '#22C55E', secondary: '#4ADE80', accent: '#DCFCE7' }, // Green owl + Buzz: { primary: '#EAB308', secondary: '#FACC15', accent: '#FEF9C3' }, // Yellow bee + // Tech-inspired + Pixel: { primary: '#EC4899', secondary: '#F472B6', accent: '#FCE7F3' }, // Pink + Byte: { primary: '#06B6D4', secondary: '#22D3EE', accent: '#CFFAFE' }, // Cyan + Nova: { primary: '#F43F5E', secondary: '#FB7185', accent: '#FFE4E6' }, // Rose + Chip: { primary: '#84CC16', secondary: '#A3E635', accent: '#ECFCCB' }, // Lime + Bolt: { primary: '#FBBF24', secondary: '#FCD34D', accent: '#FEF3C7' }, // Amber + // Energetic + Dash: { primary: '#14B8A6', secondary: '#2DD4BF', accent: '#CCFBF1' }, // Teal + Zap: { primary: '#A855F7', secondary: '#C084FC', accent: '#F3E8FF' }, // Violet + Gizmo: { primary: '#64748B', secondary: '#94A3B8', accent: '#F1F5F9' }, // Slate + Turbo: { primary: '#EF4444', secondary: '#F87171', accent: '#FEE2E2' }, // Red + Blip: { primary: '#10B981', secondary: '#34D399', accent: '#D1FAE5' }, // Emerald + // Playful + Neon: { primary: '#D946EF', secondary: '#E879F9', accent: '#FAE8FF' }, // Fuchsia + Widget: { primary: '#6366F1', secondary: '#818CF8', accent: '#E0E7FF' }, // Indigo + Zippy: { primary: '#F59E0B', secondary: '#FBBF24', accent: '#FEF3C7' }, // Orange-yellow + Quirk: { primary: '#0EA5E9', secondary: '#38BDF8', accent: '#E0F2FE' }, // Sky + Flux: { primary: '#7C3AED', secondary: '#8B5CF6', accent: '#EDE9FE' }, // Purple +} + +// --------------------------------------------------------------------------- +// SVG mascot components - simple cute characters +// --------------------------------------------------------------------------- + +function SparkSVG({ colors, size }: MascotSVGProps) { + return ( + + {/* Robot body */} + + {/* Robot head */} + + {/* Antenna */} + + + {/* Eyes */} + + + + + {/* Mouth */} + + {/* Arms */} + + + + ) +} + +function FizzSVG({ colors, size }: MascotSVGProps) { + return ( + + {/* Ears */} + + + + + {/* Head */} + + {/* Face */} + + {/* Eyes */} + + + + + {/* Nose */} + + {/* Whiskers */} + + + + + + ) +} + +function OctoSVG({ colors, size }: MascotSVGProps) { + return ( + + {/* Tentacles */} + + + + + + {/* Head */} + + {/* Eyes */} + + + + + {/* Smile */} + + + ) +} + +function HootSVG({ colors, size }: MascotSVGProps) { + return ( + + {/* Ear tufts */} + + + {/* Body */} + + {/* Head */} + + {/* Eye circles */} + + + {/* Eyes */} + + + + + {/* Beak */} + + {/* Belly */} + + + ) +} + +function BuzzSVG({ colors, size }: MascotSVGProps) { + return ( + + {/* Wings */} + + + {/* Body stripes */} + + + + {/* Head */} + + {/* Antennae */} + + + + + {/* Eyes */} + + + + + {/* Smile */} + + + ) +} + +function PixelSVG({ colors, size }: MascotSVGProps) { + return ( + + {/* Blocky body */} + + + + {/* Head */} + + {/* Eyes */} + + + + + {/* Mouth */} + + + ) +} + +function ByteSVG({ colors, size }: MascotSVGProps) { + return ( + + {/* 3D cube body */} + + + + {/* Face */} + + + + + + + ) +} + +function NovaSVG({ colors, size }: MascotSVGProps) { + return ( + + {/* Star points */} + + + {/* Face */} + + + + + + 
+ ) +} + +function ChipSVG({ colors, size }: MascotSVGProps) { + return ( + + {/* Chip body */} + + {/* Pins */} + + + + + + + {/* Face */} + + + + + + + ) +} + +function BoltSVG({ colors, size }: MascotSVGProps) { + return ( + + {/* Lightning bolt body */} + + + {/* Face */} + + + + + + ) +} + +function DashSVG({ colors, size }: MascotSVGProps) { + return ( + + {/* Speed lines */} + + + {/* Aerodynamic body */} + + + {/* Face */} + + + + + + + ) +} + +function ZapSVG({ colors, size }: MascotSVGProps) { + return ( + + {/* Electric sparks */} + + + {/* Orb */} + + + {/* Face */} + + + + + + + ) +} + +function GizmoSVG({ colors, size }: MascotSVGProps) { + return ( + + {/* Gear teeth */} + + + + + {/* Gear body */} + + + {/* Face */} + + + + + + + ) +} + +function TurboSVG({ colors, size }: MascotSVGProps) { + return ( + + {/* Flames */} + + + {/* Rocket body */} + + {/* Nose cone */} + + {/* Fins */} + + + {/* Window/Face */} + + + + + + ) +} + +function BlipSVG({ colors, size }: MascotSVGProps) { + return ( + + {/* Radar rings */} + + + {/* Main dot */} + + + {/* Face */} + + + + + + + ) +} + +function NeonSVG({ colors, size }: MascotSVGProps) { + return ( + + {/* Glow effect */} + + + {/* Body */} + + {/* Inner glow */} + + {/* Face */} + + + + + + + ) +} + +function WidgetSVG({ colors, size }: MascotSVGProps) { + return ( + + {/* Window frame */} + + {/* Title bar */} + + + + + {/* Content area / Face */} + + + + + + + + ) +} + +function ZippySVG({ colors, size }: MascotSVGProps) { + return ( + + {/* Ears */} + + + + + {/* Head */} + + {/* Face */} + + + + + {/* Nose and mouth */} + + + + ) +} + +function QuirkSVG({ colors, size }: MascotSVGProps) { + return ( + + {/* Question mark body */} + + + {/* Face on the dot */} + + + + + {/* Decorative swirl */} + + + ) +} + +function FluxSVG({ colors, size }: MascotSVGProps) { + return ( + + {/* Wave body */} + + + {/* Face */} + + + + + {/* Sparkles */} + + + + ) +} + +/** Fallback icon for unknown / untracked agents. */ +function UnknownSVG({ colors, size }: MascotSVGProps) { + return ( + + {/* Circle background */} + + + {/* Question mark */} + ? + + ) +} + +// --------------------------------------------------------------------------- +// Mascot component lookup +// --------------------------------------------------------------------------- + +/** Maps each mascot name to its SVG component. */ +export const MASCOT_SVGS: Record> = { + // Original 5 + Spark: SparkSVG, + Fizz: FizzSVG, + Octo: OctoSVG, + Hoot: HootSVG, + Buzz: BuzzSVG, + // Tech-inspired + Pixel: PixelSVG, + Byte: ByteSVG, + Nova: NovaSVG, + Chip: ChipSVG, + Bolt: BoltSVG, + // Energetic + Dash: DashSVG, + Zap: ZapSVG, + Gizmo: GizmoSVG, + Turbo: TurboSVG, + Blip: BlipSVG, + // Playful + Neon: NeonSVG, + Widget: WidgetSVG, + Zippy: ZippySVG, + Quirk: QuirkSVG, + Flux: FluxSVG, +} + +/** The SVG component for unknown agents. Exported separately because + * it is not part of the AgentMascot union type. 
*/ +export const UnknownMascotSVG: React.FC = UnknownSVG diff --git a/ui/src/components/ui/popover.tsx b/ui/src/components/ui/popover.tsx deleted file mode 100644 index 0df056f..0000000 --- a/ui/src/components/ui/popover.tsx +++ /dev/null @@ -1,87 +0,0 @@ -import * as React from "react" -import * as PopoverPrimitive from "@radix-ui/react-popover" - -import { cn } from "@/lib/utils" - -function Popover({ - ...props -}: React.ComponentProps) { - return -} - -function PopoverTrigger({ - ...props -}: React.ComponentProps) { - return -} - -function PopoverContent({ - className, - align = "center", - sideOffset = 4, - ...props -}: React.ComponentProps) { - return ( - - - - ) -} - -function PopoverAnchor({ - ...props -}: React.ComponentProps) { - return -} - -function PopoverHeader({ className, ...props }: React.ComponentProps<"div">) { - return ( -
- ) -} - -function PopoverTitle({ className, ...props }: React.ComponentProps<"h2">) { - return ( -
- ) -} - -function PopoverDescription({ - className, - ...props -}: React.ComponentProps<"p">) { - return ( -

- ) -} - -export { - Popover, - PopoverTrigger, - PopoverContent, - PopoverAnchor, - PopoverHeader, - PopoverTitle, - PopoverDescription, -} diff --git a/ui/src/components/ui/radio-group.tsx b/ui/src/components/ui/radio-group.tsx deleted file mode 100644 index 5e6778c..0000000 --- a/ui/src/components/ui/radio-group.tsx +++ /dev/null @@ -1,45 +0,0 @@ -"use client" - -import * as React from "react" -import * as RadioGroupPrimitive from "@radix-ui/react-radio-group" -import { CircleIcon } from "lucide-react" - -import { cn } from "@/lib/utils" - -function RadioGroup({ - className, - ...props -}: React.ComponentProps) { - return ( - - ) -} - -function RadioGroupItem({ - className, - ...props -}: React.ComponentProps) { - return ( - - - - - - ) -} - -export { RadioGroup, RadioGroupItem } diff --git a/ui/src/components/ui/scroll-area.tsx b/ui/src/components/ui/scroll-area.tsx deleted file mode 100644 index 9376f59..0000000 --- a/ui/src/components/ui/scroll-area.tsx +++ /dev/null @@ -1,56 +0,0 @@ -import * as React from "react" -import * as ScrollAreaPrimitive from "@radix-ui/react-scroll-area" - -import { cn } from "@/lib/utils" - -function ScrollArea({ - className, - children, - ...props -}: React.ComponentProps) { - return ( - - - {children} - - - - - ) -} - -function ScrollBar({ - className, - orientation = "vertical", - ...props -}: React.ComponentProps) { - return ( - - - - ) -} - -export { ScrollArea, ScrollBar } diff --git a/ui/src/components/ui/select.tsx b/ui/src/components/ui/select.tsx deleted file mode 100644 index 88302a8..0000000 --- a/ui/src/components/ui/select.tsx +++ /dev/null @@ -1,190 +0,0 @@ -"use client" - -import * as React from "react" -import * as SelectPrimitive from "@radix-ui/react-select" -import { CheckIcon, ChevronDownIcon, ChevronUpIcon } from "lucide-react" - -import { cn } from "@/lib/utils" - -function Select({ - ...props -}: React.ComponentProps) { - return -} - -function SelectGroup({ - ...props -}: React.ComponentProps) { - return -} - -function SelectValue({ - ...props -}: React.ComponentProps) { - return -} - -function SelectTrigger({ - className, - size = "default", - children, - ...props -}: React.ComponentProps & { - size?: "sm" | "default" -}) { - return ( - - {children} - - - - - ) -} - -function SelectContent({ - className, - children, - position = "item-aligned", - align = "center", - ...props -}: React.ComponentProps) { - return ( - - - - - {children} - - - - - ) -} - -function SelectLabel({ - className, - ...props -}: React.ComponentProps) { - return ( - - ) -} - -function SelectItem({ - className, - children, - ...props -}: React.ComponentProps) { - return ( - - - - - - - {children} - - ) -} - -function SelectSeparator({ - className, - ...props -}: React.ComponentProps) { - return ( - - ) -} - -function SelectScrollUpButton({ - className, - ...props -}: React.ComponentProps) { - return ( - - - - ) -} - -function SelectScrollDownButton({ - className, - ...props -}: React.ComponentProps) { - return ( - - - - ) -} - -export { - Select, - SelectContent, - SelectGroup, - SelectItem, - SelectLabel, - SelectScrollDownButton, - SelectScrollUpButton, - SelectSeparator, - SelectTrigger, - SelectValue, -} diff --git a/ui/src/components/ui/tabs.tsx b/ui/src/components/ui/tabs.tsx deleted file mode 100644 index bb946fc..0000000 --- a/ui/src/components/ui/tabs.tsx +++ /dev/null @@ -1,89 +0,0 @@ -import * as React from "react" -import * as TabsPrimitive from "@radix-ui/react-tabs" -import { cva, type VariantProps } from "class-variance-authority" - -import { cn 
} from "@/lib/utils" - -function Tabs({ - className, - orientation = "horizontal", - ...props -}: React.ComponentProps) { - return ( - - ) -} - -const tabsListVariants = cva( - "rounded-lg p-[3px] group-data-[orientation=horizontal]/tabs:h-9 data-[variant=line]:rounded-none group/tabs-list text-muted-foreground inline-flex w-fit items-center justify-center group-data-[orientation=vertical]/tabs:h-fit group-data-[orientation=vertical]/tabs:flex-col", - { - variants: { - variant: { - default: "bg-muted", - line: "gap-1 bg-transparent", - }, - }, - defaultVariants: { - variant: "default", - }, - } -) - -function TabsList({ - className, - variant = "default", - ...props -}: React.ComponentProps & - VariantProps) { - return ( - - ) -} - -function TabsTrigger({ - className, - ...props -}: React.ComponentProps) { - return ( - - ) -} - -function TabsContent({ - className, - ...props -}: React.ComponentProps) { - return ( - - ) -} - -export { Tabs, TabsList, TabsTrigger, TabsContent, tabsListVariants } diff --git a/ui/src/components/ui/toggle.tsx b/ui/src/components/ui/toggle.tsx deleted file mode 100644 index 94ec8f5..0000000 --- a/ui/src/components/ui/toggle.tsx +++ /dev/null @@ -1,47 +0,0 @@ -"use client" - -import * as React from "react" -import * as TogglePrimitive from "@radix-ui/react-toggle" -import { cva, type VariantProps } from "class-variance-authority" - -import { cn } from "@/lib/utils" - -const toggleVariants = cva( - "inline-flex items-center justify-center gap-2 rounded-md text-sm font-medium hover:bg-muted hover:text-muted-foreground disabled:pointer-events-none disabled:opacity-50 data-[state=on]:bg-accent data-[state=on]:text-accent-foreground [&_svg]:pointer-events-none [&_svg:not([class*='size-'])]:size-4 [&_svg]:shrink-0 focus-visible:border-ring focus-visible:ring-ring/50 focus-visible:ring-[3px] outline-none transition-[color,box-shadow] aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive whitespace-nowrap", - { - variants: { - variant: { - default: "bg-transparent", - outline: - "border border-input bg-transparent shadow-xs hover:bg-accent hover:text-accent-foreground", - }, - size: { - default: "h-9 px-2 min-w-9", - sm: "h-8 px-1.5 min-w-8", - lg: "h-10 px-2.5 min-w-10", - }, - }, - defaultVariants: { - variant: "default", - size: "default", - }, - } -) - -function Toggle({ - className, - variant, - size, - ...props -}: React.ComponentProps & - VariantProps) { - return ( - - ) -} - -export { Toggle, toggleVariants } diff --git a/ui/src/components/ui/tooltip.tsx b/ui/src/components/ui/tooltip.tsx deleted file mode 100644 index a4e90d4..0000000 --- a/ui/src/components/ui/tooltip.tsx +++ /dev/null @@ -1,61 +0,0 @@ -"use client" - -import * as React from "react" -import * as TooltipPrimitive from "@radix-ui/react-tooltip" - -import { cn } from "@/lib/utils" - -function TooltipProvider({ - delayDuration = 0, - ...props -}: React.ComponentProps) { - return ( - - ) -} - -function Tooltip({ - ...props -}: React.ComponentProps) { - return ( - - - - ) -} - -function TooltipTrigger({ - ...props -}: React.ComponentProps) { - return -} - -function TooltipContent({ - className, - sideOffset = 0, - children, - ...props -}: React.ComponentProps) { - return ( - - - {children} - - - - ) -} - -export { Tooltip, TooltipTrigger, TooltipContent, TooltipProvider } diff --git a/ui/vite.config.ts b/ui/vite.config.ts index f7c6aa1..69fbe08 100644 --- a/ui/vite.config.ts +++ b/ui/vite.config.ts @@ -30,7 +30,6 @@ export default defineConfig({ 
'vendor-ui': [ '@radix-ui/react-dialog', '@radix-ui/react-dropdown-menu', - '@radix-ui/react-tooltip', 'lucide-react', ], }, From 24481d474d252fa53bd5ae0331732029b816a9e0 Mon Sep 17 00:00:00 2001 From: Auto Date: Sun, 1 Feb 2026 13:40:46 +0200 Subject: [PATCH 20/24] feat: add headless browser toggle to settings UI Replace the PLAYWRIGHT_HEADLESS environment variable with a global setting toggle in the Settings modal. The setting is persisted in the registry DB and injected as an env var into agent subprocesses, so client.py reads it unchanged. Backend: - Add playwright_headless field to SettingsResponse/SettingsUpdate schemas - Read/write the setting in settings router via existing _parse_bool helper - Pass playwright_headless from agent router through to process manager - Inject PLAYWRIGHT_HEADLESS env var into subprocess environment Frontend: - Add playwright_headless to Settings/SettingsUpdate TypeScript types - Add "Headless Browser" Switch toggle below YOLO mode in SettingsModal - Add default value to DEFAULT_SETTINGS in useProjects Also fix CSS build warning: change @import url("tw-animate-css") to bare @import "tw-animate-css" so Tailwind v4 inlines it during compilation instead of leaving it for Vite/Lightning CSS post-processing. Remove stale summary.md from previous refactoring session. Co-Authored-By: Claude Opus 4.5 --- server/routers/agent.py | 11 ++- server/routers/settings.py | 5 + server/schemas.py | 2 + server/services/process_manager.py | 4 +- summary.md | 146 ---------------------------- ui/src/components/SettingsModal.tsx | 18 ++++ ui/src/hooks/useProjects.ts | 1 + ui/src/lib/types.ts | 2 + ui/src/styles/globals.css | 2 +- 9 files changed, 39 insertions(+), 152 deletions(-) delete mode 100644 summary.md diff --git a/server/routers/agent.py b/server/routers/agent.py index b9a7756..11e9b8a 100644 --- a/server/routers/agent.py +++ b/server/routers/agent.py @@ -17,11 +17,11 @@ from ..utils.project_helpers import get_project_path as _get_project_path from ..utils.validation import validate_project_name -def _get_settings_defaults() -> tuple[bool, str, int]: +def _get_settings_defaults() -> tuple[bool, str, int, bool]: """Get defaults from global settings. 
Returns: - Tuple of (yolo_mode, model, testing_agent_ratio) + Tuple of (yolo_mode, model, testing_agent_ratio, playwright_headless) """ import sys root = Path(__file__).parent.parent.parent @@ -40,7 +40,9 @@ def _get_settings_defaults() -> tuple[bool, str, int]: except (ValueError, TypeError): testing_agent_ratio = 1 - return yolo_mode, model, testing_agent_ratio + playwright_headless = (settings.get("playwright_headless") or "true").lower() == "true" + + return yolo_mode, model, testing_agent_ratio, playwright_headless router = APIRouter(prefix="/api/projects/{project_name}/agent", tags=["agent"]) @@ -89,7 +91,7 @@ async def start_agent( manager = get_project_manager(project_name) # Get defaults from global settings if not provided in request - default_yolo, default_model, default_testing_ratio = _get_settings_defaults() + default_yolo, default_model, default_testing_ratio, playwright_headless = _get_settings_defaults() yolo_mode = request.yolo_mode if request.yolo_mode is not None else default_yolo model = request.model if request.model else default_model @@ -101,6 +103,7 @@ async def start_agent( model=model, max_concurrency=max_concurrency, testing_agent_ratio=testing_agent_ratio, + playwright_headless=playwright_headless, ) # Notify scheduler of manual start (to prevent auto-stop during scheduled window) diff --git a/server/routers/settings.py b/server/routers/settings.py index 4b9c3e5..481cd0d 100644 --- a/server/routers/settings.py +++ b/server/routers/settings.py @@ -91,6 +91,7 @@ async def get_settings(): glm_mode=_is_glm_mode(), ollama_mode=_is_ollama_mode(), testing_agent_ratio=_parse_int(all_settings.get("testing_agent_ratio"), 1), + playwright_headless=_parse_bool(all_settings.get("playwright_headless"), default=True), ) @@ -106,6 +107,9 @@ async def update_settings(update: SettingsUpdate): if update.testing_agent_ratio is not None: set_setting("testing_agent_ratio", str(update.testing_agent_ratio)) + if update.playwright_headless is not None: + set_setting("playwright_headless", "true" if update.playwright_headless else "false") + # Return updated settings all_settings = get_all_settings() return SettingsResponse( @@ -114,4 +118,5 @@ async def update_settings(update: SettingsUpdate): glm_mode=_is_glm_mode(), ollama_mode=_is_ollama_mode(), testing_agent_ratio=_parse_int(all_settings.get("testing_agent_ratio"), 1), + playwright_headless=_parse_bool(all_settings.get("playwright_headless"), default=True), ) diff --git a/server/schemas.py b/server/schemas.py index 03e73ef..b5e2d84 100644 --- a/server/schemas.py +++ b/server/schemas.py @@ -398,6 +398,7 @@ class SettingsResponse(BaseModel): glm_mode: bool = False # True if GLM API is configured via .env ollama_mode: bool = False # True if Ollama API is configured via .env testing_agent_ratio: int = 1 # Regression testing agents (0-3) + playwright_headless: bool = True class ModelsResponse(BaseModel): @@ -411,6 +412,7 @@ class SettingsUpdate(BaseModel): yolo_mode: bool | None = None model: str | None = None testing_agent_ratio: int | None = None # 0-3 + playwright_headless: bool | None = None @field_validator('model') @classmethod diff --git a/server/services/process_manager.py b/server/services/process_manager.py index fa489ec..81a9025 100644 --- a/server/services/process_manager.py +++ b/server/services/process_manager.py @@ -297,6 +297,7 @@ class AgentProcessManager: parallel_mode: bool = False, max_concurrency: int | None = None, testing_agent_ratio: int = 1, + playwright_headless: bool = True, ) -> tuple[bool, str]: """ Start 
the agent as a subprocess. @@ -307,6 +308,7 @@ class AgentProcessManager: parallel_mode: DEPRECATED - ignored, always uses unified orchestrator max_concurrency: Max concurrent coding agents (1-5, default 1) testing_agent_ratio: Number of regression testing agents (0-3, default 1) + playwright_headless: If True, run browser in headless mode Returns: Tuple of (success, message) @@ -358,7 +360,7 @@ class AgentProcessManager: "stdout": subprocess.PIPE, "stderr": subprocess.STDOUT, "cwd": str(self.project_dir), - "env": {**os.environ, "PYTHONUNBUFFERED": "1"}, + "env": {**os.environ, "PYTHONUNBUFFERED": "1", "PLAYWRIGHT_HEADLESS": "true" if playwright_headless else "false"}, } if sys.platform == "win32": popen_kwargs["creationflags"] = subprocess.CREATE_NO_WINDOW diff --git a/summary.md b/summary.md deleted file mode 100644 index f38fbbd..0000000 --- a/summary.md +++ /dev/null @@ -1,146 +0,0 @@ -# Autocoder Refactoring Summary - -## TL;DR - -This refactoring makes agents faster, cheaper, and more reliable. **Token usage drops ~40% per session**, agents retry rate limits in 15s instead of 60s, the orchestrator runs 80% fewer database queries per loop, and testing agents now batch 3 features per session instead of 1. Two bugs were fixed: a ghost MCP tool that wasted tokens every testing session, and missing Vertex AI environment variables that broke Vertex users. - ---- - -## What You'll Notice Immediately - -### Faster Agent Startup & Recovery -- **Rate limit retries start at ~15s** (was 60s) with jitter to prevent thundering herd -- **Post-spawn delay reduced to 0.5s** (was 2s) — agents claim features faster -- **Orchestrator makes 1 DB query per loop** (was 5-7) — scheduling decisions happen instantly - -### Lower Token Costs -- **Coding agents use ~4,500 fewer tokens/session** — trimmed prompts, removed unused tools -- **Testing agents use ~5,500 fewer tokens/session** — streamlined prompt, fewer MCP tools -- **For a 200-feature project: ~2.3M fewer input tokens total** -- Agents only see tools they actually need (coding: 9, testing: 5, initializer: 5 — was 19 for all) -- `max_turns` reduced: coding 300 (was 1000), testing 100 (was 1000) - -### YOLO Mode Is Actually Faster Now -- Browser testing instructions are **stripped from the prompt** in YOLO mode -- Previously, YOLO mode still sent full Playwright instructions (agents would try to use them) -- Prompt stripping saves ~1,000 additional tokens per YOLO session - -### Batched Testing (Parallel Mode) -- Testing agents now verify **3 features per session** instead of 1 -- Weighted selection prioritizes high-dependency features and avoids re-testing -- **50-70% less per-feature testing overhead** (shared prompt, shared browser, shared startup) -- Configurable via `--testing-batch-size` (1-5) - -### Smart Context Compaction -- When agent context gets long, compaction now **preserves**: current feature, modified files, test results, workflow step -- **Discards**: screenshot base64 data, long grep outputs, repeated file reads, verbose install logs -- Agents lose less critical context during long sessions - ---- - -## Bug Fixes - -| Bug | Impact | Fix | -|-----|--------|-----| -| Ghost `feature_release_testing` MCP tool | Every testing session wasted tokens calling a non-existent tool | Removed from tool lists and testing prompt | -| Missing Vertex AI env vars | `CLAUDE_CODE_USE_VERTEX`, `CLOUD_ML_REGION`, `ANTHROPIC_VERTEX_PROJECT_ID` not forwarded to chat sessions — broke Vertex AI users | Centralized `API_ENV_VARS` in `env_constants.py` with 
all 9 vars | -| DetachedInstanceError risk | `_get_test_batch` accessed ORM objects after session close — could crash in parallel mode | Extract data to dicts before closing session | -| Redundant testing of same features | Multiple testing agents could pick the same features simultaneously | Exclude currently-testing features from batch selection | - ---- - -## Architecture Improvements - -### Code Deduplication -- `_get_project_path()`: 9 copies → 1 shared utility (`server/utils/project_helpers.py`) -- `validate_project_name()`: 9 copies → 2 variants in 1 file (`server/utils/validation.py`) -- `ROOT_DIR`: 10 copies → 1 definition (`server/services/chat_constants.py`) -- `API_ENV_VARS`: 4 copies → 1 source of truth (`env_constants.py`) -- Chat session services: extracted `BaseChatSession` pattern, shared constants - -### Security Hardening -- **Unified sensitive directory blocklist**: 14 directories blocked consistently across filesystem browser AND extra read paths (was two divergent lists of 8 and 12) -- **Cached `get_blocked_paths()`**: O(1) instead of O(n*m) per directory listing -- **Terminal security warning**: Logs prominent warning when `ALLOW_REMOTE=1` exposes terminal WebSocket -- **20 new security tests**: 10 for EXTRA_READ_PATHS blocking, plus existing tests cleaned up -- **Security validation DRY**: Extracted `_validate_command_list()` and `_validate_pkill_processes()` helpers - -### Type Safety -- **87 mypy errors → 0** across 58 source files -- Installed `types-PyYAML` for proper yaml stub types -- Fixed SQLAlchemy `Column[T]` → `T` coercions across all routers -- Fixed Popen `env` dict typing in orchestrator -- Added None guards for regex matches and optional values - -### Dead Code Removed -- 13 files deleted (~2,679 lines): unused UI components, debug logs, outdated docs, Windows artifacts -- 7 unused npm packages removed (Radix UI components with 0 imports) -- 16 redundant security test assertions removed -- UI `AgentAvatar.tsx` reduced from 615 → 119 lines (SVGs extracted to `mascotData.tsx`) - ---- - -## Performance Numbers - -| Metric | Before | After | Improvement | -|--------|--------|-------|-------------| -| Tokens per coding session | ~12,000 input | ~7,500 input | **-37%** | -| Tokens per testing session | ~10,000 input | ~4,500 input | **-55%** | -| Tokens per 200-feature project | ~6.5M | ~4.2M | **-2.3M tokens** | -| MCP tools loaded (coding) | 19 | 9 | **-53%** | -| MCP tools loaded (testing) | 19 | 5 | **-74%** | -| Playwright tools loaded | 20 | 20 | Restored | -| DB queries per orchestrator loop | 5-7 | 1 | **-80%** | -| Rate limit first retry | 60s | ~15-20s | **-70%** | -| Features per testing session | 1 | 3 | **+200%** | -| Post-spawn delay | 2.0s | 0.5s | **-75%** | -| max_turns (coding) | 1000 | 300 | Right-sized | -| max_turns (testing) | 1000 | 100 | Right-sized | -| mypy errors | 87 | 0 | **Clean** | -| Duplicate code instances | 40+ | 4 | **-90%** | - ---- - -## New CLI Options - -```bash -# Testing batch size (parallel mode) -python autonomous_agent_demo.py --project-dir my-app --parallel --testing-batch-size 5 - -# Multiple testing feature IDs (direct) -python autonomous_agent_demo.py --project-dir my-app --testing-feature-ids 5,12,18 -``` - ---- - -## Files Changed - -**New files (6):** -- `env_constants.py` — Single source of truth for API environment variables -- `server/utils/project_helpers.py` — Shared `get_project_path()` utility -- `server/services/chat_constants.py` — Shared chat session constants and Vertex AI env vars -- 
`ui/src/components/mascotData.tsx` — Extracted SVG mascot data (~500 lines) -- `test_client.py` — New tests for EXTRA_READ_PATHS security blocking -- `summary.md` — This file - -**Deleted files (13):** -- `nul`, `orchestrator_debug.log`, `PHASE3_SPEC.md`, `CUSTOM_UPDATES.md`, `SAMPLE_PROMPT.md` -- `issues/issues.md` -- 7 unused UI components (`toggle`, `scroll-area`, `tooltip`, `popover`, `radio-group`, `select`, `tabs`) - -**Major modifications (15):** -- `client.py` — Agent-type tool lists, Playwright trimming, max_turns, PreCompact, sensitive dirs -- `parallel_orchestrator.py` — DB consolidation, test batching, weighted selection, logging cleanup -- `security.py` — Unified blocklist, validation helpers -- `prompts.py` — YOLO stripping, batch testing prompt support -- `agent.py` — Agent type threading, testing feature IDs -- `autonomous_agent_demo.py` — New CLI arguments -- `.claude/templates/coding_prompt.template.md` — Trimmed ~150 lines -- `.claude/templates/testing_prompt.template.md` — Streamlined + batch support -- `ui/src/components/AgentAvatar.tsx` — 615 → 119 lines -- `rate_limit_utils.py` — New backoff formula with jitter -- `api/dependency_resolver.py` — deque fix, score caching support -- `server/routers/filesystem.py` — Cached blocked paths, unified blocklist -- `server/services/assistant_chat_session.py` — Type fixes, shared constants -- `server/services/spec_chat_session.py` — Type fixes, shared constants -- `server/services/expand_chat_session.py` — Type fixes, shared constants diff --git a/ui/src/components/SettingsModal.tsx b/ui/src/components/SettingsModal.tsx index a4b787f..03138b7 100644 --- a/ui/src/components/SettingsModal.tsx +++ b/ui/src/components/SettingsModal.tsx @@ -171,6 +171,24 @@ export function SettingsModal({ isOpen, onClose }: SettingsModalProps) { />

+ {/* Headless Browser Toggle */} +
+
+ +

+ Run the browser without a visible window (saves CPU)

+
+ updateSettings.mutate({ playwright_headless: !settings.playwright_headless })} + disabled={isSaving} + /> +
+ {/* Model Selection */}
diff --git a/ui/src/hooks/useProjects.ts b/ui/src/hooks/useProjects.ts index e914909..4f27380 100644 --- a/ui/src/hooks/useProjects.ts +++ b/ui/src/hooks/useProjects.ts @@ -266,6 +266,7 @@ const DEFAULT_SETTINGS: Settings = { glm_mode: false, ollama_mode: false, testing_agent_ratio: 1, + playwright_headless: true, } export function useAvailableModels() { diff --git a/ui/src/lib/types.ts b/ui/src/lib/types.ts index 269c2ef..a90fc52 100644 --- a/ui/src/lib/types.ts +++ b/ui/src/lib/types.ts @@ -529,12 +529,14 @@ export interface Settings { glm_mode: boolean ollama_mode: boolean testing_agent_ratio: number // Regression testing agents (0-3) + playwright_headless: boolean } export interface SettingsUpdate { yolo_mode?: boolean model?: string testing_agent_ratio?: number + playwright_headless?: boolean } export interface ProjectSettingsUpdate { diff --git a/ui/src/styles/globals.css b/ui/src/styles/globals.css index 386ddf6..e927b54 100644 --- a/ui/src/styles/globals.css +++ b/ui/src/styles/globals.css @@ -1,5 +1,5 @@ @import "tailwindcss"; -@import url("tw-animate-css"); +@import "tw-animate-css"; /* Enable class-based dark mode in Tailwind v4 */ @custom-variant dark (&:where(.dark, .dark *)); From e1e52098664a72da114e83ab55ef79036a6d69b0 Mon Sep 17 00:00:00 2001 From: Auto Date: Sun, 1 Feb 2026 14:57:33 +0200 Subject: [PATCH 21/24] refactor: compact Progress card and merge agent thought into it - Redesign ProgressDashboard from tall stacked layout to compact inline: title/badge left, passing/total right, progress bar with percentage below - Absorb AgentThought functionality directly into ProgressDashboard, showing the agent's current thought below the progress bar - Remove standalone AgentThought usage from App.tsx (component now unused) - Pass logs/agentStatus to ProgressDashboard in single-agent mode only Co-Authored-By: Claude Opus 4.5 --- ui/src/App.tsx | 10 +- ui/src/components/ProgressDashboard.tsx | 178 +++++++++++++++++------- 2 files changed, 126 insertions(+), 62 deletions(-) diff --git a/ui/src/App.tsx b/ui/src/App.tsx index 16fc4c0..9842f9d 100644 --- a/ui/src/App.tsx +++ b/ui/src/App.tsx @@ -13,7 +13,6 @@ import { SetupWizard } from './components/SetupWizard' import { AddFeatureForm } from './components/AddFeatureForm' import { FeatureModal } from './components/FeatureModal' import { DebugLogViewer, type TabType } from './components/DebugLogViewer' -import { AgentThought } from './components/AgentThought' import { AgentMissionControl } from './components/AgentMissionControl' import { CelebrationOverlay } from './components/CelebrationOverlay' import { AssistantFAB } from './components/AssistantFAB' @@ -390,6 +389,8 @@ function App() { total={progress.total} percentage={progress.percentage} isConnected={wsState.isConnected} + logs={wsState.activeAgents.length === 0 ? wsState.logs : undefined} + agentStatus={wsState.activeAgents.length === 0 ? 
wsState.agentStatus : undefined} /> {/* Agent Mission Control - shows orchestrator status and active agents in parallel mode */} @@ -400,13 +401,6 @@ function App() { getAgentLogs={wsState.getAgentLogs} /> - {/* Agent Thought - shows latest agent narrative (single agent mode) */} - {wsState.activeAgents.length === 0 && ( - - )} {/* Initializing Features State - show when agent is running but no features yet */} {features && diff --git a/ui/src/components/ProgressDashboard.tsx b/ui/src/components/ProgressDashboard.tsx index 7b935db..4a1865f 100644 --- a/ui/src/components/ProgressDashboard.tsx +++ b/ui/src/components/ProgressDashboard.tsx @@ -1,12 +1,40 @@ -import { Wifi, WifiOff } from 'lucide-react' +import { useMemo, useState, useEffect } from 'react' +import { Wifi, WifiOff, Brain, Sparkles } from 'lucide-react' import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card' import { Badge } from '@/components/ui/badge' +import type { AgentStatus } from '../lib/types' interface ProgressDashboardProps { passing: number total: number percentage: number isConnected: boolean + logs?: Array<{ line: string; timestamp: string }> + agentStatus?: AgentStatus +} + +const IDLE_TIMEOUT = 30000 + +function isAgentThought(line: string): boolean { + const trimmed = line.trim() + if (/^\[Tool:/.test(trimmed)) return false + if (/^\s*Input:\s*\{/.test(trimmed)) return false + if (/^\[(Done|Error)\]/.test(trimmed)) return false + if (/^Output:/.test(trimmed)) return false + if (/^[[{]/.test(trimmed)) return false + if (trimmed.length < 10) return false + if (/^[A-Za-z]:\\/.test(trimmed)) return false + if (/^\/[a-z]/.test(trimmed)) return false + return true +} + +function getLatestThought(logs: Array<{ line: string; timestamp: string }>): string | null { + for (let i = logs.length - 1; i >= 0; i--) { + if (isAgentThought(logs[i].line)) { + return logs[i].line.trim() + } + } + return null } export function ProgressDashboard({ @@ -14,67 +42,109 @@ export function ProgressDashboard({ total, percentage, isConnected, + logs = [], + agentStatus, }: ProgressDashboardProps) { + const thought = useMemo(() => getLatestThought(logs), [logs]) + const [displayedThought, setDisplayedThought] = useState(null) + const [textVisible, setTextVisible] = useState(true) + + const lastLogTimestamp = logs.length > 0 + ? new Date(logs[logs.length - 1].timestamp).getTime() + : 0 + + const showThought = useMemo(() => { + if (!thought) return false + if (agentStatus === 'running') return true + if (agentStatus === 'paused') { + return Date.now() - lastLogTimestamp < IDLE_TIMEOUT + } + return false + }, [thought, agentStatus, lastLogTimestamp]) + + useEffect(() => { + if (thought !== displayedThought && thought) { + setTextVisible(false) + const timeout = setTimeout(() => { + setDisplayedThought(thought) + setTextVisible(true) + }, 150) + return () => clearTimeout(timeout) + } + }, [thought, displayedThought]) + + const isRunning = agentStatus === 'running' + return ( - - - Progress - - - {isConnected ? ( - <> - - Live - - ) : ( - <> - - Offline - - )} - + +
+ + Progress + + + {isConnected ? ( + <> + + Live + + ) : ( + <> + + Offline + + )} + +
+
+ + {passing} + + / + + {total} + +
- - {/* Large Percentage */} -
- - - {percentage.toFixed(1)} - - - % - + +
+ {/* Progress Bar */} +
+
+
+ {/* Percentage */} + + {percentage.toFixed(1)}%
- {/* Progress Bar */} -
-
-
- - {/* Stats */} -
-
- - {passing} - - - Passing - -
-
/
-
- - {total} - - - Total - + {/* Agent Thought */} +
+
+
+ + {isRunning && ( + + )} +
+

+ {displayedThought?.replace(/:$/, '')} +

From 1607fc817510789109c30f9f1b7294ac42ad25c3 Mon Sep 17 00:00:00 2001 From: Auto Date: Sun, 1 Feb 2026 16:35:07 +0200 Subject: [PATCH 22/24] feat: add multi-feature batching for coding agents Enable the orchestrator to assign 1-3 features per coding agent subprocess, selected via dependency chain extension + same-category fill. This reduces cold-start overhead and leverages shared context across related features. Orchestrator (parallel_orchestrator.py): - Add batch tracking: _batch_features and _feature_to_primary data structures - Add build_feature_batches() with dependency chain + category fill algorithm - Add start_feature_batch() and _spawn_coding_agent_batch() methods - Update _on_agent_complete() for batch cleanup across all features - Update stop_feature() with _feature_to_primary lookup - Update get_ready_features() to exclude all batch feature IDs - Update main loop to build batches then spawn per available slot CLI and agent layer: - Add --feature-ids (comma-separated) and --batch-size CLI args - Add feature_ids parameter to run_autonomous_agent() with batch prompt selection - Add get_batch_feature_prompt() with sequential workflow instructions WebSocket layer (server/websocket.py): - Add BATCH_CODING_AGENT_START_PATTERN and BATCH_FEATURES_COMPLETE_PATTERN - Add _handle_batch_agent_start() and _handle_batch_agent_complete() methods - Add featureIds field to all agent_update messages - Track current_feature_id updates as agent moves through batch Frontend (React UI): - Add featureIds to ActiveAgent and WSAgentUpdateMessage types - Update KanbanColumn and DependencyGraph agent-feature maps for batch - Update AgentCard to show "Batch: #X, #Y, #Z" with active feature highlight - Add "Features per Agent" segmented control (1-3) in SettingsModal Settings integration (full stack): - Add batch_size to schemas, settings router, agent router, process manager - Default batch_size=3, user-configurable 1-3 via settings UI - batch_size=1 is functionally identical to pre-batching behavior Co-Authored-By: Claude Opus 4.5 --- agent.py | 30 +- autonomous_agent_demo.py | 25 ++ parallel_orchestrator.py | 385 ++++++++++++++++++++++---- prompts.py | 46 +++ server/routers/agent.py | 16 +- server/routers/settings.py | 5 + server/schemas.py | 9 + server/services/process_manager.py | 4 + server/websocket.py | 130 ++++++++- ui/src/components/AgentCard.tsx | 30 +- ui/src/components/DependencyGraph.tsx | 6 +- ui/src/components/KanbanColumn.tsx | 11 +- ui/src/components/SettingsModal.tsx | 30 ++ ui/src/hooks/useProjects.ts | 1 + ui/src/hooks/useWebSocket.ts | 2 + ui/src/lib/types.ts | 6 +- 16 files changed, 654 insertions(+), 82 deletions(-) diff --git a/agent.py b/agent.py index bdc174c..a3daaf8 100644 --- a/agent.py +++ b/agent.py @@ -31,6 +31,7 @@ from progress import ( ) from prompts import ( copy_spec_to_project, + get_batch_feature_prompt, get_coding_prompt, get_initializer_prompt, get_single_feature_prompt, @@ -139,6 +140,7 @@ async def run_autonomous_agent( max_iterations: Optional[int] = None, yolo_mode: bool = False, feature_id: Optional[int] = None, + feature_ids: Optional[list[int]] = None, agent_type: Optional[str] = None, testing_feature_id: Optional[int] = None, testing_feature_ids: Optional[list[int]] = None, @@ -152,6 +154,7 @@ async def run_autonomous_agent( max_iterations: Maximum number of iterations (None for unlimited) yolo_mode: If True, skip browser testing in coding agent prompts feature_id: If set, work only on this specific feature (used by orchestrator for coding agents) + 
feature_ids: If set, work on these features in batch (used by orchestrator for batch mode) agent_type: Type of agent: "initializer", "coding", "testing", or None (auto-detect) testing_feature_id: For testing agents, the pre-claimed feature ID to test (legacy single mode) testing_feature_ids: For testing agents, list of feature IDs to batch test @@ -165,7 +168,9 @@ async def run_autonomous_agent( print(f"Agent type: {agent_type}") if yolo_mode: print("Mode: YOLO (testing agents disabled)") - if feature_id: + if feature_ids and len(feature_ids) > 1: + print(f"Feature batch: {', '.join(f'#{fid}' for fid in feature_ids)}") + elif feature_id: print(f"Feature assignment: #{feature_id}") if max_iterations: print(f"Max iterations: {max_iterations}") @@ -239,6 +244,8 @@ async def run_autonomous_agent( import os if agent_type == "testing": agent_id = f"testing-{os.getpid()}" # Unique ID for testing agents + elif feature_ids and len(feature_ids) > 1: + agent_id = f"batch-{feature_ids[0]}" elif feature_id: agent_id = f"feature-{feature_id}" else: @@ -250,9 +257,13 @@ async def run_autonomous_agent( prompt = get_initializer_prompt(project_dir) elif agent_type == "testing": prompt = get_testing_prompt(project_dir, testing_feature_id, testing_feature_ids) - elif feature_id: + elif feature_ids and len(feature_ids) > 1: + # Batch mode (used by orchestrator for multi-feature coding agents) + prompt = get_batch_feature_prompt(feature_ids, project_dir, yolo_mode) + elif feature_id or (feature_ids is not None and len(feature_ids) == 1): # Single-feature mode (used by orchestrator for coding agents) - prompt = get_single_feature_prompt(feature_id, project_dir, yolo_mode) + fid = feature_id if feature_id is not None else feature_ids[0] # type: ignore[index] + prompt = get_single_feature_prompt(fid, project_dir, yolo_mode) else: # General coding prompt (legacy path) prompt = get_coding_prompt(project_dir, yolo_mode=yolo_mode) @@ -356,12 +367,19 @@ async def run_autonomous_agent( print("The autonomous agent has finished its work.") break - # Single-feature mode OR testing agent: exit after one session - if feature_id is not None or agent_type == "testing": + # Single-feature mode, batch mode, or testing agent: exit after one session + if feature_ids and len(feature_ids) > 1: + print(f"\nBatch mode: Features {', '.join(f'#{fid}' for fid in feature_ids)} session complete.") + break + elif feature_id is not None or (feature_ids is not None and len(feature_ids) == 1): + fid = feature_id if feature_id is not None else feature_ids[0] # type: ignore[index] if agent_type == "testing": print("\nTesting agent complete. Terminating session.") else: - print(f"\nSingle-feature mode: Feature #{feature_id} session complete.") + print(f"\nSingle-feature mode: Feature #{fid} session complete.") + break + elif agent_type == "testing": + print("\nTesting agent complete. 
Terminating session.") break # Reset rate limit retries only if no rate limit signal was detected diff --git a/autonomous_agent_demo.py b/autonomous_agent_demo.py index 1e3d4d6..ed3db37 100644 --- a/autonomous_agent_demo.py +++ b/autonomous_agent_demo.py @@ -133,6 +133,13 @@ Authentication: help="Work on a specific feature ID only (used by orchestrator for coding agents)", ) + parser.add_argument( + "--feature-ids", + type=str, + default=None, + help="Comma-separated feature IDs to implement in batch (e.g., '5,8,12')", + ) + # Agent type for subprocess mode parser.add_argument( "--agent-type", @@ -170,6 +177,13 @@ Authentication: help="Number of features per testing batch (1-5, default: 3)", ) + parser.add_argument( + "--batch-size", + type=int, + default=3, + help="Max features per coding agent batch (1-3, default: 3)", + ) + return parser.parse_args() @@ -222,6 +236,15 @@ def main() -> None: print(f"Error: --testing-feature-ids must be comma-separated integers, got: {args.testing_feature_ids}") return + # Parse batch coding feature IDs (comma-separated string -> list[int]) + coding_feature_ids: list[int] | None = None + if args.feature_ids: + try: + coding_feature_ids = [int(x.strip()) for x in args.feature_ids.split(",") if x.strip()] + except ValueError: + print(f"Error: --feature-ids must be comma-separated integers, got: {args.feature_ids}") + return + try: if args.agent_type: # Subprocess mode - spawned by orchestrator for a specific role @@ -232,6 +255,7 @@ def main() -> None: max_iterations=args.max_iterations or 1, yolo_mode=args.yolo, feature_id=args.feature_id, + feature_ids=coding_feature_ids, agent_type=args.agent_type, testing_feature_id=args.testing_feature_id, testing_feature_ids=testing_feature_ids, @@ -254,6 +278,7 @@ def main() -> None: yolo_mode=args.yolo, testing_agent_ratio=args.testing_ratio, testing_batch_size=args.testing_batch_size, + batch_size=args.batch_size, ) ) except KeyboardInterrupt: diff --git a/parallel_orchestrator.py b/parallel_orchestrator.py index 3a0196b..10d0923 100644 --- a/parallel_orchestrator.py +++ b/parallel_orchestrator.py @@ -153,6 +153,7 @@ class ParallelOrchestrator: yolo_mode: bool = False, testing_agent_ratio: int = 1, testing_batch_size: int = DEFAULT_TESTING_BATCH_SIZE, + batch_size: int = 3, on_output: Callable[[int, str], None] | None = None, on_status: Callable[[int, str], None] | None = None, ): @@ -177,6 +178,7 @@ class ParallelOrchestrator: self.yolo_mode = yolo_mode self.testing_agent_ratio = min(max(testing_agent_ratio, 0), 3) # Clamp 0-3 self.testing_batch_size = min(max(testing_batch_size, 1), 5) # Clamp 1-5 + self.batch_size = min(max(batch_size, 1), 3) # Clamp 1-3 self.on_output = on_output self.on_status = on_status @@ -200,6 +202,11 @@ class ParallelOrchestrator: # Cleared when all passing features have been covered at least once. self._recently_tested: set[int] = set() + # Batch tracking: primary feature_id -> all feature IDs in batch + self._batch_features: dict[int, list[int]] = {} + # Reverse mapping: any feature_id -> primary feature_id + self._feature_to_primary: dict[int, int] = {} + # Shutdown flag for async-safe signal handling # Signal handlers only set this flag; cleanup happens in the main loop self._shutdown_requested = False @@ -352,6 +359,104 @@ class ParallelOrchestrator: return selected + def build_feature_batches( + self, + ready: list[dict], + all_features: list[dict], + scheduling_scores: dict[int, float], + ) -> list[list[dict]]: + """Build dependency-aware feature batches for coding agents. 
+ + Each batch contains up to `batch_size` features. The algorithm: + 1. Start with a ready feature (sorted by scheduling score) + 2. Chain extension: find dependents whose deps are satisfied if earlier batch features pass + 3. Same-category fill: fill remaining slots with ready features from the same category + + Args: + ready: Ready features (sorted by scheduling score) + all_features: All features for dependency checking + scheduling_scores: Pre-computed scheduling scores + + Returns: + List of batches, each batch is a list of feature dicts + """ + if self.batch_size <= 1: + # No batching - return each feature as a single-item batch + return [[f] for f in ready] + + # Build children adjacency: parent_id -> [child_ids] + children: dict[int, list[int]] = {f["id"]: [] for f in all_features} + feature_map: dict[int, dict] = {f["id"]: f for f in all_features} + for f in all_features: + for dep_id in (f.get("dependencies") or []): + if dep_id in children: + children[dep_id].append(f["id"]) + + # Pre-compute passing IDs + passing_ids = {f["id"] for f in all_features if f.get("passes")} + + used_ids: set[int] = set() # Features already assigned to a batch + batches: list[list[dict]] = [] + + for feature in ready: + if feature["id"] in used_ids: + continue + + batch = [feature] + used_ids.add(feature["id"]) + # Simulate passing set = real passing + batch features + simulated_passing = passing_ids | {feature["id"]} + + # Phase 1: Chain extension - find dependents whose deps are met + for _ in range(self.batch_size - 1): + best_candidate = None + best_score = -1.0 + # Check children of all features currently in the batch + candidate_ids: set[int] = set() + for bf in batch: + for child_id in children.get(bf["id"], []): + if child_id not in used_ids and child_id not in simulated_passing: + candidate_ids.add(child_id) + + for cid in candidate_ids: + cf = feature_map.get(cid) + if not cf or cf.get("passes") or cf.get("in_progress"): + continue + # Check if ALL deps are satisfied by simulated passing set + deps = cf.get("dependencies") or [] + if all(d in simulated_passing for d in deps): + score = scheduling_scores.get(cid, 0) + if score > best_score: + best_score = score + best_candidate = cf + + if best_candidate: + batch.append(best_candidate) + used_ids.add(best_candidate["id"]) + simulated_passing.add(best_candidate["id"]) + else: + break + + # Phase 2: Same-category fill + if len(batch) < self.batch_size: + category = feature.get("category", "") + for rf in ready: + if len(batch) >= self.batch_size: + break + if rf["id"] in used_ids: + continue + if rf.get("category", "") == category: + batch.append(rf) + used_ids.add(rf["id"]) + + batches.append(batch) + + debug_log.log("BATCH", f"Built {len(batches)} batches from {len(ready)} ready features", + batch_sizes=[len(b) for b in batches], + batch_ids=[[f['id'] for f in b] for b in batches[:5]]) + + return batches + def get_resumable_features( self, feature_dicts: list[dict] | None = None, @@ -376,9 +481,11 @@ class ParallelOrchestrator: finally: session.close() - # Snapshot running IDs once to avoid acquiring lock per feature + # Snapshot running IDs once (include all batch feature IDs) with self._lock: running_ids = set(self.running_coding_agents.keys()) + for batch_ids in self._batch_features.values(): + running_ids.update(batch_ids) resumable = [] for fd in feature_dicts: @@ -421,9 +528,11 @@ class ParallelOrchestrator: # Pre-compute passing_ids once to avoid O(n^2) in the loop passing_ids = {fd["id"] for fd in feature_dicts if fd.get("passes")} 
- # Snapshot running IDs once to avoid acquiring lock per feature + # Snapshot running IDs once (include all batch feature IDs) with self._lock: running_ids = set(self.running_coding_agents.keys()) + for batch_ids in self._batch_features.values(): + running_ids.update(batch_ids) ready = [] skipped_reasons = {"passes": 0, "in_progress": 0, "running": 0, "failed": 0, "deps": 0} @@ -635,6 +744,75 @@ class ParallelOrchestrator: return True, f"Started feature {feature_id}" + def start_feature_batch(self, feature_ids: list[int], resume: bool = False) -> tuple[bool, str]: + """Start a coding agent for a batch of features. + + Args: + feature_ids: List of feature IDs to implement in batch + resume: If True, resume features already in_progress + + Returns: + Tuple of (success, message) + """ + if not feature_ids: + return False, "No features to start" + + # Single feature falls back to start_feature + if len(feature_ids) == 1: + return self.start_feature(feature_ids[0], resume=resume) + + with self._lock: + # Check if any feature in batch is already running + for fid in feature_ids: + if fid in self.running_coding_agents or fid in self._feature_to_primary: + return False, f"Feature {fid} already running" + if len(self.running_coding_agents) >= self.max_concurrency: + return False, "At max concurrency" + total_agents = len(self.running_coding_agents) + len(self.running_testing_agents) + if total_agents >= MAX_TOTAL_AGENTS: + return False, f"At max total agents ({total_agents}/{MAX_TOTAL_AGENTS})" + + # Mark all features as in_progress in a single transaction + session = self.get_session() + try: + features_to_mark = [] + for fid in feature_ids: + feature = session.query(Feature).filter(Feature.id == fid).first() + if not feature: + return False, f"Feature {fid} not found" + if feature.passes: + return False, f"Feature {fid} already complete" + if not resume: + if feature.in_progress: + return False, f"Feature {fid} already in progress" + features_to_mark.append(feature) + else: + if not feature.in_progress: + return False, f"Feature {fid} not in progress, cannot resume" + + for feature in features_to_mark: + feature.in_progress = True + session.commit() + finally: + session.close() + + # Spawn batch coding agent + success, message = self._spawn_coding_agent_batch(feature_ids) + if not success: + # Clear in_progress on failure + session = self.get_session() + try: + for fid in feature_ids: + feature = session.query(Feature).filter(Feature.id == fid).first() + if feature and not resume: + feature.in_progress = False + session.commit() + finally: + session.close() + return False, message + + return True, f"Started batch [{', '.join(str(fid) for fid in feature_ids)}]" + def _spawn_coding_agent(self, feature_id: int) -> tuple[bool, str]: """Spawn a coding agent subprocess for a specific feature.""" # Create abort event @@ -702,6 +880,75 @@ class ParallelOrchestrator: print(f"Started coding agent for feature #{feature_id}", flush=True) return True, f"Started feature {feature_id}" + def _spawn_coding_agent_batch(self, feature_ids: list[int]) -> tuple[bool, str]: + """Spawn a coding agent subprocess for a batch of features.""" + primary_id = feature_ids[0] + abort_event = threading.Event() + + cmd = [ + sys.executable, + "-u", + str(AUTOCODER_ROOT / "autonomous_agent_demo.py"), + "--project-dir", str(self.project_dir), + "--max-iterations", "1", + "--agent-type", "coding", + "--feature-ids", ",".join(str(fid) for fid in feature_ids), + ] + if self.model: + cmd.extend(["--model", self.model]) + if 
self.yolo_mode: + cmd.append("--yolo") + + try: + popen_kwargs: dict[str, Any] = { + "stdin": subprocess.DEVNULL, + "stdout": subprocess.PIPE, + "stderr": subprocess.STDOUT, + "text": True, + "encoding": "utf-8", + "errors": "replace", + "cwd": str(AUTOCODER_ROOT), + "env": {**os.environ, "PYTHONUNBUFFERED": "1"}, + } + if sys.platform == "win32": + popen_kwargs["creationflags"] = subprocess.CREATE_NO_WINDOW + + proc = subprocess.Popen(cmd, **popen_kwargs) + except Exception as e: + # Reset in_progress on failure + session = self.get_session() + try: + for fid in feature_ids: + feature = session.query(Feature).filter(Feature.id == fid).first() + if feature: + feature.in_progress = False + session.commit() + finally: + session.close() + return False, f"Failed to start batch agent: {e}" + + with self._lock: + self.running_coding_agents[primary_id] = proc + self.abort_events[primary_id] = abort_event + self._batch_features[primary_id] = list(feature_ids) + for fid in feature_ids: + self._feature_to_primary[fid] = primary_id + + # Start output reader thread + threading.Thread( + target=self._read_output, + args=(primary_id, proc, abort_event, "coding"), + daemon=True + ).start() + + if self.on_status is not None: + for fid in feature_ids: + self.on_status(fid, "running") + + ids_str = ", ".join(f"#{fid}" for fid in feature_ids) + print(f"Started coding agent for features {ids_str}", flush=True) + return True, f"Started batch [{ids_str}]" + def _spawn_testing_agent(self) -> tuple[bool, str]: """Spawn a testing agent subprocess for batch regression testing. @@ -982,73 +1229,84 @@ class ParallelOrchestrator: # feature_id is required for coding agents (always passed from start_feature) assert feature_id is not None, "feature_id must not be None for coding agents" - # Coding agent completion - debug_log.log("COMPLETE", f"Coding agent for feature #{feature_id} finished", - return_code=return_code, - status="success" if return_code == 0 else "failed") - + # Coding agent completion - handle both single and batch features + batch_ids = None with self._lock: + batch_ids = self._batch_features.pop(feature_id, None) + if batch_ids: + # Clean up reverse mapping + for fid in batch_ids: + self._feature_to_primary.pop(fid, None) self.running_coding_agents.pop(feature_id, None) self.abort_events.pop(feature_id, None) + all_feature_ids = batch_ids or [feature_id] + + debug_log.log("COMPLETE", f"Coding agent for feature(s) {all_feature_ids} finished", + return_code=return_code, + status="success" if return_code == 0 else "failed", + batch_size=len(all_feature_ids)) + # Refresh session cache to see subprocess commits - # The coding agent runs as a subprocess and commits changes (e.g., passes=True). - # Using session.expire_all() is lighter weight than engine.dispose() for SQLite WAL mode - # and is sufficient to invalidate cached data and force fresh reads. - # engine.dispose() is only called on orchestrator shutdown, not on every agent completion. 
session = self.get_session() try: session.expire_all() - feature = session.query(Feature).filter(Feature.id == feature_id).first() - feature_passes = feature.passes if feature else None - feature_in_progress = feature.in_progress if feature else None - debug_log.log("DB", f"Feature #{feature_id} state after session.expire_all()", - passes=feature_passes, - in_progress=feature_in_progress) - if feature and feature.in_progress and not feature.passes: - feature.in_progress = False - session.commit() - debug_log.log("DB", f"Cleared in_progress for feature #{feature_id} (agent failed)") + for fid in all_feature_ids: + feature = session.query(Feature).filter(Feature.id == fid).first() + feature_passes = feature.passes if feature else None + feature_in_progress = feature.in_progress if feature else None + debug_log.log("DB", f"Feature #{fid} state after session.expire_all()", + passes=feature_passes, + in_progress=feature_in_progress) + if feature and feature.in_progress and not feature.passes: + feature.in_progress = False + session.commit() + debug_log.log("DB", f"Cleared in_progress for feature #{fid} (agent failed)") finally: session.close() - # Track failures to prevent infinite retry loops + # Track failures for features still in_progress at exit if return_code != 0: with self._lock: - self._failure_counts[feature_id] = self._failure_counts.get(feature_id, 0) + 1 - failure_count = self._failure_counts[feature_id] - if failure_count >= MAX_FEATURE_RETRIES: - print(f"Feature #{feature_id} has failed {failure_count} times, will not retry", flush=True) - debug_log.log("COMPLETE", f"Feature #{feature_id} exceeded max retries", - failure_count=failure_count) + for fid in all_feature_ids: + self._failure_counts[fid] = self._failure_counts.get(fid, 0) + 1 + failure_count = self._failure_counts[fid] + if failure_count >= MAX_FEATURE_RETRIES: + print(f"Feature #{fid} has failed {failure_count} times, will not retry", flush=True) + debug_log.log("COMPLETE", f"Feature #{fid} exceeded max retries", + failure_count=failure_count) status = "completed" if return_code == 0 else "failed" if self.on_status is not None: - self.on_status(feature_id, status) - # CRITICAL: This print triggers the WebSocket to emit agent_update with state='error' or 'success' - print(f"Feature #{feature_id} {status}", flush=True) + for fid in all_feature_ids: + self.on_status(fid, status) + + # CRITICAL: Print triggers WebSocket to emit agent_update + if batch_ids and len(batch_ids) > 1: + ids_str = ", ".join(f"#{fid}" for fid in batch_ids) + print(f"Features {ids_str} {status}", flush=True) + else: + print(f"Feature #{feature_id} {status}", flush=True) # Signal main loop that an agent slot is available self._signal_agent_completed() - # NOTE: Testing agents are now spawned in start_feature() when coding agents START, - # not here when they complete. This ensures 1:1 ratio and proper termination. 
- def stop_feature(self, feature_id: int) -> tuple[bool, str]: """Stop a running coding agent and all its child processes.""" with self._lock: - if feature_id not in self.running_coding_agents: + # Check if this feature is part of a batch + primary_id = self._feature_to_primary.get(feature_id, feature_id) + if primary_id not in self.running_coding_agents: return False, "Feature not running" - abort = self.abort_events.get(feature_id) - proc = self.running_coding_agents.get(feature_id) + abort = self.abort_events.get(primary_id) + proc = self.running_coding_agents.get(primary_id) if abort: abort.set() if proc: - # Kill entire process tree to avoid orphaned children (e.g., browser instances) result = kill_process_tree(proc, timeout=5.0) - debug_log.log("STOP", f"Killed feature {feature_id} process tree", + debug_log.log("STOP", f"Killed feature {feature_id} (primary {primary_id}) process tree", status=result.status, children_found=result.children_found, children_terminated=result.children_terminated, children_killed=result.children_killed) @@ -1113,6 +1371,7 @@ class ParallelOrchestrator: print(f"Max concurrency: {self.max_concurrency} coding agents", flush=True) print(f"YOLO mode: {self.yolo_mode}", flush=True) print(f"Regression agents: {self.testing_agent_ratio} (maintained independently)", flush=True) + print(f"Batch size: {self.batch_size} features per agent", flush=True) print("=" * 70, flush=True) print(flush=True) @@ -1276,37 +1535,39 @@ class ParallelOrchestrator: await self._wait_for_agent_completion(timeout=POLL_INTERVAL * 2) continue - # Start features up to capacity + # Build dependency-aware batches from ready features slots = self.max_concurrency - current - logger.debug("Spawning loop: %d ready, %d slots available, max_concurrency=%d", - len(ready), slots, self.max_concurrency) - features_to_start = ready[:slots] - logger.debug("Features to start: %s", [f['id'] for f in features_to_start]) + batches = self.build_feature_batches(ready, feature_dicts, scheduling_scores) - debug_log.log("SPAWN", "Starting features batch", + logger.debug("Spawning loop: %d ready, %d slots available, %d batches built", + len(ready), slots, len(batches)) + + debug_log.log("SPAWN", "Starting feature batches", ready_count=len(ready), slots_available=slots, - features_to_start=[f['id'] for f in features_to_start]) + batch_count=len(batches), + batches=[[f['id'] for f in b] for b in batches[:slots]]) - for i, feature in enumerate(features_to_start): - logger.debug("Starting feature %d/%d: #%d - %s", - i + 1, len(features_to_start), feature['id'], feature['name']) - success, msg = self.start_feature(feature["id"]) + for batch in batches[:slots]: + batch_ids = [f["id"] for f in batch] + batch_names = [f"{f['id']}:{f['name']}" for f in batch] + logger.debug("Starting batch: %s", batch_ids) + success, msg = self.start_feature_batch(batch_ids) if not success: - logger.debug("Failed to start feature #%d: %s", feature['id'], msg) - debug_log.log("SPAWN", f"FAILED to start feature #{feature['id']}", - feature_name=feature['name'], + logger.debug("Failed to start batch %s: %s", batch_ids, msg) + debug_log.log("SPAWN", f"FAILED to start batch {batch_ids}", + batch_names=batch_names, error=msg) else: - logger.debug("Successfully started feature #%d", feature['id']) + logger.debug("Successfully started batch %s", batch_ids) with self._lock: running_count = len(self.running_coding_agents) logger.debug("Running coding agents after start: %d", running_count) - debug_log.log("SPAWN", f"Successfully started feature 
#{feature['id']}", - feature_name=feature['name'], + debug_log.log("SPAWN", f"Successfully started batch {batch_ids}", + batch_names=batch_names, running_coding_agents=running_count) - await asyncio.sleep(0.5) # Brief delay for subprocess to claim feature before re-querying + await asyncio.sleep(0.5) except Exception as e: print(f"Orchestrator error: {e}", flush=True) @@ -1376,6 +1637,7 @@ async def run_parallel_orchestrator( yolo_mode: bool = False, testing_agent_ratio: int = 1, testing_batch_size: int = DEFAULT_TESTING_BATCH_SIZE, + batch_size: int = 3, ) -> None: """Run the unified orchestrator. @@ -1386,6 +1648,7 @@ async def run_parallel_orchestrator( yolo_mode: Whether to run in YOLO mode (skip testing agents) testing_agent_ratio: Number of regression agents to maintain (0-3) testing_batch_size: Number of features per testing batch (1-5) + batch_size: Max features per coding agent batch (1-3) """ print(f"[ORCHESTRATOR] run_parallel_orchestrator called with max_concurrency={max_concurrency}", flush=True) orchestrator = ParallelOrchestrator( @@ -1395,6 +1658,7 @@ async def run_parallel_orchestrator( yolo_mode=yolo_mode, testing_agent_ratio=testing_agent_ratio, testing_batch_size=testing_batch_size, + batch_size=batch_size, ) # Set up cleanup to run on exit (handles normal exit, exceptions) @@ -1480,6 +1744,12 @@ def main(): default=DEFAULT_TESTING_BATCH_SIZE, help=f"Number of features per testing batch (1-5, default: {DEFAULT_TESTING_BATCH_SIZE})", ) + parser.add_argument( + "--batch-size", + type=int, + default=3, + help="Max features per coding agent batch (1-5, default: 3)", + ) args = parser.parse_args() @@ -1507,6 +1777,7 @@ def main(): yolo_mode=args.yolo, testing_agent_ratio=args.testing_agent_ratio, testing_batch_size=args.testing_batch_size, + batch_size=args.batch_size, )) except KeyboardInterrupt: print("\n\nInterrupted by user", flush=True) diff --git a/prompts.py b/prompts.py index f50aecb..5d83faa 100644 --- a/prompts.py +++ b/prompts.py @@ -217,6 +217,52 @@ If blocked, use `feature_skip` and document the blocker. return single_feature_header + base_prompt +def get_batch_feature_prompt( + feature_ids: list[int], + project_dir: Path | None = None, + yolo_mode: bool = False, +) -> str: + """Prepend batch-feature assignment header to base coding prompt. + + Used in parallel mode to assign multiple features to an agent. + Features should be implemented sequentially in the given order. + + Args: + feature_ids: List of feature IDs to implement in order + project_dir: Optional project directory for project-specific prompts + yolo_mode: If True, strip browser testing instructions from the base prompt + + Returns: + The prompt with batch-feature header prepended + """ + base_prompt = get_coding_prompt(project_dir, yolo_mode=yolo_mode) + ids_str = ", ".join(f"#{fid}" for fid in feature_ids) + + batch_header = f"""## ASSIGNED FEATURES (BATCH): {ids_str} + +You have been assigned {len(feature_ids)} features to implement sequentially. +Process them IN ORDER: {ids_str} + +### Workflow for each feature: +1. Call `feature_claim_and_get` with the feature ID to get its details +2. Implement the feature fully +3. Verify it works (browser testing if applicable) +4. Call `feature_mark_passing` to mark it complete +5. Git commit the changes +6. 
Move to the next feature + +### Important: +- Complete each feature fully before starting the next +- Mark each feature passing individually as you go +- If blocked on a feature, use `feature_skip` and move to the next one +- Other agents are handling other features - focus only on yours + +--- + +""" + return batch_header + base_prompt + + def get_app_spec(project_dir: Path) -> str: """ Load the app spec from the project. diff --git a/server/routers/agent.py b/server/routers/agent.py index 11e9b8a..9288745 100644 --- a/server/routers/agent.py +++ b/server/routers/agent.py @@ -17,11 +17,11 @@ from ..utils.project_helpers import get_project_path as _get_project_path from ..utils.validation import validate_project_name -def _get_settings_defaults() -> tuple[bool, str, int, bool]: +def _get_settings_defaults() -> tuple[bool, str, int, bool, int]: """Get defaults from global settings. Returns: - Tuple of (yolo_mode, model, testing_agent_ratio, playwright_headless) + Tuple of (yolo_mode, model, testing_agent_ratio, playwright_headless, batch_size) """ import sys root = Path(__file__).parent.parent.parent @@ -42,7 +42,12 @@ def _get_settings_defaults() -> tuple[bool, str, int, bool]: playwright_headless = (settings.get("playwright_headless") or "true").lower() == "true" - return yolo_mode, model, testing_agent_ratio, playwright_headless + try: + batch_size = int(settings.get("batch_size", "3")) + except (ValueError, TypeError): + batch_size = 3 + + return yolo_mode, model, testing_agent_ratio, playwright_headless, batch_size router = APIRouter(prefix="/api/projects/{project_name}/agent", tags=["agent"]) @@ -91,19 +96,22 @@ async def start_agent( manager = get_project_manager(project_name) # Get defaults from global settings if not provided in request - default_yolo, default_model, default_testing_ratio, playwright_headless = _get_settings_defaults() + default_yolo, default_model, default_testing_ratio, playwright_headless, default_batch_size = _get_settings_defaults() yolo_mode = request.yolo_mode if request.yolo_mode is not None else default_yolo model = request.model if request.model else default_model max_concurrency = request.max_concurrency or 1 testing_agent_ratio = request.testing_agent_ratio if request.testing_agent_ratio is not None else default_testing_ratio + batch_size = default_batch_size + success, message = await manager.start( yolo_mode=yolo_mode, model=model, max_concurrency=max_concurrency, testing_agent_ratio=testing_agent_ratio, playwright_headless=playwright_headless, + batch_size=batch_size, ) # Notify scheduler of manual start (to prevent auto-stop during scheduled window) diff --git a/server/routers/settings.py b/server/routers/settings.py index 481cd0d..77b4a4d 100644 --- a/server/routers/settings.py +++ b/server/routers/settings.py @@ -92,6 +92,7 @@ async def get_settings(): ollama_mode=_is_ollama_mode(), testing_agent_ratio=_parse_int(all_settings.get("testing_agent_ratio"), 1), playwright_headless=_parse_bool(all_settings.get("playwright_headless"), default=True), + batch_size=_parse_int(all_settings.get("batch_size"), 3), ) @@ -110,6 +111,9 @@ async def update_settings(update: SettingsUpdate): if update.playwright_headless is not None: set_setting("playwright_headless", "true" if update.playwright_headless else "false") + if update.batch_size is not None: + set_setting("batch_size", str(update.batch_size)) + # Return updated settings all_settings = get_all_settings() return SettingsResponse( @@ -119,4 +123,5 @@ async def update_settings(update: SettingsUpdate): 
ollama_mode=_is_ollama_mode(), testing_agent_ratio=_parse_int(all_settings.get("testing_agent_ratio"), 1), playwright_headless=_parse_bool(all_settings.get("playwright_headless"), default=True), + batch_size=_parse_int(all_settings.get("batch_size"), 3), ) diff --git a/server/schemas.py b/server/schemas.py index b5e2d84..e15f1b3 100644 --- a/server/schemas.py +++ b/server/schemas.py @@ -399,6 +399,7 @@ class SettingsResponse(BaseModel): ollama_mode: bool = False # True if Ollama API is configured via .env testing_agent_ratio: int = 1 # Regression testing agents (0-3) playwright_headless: bool = True + batch_size: int = 3 # Features per coding agent batch (1-3) class ModelsResponse(BaseModel): @@ -413,6 +414,7 @@ class SettingsUpdate(BaseModel): model: str | None = None testing_agent_ratio: int | None = None # 0-3 playwright_headless: bool | None = None + batch_size: int | None = None # Features per agent batch (1-3) @field_validator('model') @classmethod @@ -428,6 +430,13 @@ class SettingsUpdate(BaseModel): raise ValueError("testing_agent_ratio must be between 0 and 3") return v + @field_validator('batch_size') + @classmethod + def validate_batch_size(cls, v: int | None) -> int | None: + if v is not None and (v < 1 or v > 3): + raise ValueError("batch_size must be between 1 and 3") + return v + # ============================================================================ # Dev Server Schemas diff --git a/server/services/process_manager.py b/server/services/process_manager.py index 81a9025..3340cd1 100644 --- a/server/services/process_manager.py +++ b/server/services/process_manager.py @@ -298,6 +298,7 @@ class AgentProcessManager: max_concurrency: int | None = None, testing_agent_ratio: int = 1, playwright_headless: bool = True, + batch_size: int = 3, ) -> tuple[bool, str]: """ Start the agent as a subprocess. 
@@ -349,6 +350,9 @@ class AgentProcessManager: # Add testing agent configuration cmd.extend(["--testing-ratio", str(testing_agent_ratio)]) + # Add --batch-size flag for multi-feature batching + cmd.extend(["--batch-size", str(batch_size)]) + try: # Start subprocess with piped stdout/stderr # Use project_dir as cwd so Claude SDK sandbox allows access to project files diff --git a/server/websocket.py b/server/websocket.py index efce7b7..dfb4dee 100644 --- a/server/websocket.py +++ b/server/websocket.py @@ -39,6 +39,14 @@ TESTING_AGENT_START_PATTERN = re.compile(r'Started testing agent for feature #(\ # Matches: "Feature #123 testing completed" or "Feature #123 testing failed" TESTING_AGENT_COMPLETE_PATTERN = re.compile(r'Feature #(\d+) testing (completed|failed)') +# Pattern to detect batch coding agent start message +# Matches: "Started coding agent for features #5, #8, #12" +BATCH_CODING_AGENT_START_PATTERN = re.compile(r'Started coding agent for features (#\d+(?:,\s*#\d+)*)') + +# Pattern to detect batch completion +# Matches: "Features #5, #8, #12 completed" or "Features #5, #8, #12 failed" +BATCH_FEATURES_COMPLETE_PATTERN = re.compile(r'Features (#\d+(?:,\s*#\d+)*)\s+(completed|failed)') + # Patterns for detecting agent activity and thoughts THOUGHT_PATTERNS = [ # Claude's tool usage patterns (actual format: [Tool: name]) @@ -64,9 +72,9 @@ ORCHESTRATOR_PATTERNS = { 'capacity_check': re.compile(r'\[DEBUG\] Spawning loop: (\d+) ready, (\d+) slots'), 'at_capacity': re.compile(r'At max capacity|at max testing agents|At max total agents'), 'feature_start': re.compile(r'Starting feature \d+/\d+: #(\d+) - (.+)'), - 'coding_spawn': re.compile(r'Started coding agent for feature #(\d+)'), + 'coding_spawn': re.compile(r'Started coding agent for features? #(\d+)'), 'testing_spawn': re.compile(r'Started testing agent for feature #(\d+)'), - 'coding_complete': re.compile(r'Feature #(\d+) (completed|failed)'), + 'coding_complete': re.compile(r'Features? 
#(\d+)(?:,\s*#\d+)* (completed|failed)'), 'testing_complete': re.compile(r'Feature #(\d+) testing (completed|failed)'), 'all_complete': re.compile(r'All features complete'), 'blocked_features': re.compile(r'(\d+) blocked by dependencies'), @@ -96,7 +104,17 @@ class AgentTracker: # Check for orchestrator status messages first # These don't have [Feature #X] prefix - # Coding agent start: "Started coding agent for feature #X" + # Batch coding agent start: "Started coding agent for features #5, #8, #12" + batch_start_match = BATCH_CODING_AGENT_START_PATTERN.match(line) + if batch_start_match: + try: + feature_ids = [int(x.strip().lstrip('#')) for x in batch_start_match.group(1).split(',')] + if feature_ids: + return await self._handle_batch_agent_start(feature_ids, "coding") + except ValueError: + pass + + # Single coding agent start: "Started coding agent for feature #X" if line.startswith("Started coding agent for feature #"): m = re.search(r'#(\d+)', line) if m: @@ -119,6 +137,17 @@ class AgentTracker: is_success = testing_complete_match.group(2) == "completed" return await self._handle_agent_complete(feature_id, is_success, agent_type="testing") + # Batch features complete: "Features #5, #8, #12 completed/failed" + batch_complete_match = BATCH_FEATURES_COMPLETE_PATTERN.match(line) + if batch_complete_match: + try: + feature_ids = [int(x.strip().lstrip('#')) for x in batch_complete_match.group(1).split(',')] + is_success = batch_complete_match.group(2) == "completed" + if feature_ids: + return await self._handle_batch_agent_complete(feature_ids, is_success, "coding") + except ValueError: + pass + # Coding agent complete: "Feature #X completed/failed" (without "testing" keyword) if line.startswith("Feature #") and ("completed" in line or "failed" in line) and "testing" not in line: m = re.search(r'#(\d+)', line) @@ -158,6 +187,7 @@ class AgentTracker: 'name': AGENT_MASCOTS[agent_index % len(AGENT_MASCOTS)], 'agent_index': agent_index, 'agent_type': 'coding', + 'feature_ids': [feature_id], 'state': 'thinking', 'feature_name': f'Feature #{feature_id}', 'last_thought': None, @@ -165,6 +195,10 @@ class AgentTracker: agent = self.active_agents[key] + # Update current_feature_id for batch agents when output comes from a different feature + if 'current_feature_id' in agent and feature_id in agent.get('feature_ids', []): + agent['current_feature_id'] = feature_id + # Detect state and thought from content state = 'working' thought = None @@ -188,6 +222,7 @@ class AgentTracker: 'agentName': agent['name'], 'agentType': agent['agent_type'], 'featureId': feature_id, + 'featureIds': agent.get('feature_ids', [feature_id]), 'featureName': agent['feature_name'], 'state': state, 'thought': thought, @@ -244,6 +279,7 @@ class AgentTracker: 'name': AGENT_MASCOTS[agent_index % len(AGENT_MASCOTS)], 'agent_index': agent_index, 'agent_type': agent_type, + 'feature_ids': [feature_id], 'state': 'thinking', 'feature_name': feature_name, 'last_thought': 'Starting work...', @@ -255,12 +291,55 @@ class AgentTracker: 'agentName': AGENT_MASCOTS[agent_index % len(AGENT_MASCOTS)], 'agentType': agent_type, 'featureId': feature_id, + 'featureIds': [feature_id], 'featureName': feature_name, 'state': 'thinking', 'thought': 'Starting work...', 'timestamp': datetime.now().isoformat(), } + async def _handle_batch_agent_start(self, feature_ids: list[int], agent_type: str = "coding") -> dict | None: + """Handle batch agent start message from orchestrator.""" + if not feature_ids: + return None + primary_id = feature_ids[0] + async 
with self._lock: + key = (primary_id, agent_type) + agent_index = self._next_agent_index + self._next_agent_index += 1 + + feature_name = f'Features {", ".join(f"#{fid}" for fid in feature_ids)}' + + self.active_agents[key] = { + 'name': AGENT_MASCOTS[agent_index % len(AGENT_MASCOTS)], + 'agent_index': agent_index, + 'agent_type': agent_type, + 'feature_ids': list(feature_ids), + 'current_feature_id': primary_id, + 'state': 'thinking', + 'feature_name': feature_name, + 'last_thought': 'Starting batch work...', + } + + # Register all feature IDs so output lines can find this agent + for fid in feature_ids: + secondary_key = (fid, agent_type) + if secondary_key != key: + self.active_agents[secondary_key] = self.active_agents[key] + + return { + 'type': 'agent_update', + 'agentIndex': agent_index, + 'agentName': AGENT_MASCOTS[agent_index % len(AGENT_MASCOTS)], + 'agentType': agent_type, + 'featureId': primary_id, + 'featureIds': list(feature_ids), + 'featureName': feature_name, + 'state': 'thinking', + 'thought': 'Starting batch work...', + 'timestamp': datetime.now().isoformat(), + } + async def _handle_agent_complete(self, feature_id: int, is_success: bool, agent_type: str = "coding") -> dict | None: """Handle agent completion - ALWAYS emits a message, even if agent wasn't tracked. @@ -282,6 +361,7 @@ class AgentTracker: 'agentName': agent['name'], 'agentType': agent.get('agent_type', agent_type), 'featureId': feature_id, + 'featureIds': agent.get('feature_ids', [feature_id]), 'featureName': agent['feature_name'], 'state': state, 'thought': 'Completed successfully!' if is_success else 'Failed to complete', @@ -298,6 +378,7 @@ class AgentTracker: 'agentName': 'Unknown', 'agentType': agent_type, 'featureId': feature_id, + 'featureIds': [feature_id], 'featureName': f'Feature #{feature_id}', 'state': state, 'thought': 'Completed successfully!' if is_success else 'Failed to complete', @@ -305,6 +386,49 @@ class AgentTracker: 'synthetic': True, } + async def _handle_batch_agent_complete(self, feature_ids: list[int], is_success: bool, agent_type: str = "coding") -> dict | None: + """Handle batch agent completion.""" + if not feature_ids: + return None + primary_id = feature_ids[0] + async with self._lock: + state = 'success' if is_success else 'error' + key = (primary_id, agent_type) + + if key in self.active_agents: + agent = self.active_agents[key] + result = { + 'type': 'agent_update', + 'agentIndex': agent['agent_index'], + 'agentName': agent['name'], + 'agentType': agent.get('agent_type', agent_type), + 'featureId': primary_id, + 'featureIds': agent.get('feature_ids', list(feature_ids)), + 'featureName': agent['feature_name'], + 'state': state, + 'thought': 'Batch completed successfully!' if is_success else 'Batch failed to complete', + 'timestamp': datetime.now().isoformat(), + } + # Clean up all keys for this batch + for fid in feature_ids: + self.active_agents.pop((fid, agent_type), None) + return result + else: + # Synthetic completion + return { + 'type': 'agent_update', + 'agentIndex': -1, + 'agentName': 'Unknown', + 'agentType': agent_type, + 'featureId': primary_id, + 'featureIds': list(feature_ids), + 'featureName': f'Features {", ".join(f"#{fid}" for fid in feature_ids)}', + 'state': state, + 'thought': 'Batch completed successfully!' if is_success else 'Batch failed to complete', + 'timestamp': datetime.now().isoformat(), + 'synthetic': True, + } + class OrchestratorTracker: """Tracks orchestrator state for Mission Control observability. 
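For readers tracing the WebSocket changes above, the two new batch regexes are easy to exercise in isolation, since they only ever see one orchestrator output line at a time. A minimal sketch, reusing the patterns verbatim from the server/websocket.py hunk (the `parse_ids` helper is illustrative; the real handlers inline the same parsing inside `_handle_batch_agent_start` and `_handle_batch_agent_complete`):

```python
import re

# Patterns copied from the server/websocket.py hunk in this patch
BATCH_CODING_AGENT_START_PATTERN = re.compile(r'Started coding agent for features (#\d+(?:,\s*#\d+)*)')
BATCH_FEATURES_COMPLETE_PATTERN = re.compile(r'Features (#\d+(?:,\s*#\d+)*)\s+(completed|failed)')

def parse_ids(group: str) -> list[int]:
    # "#5, #8, #12" -> [5, 8, 12], mirroring int(x.strip().lstrip('#')) in the handlers
    return [int(x.strip().lstrip('#')) for x in group.split(',')]

start = BATCH_CODING_AGENT_START_PATTERN.match("Started coding agent for features #5, #8, #12")
assert start is not None and parse_ids(start.group(1)) == [5, 8, 12]

done = BATCH_FEATURES_COMPLETE_PATTERN.match("Features #5, #8, #12 completed")
assert done is not None and done.group(2) == "completed"
```

Single-agent lines such as "Started coding agent for feature #7" do not match the batch pattern (it requires the plural "features"), so they still fall through to the existing single-feature handling.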
diff --git a/ui/src/components/AgentCard.tsx b/ui/src/components/AgentCard.tsx index 9fdff64..faa10db 100644 --- a/ui/src/components/AgentCard.tsx +++ b/ui/src/components/AgentCard.tsx @@ -112,12 +112,25 @@ export function AgentCard({ agent, onShowLogs }: AgentCardProps) { {/* Feature info */}
-
- Feature #{agent.featureId} -
-
- {agent.featureName} -
+ {agent.featureIds && agent.featureIds.length > 1 ? ( + <> +
+ Batch: {agent.featureIds.map(id => `#${id}`).join(', ')} +
+
+ Active: Feature #{agent.featureId} +
+ + ) : ( + <> +
+ Feature #{agent.featureId} +
+
+ {agent.featureName} +
+ + )}
{/* Thought bubble */} @@ -195,7 +208,10 @@ export function AgentLogModal({ agent, logs, onClose }: AgentLogModalProps) {

- Feature #{agent.featureId}: {agent.featureName} + {agent.featureIds && agent.featureIds.length > 1 + ? `Batch: ${agent.featureIds.map(id => `#${id}`).join(', ')}` + : `Feature #${agent.featureId}: ${agent.featureName}` + }

diff --git a/ui/src/components/DependencyGraph.tsx b/ui/src/components/DependencyGraph.tsx index 3061548..4151c39 100644 --- a/ui/src/components/DependencyGraph.tsx +++ b/ui/src/components/DependencyGraph.tsx @@ -227,10 +227,14 @@ function DependencyGraphInner({ graphData, onNodeClick, activeAgents = [] }: Dep }, []) // Create a map of featureId to agent info for quick lookup + // Maps ALL batch feature IDs to the same agent const agentByFeatureId = useMemo(() => { const map = new Map() for (const agent of activeAgents) { - map.set(agent.featureId, { name: agent.agentName, state: agent.state }) + const ids = agent.featureIds || [agent.featureId] + for (const fid of ids) { + map.set(fid, { name: agent.agentName, state: agent.state }) + } } return map }, [activeAgents]) diff --git a/ui/src/components/KanbanColumn.tsx b/ui/src/components/KanbanColumn.tsx index 9ab8902..1c39f30 100644 --- a/ui/src/components/KanbanColumn.tsx +++ b/ui/src/components/KanbanColumn.tsx @@ -41,9 +41,14 @@ export function KanbanColumn({ showCreateSpec, }: KanbanColumnProps) { // Create a map of feature ID to active agent for quick lookup - const agentByFeatureId = new Map( - activeAgents.map(agent => [agent.featureId, agent]) - ) + // Maps ALL batch feature IDs to the same agent + const agentByFeatureId = new Map() + for (const agent of activeAgents) { + const ids = agent.featureIds || [agent.featureId] + for (const fid of ids) { + agentByFeatureId.set(fid, agent) + } + } return ( diff --git a/ui/src/components/SettingsModal.tsx b/ui/src/components/SettingsModal.tsx index 03138b7..0246cdd 100644 --- a/ui/src/components/SettingsModal.tsx +++ b/ui/src/components/SettingsModal.tsx @@ -41,6 +41,12 @@ export function SettingsModal({ isOpen, onClose }: SettingsModalProps) { } } + const handleBatchSizeChange = (size: number) => { + if (!updateSettings.isPending) { + updateSettings.mutate({ batch_size: size }) + } + } + const models = modelsData?.models ?? [] const isSaving = updateSettings.isPending @@ -234,6 +240,30 @@ export function SettingsModal({ isOpen, onClose }: SettingsModalProps) {
+ {/* Features per Agent */} +
+ +

+ Number of features assigned to each coding agent +

+
+ {[1, 2, 3].map((size) => ( + + ))} +
+
+ {/* Update Error */} {updateSettings.isError && ( diff --git a/ui/src/hooks/useProjects.ts b/ui/src/hooks/useProjects.ts index 4f27380..676311c 100644 --- a/ui/src/hooks/useProjects.ts +++ b/ui/src/hooks/useProjects.ts @@ -267,6 +267,7 @@ const DEFAULT_SETTINGS: Settings = { ollama_mode: false, testing_agent_ratio: 1, playwright_headless: true, + batch_size: 3, } export function useAvailableModels() { diff --git a/ui/src/hooks/useWebSocket.ts b/ui/src/hooks/useWebSocket.ts index 18b117e..1a44435 100644 --- a/ui/src/hooks/useWebSocket.ts +++ b/ui/src/hooks/useWebSocket.ts @@ -210,6 +210,7 @@ export function useProjectWebSocket(projectName: string | null) { agentName: message.agentName, agentType: message.agentType || 'coding', // Default to coding for backwards compat featureId: message.featureId, + featureIds: message.featureIds || [message.featureId], featureName: message.featureName, state: message.state, thought: message.thought, @@ -225,6 +226,7 @@ export function useProjectWebSocket(projectName: string | null) { agentName: message.agentName, agentType: message.agentType || 'coding', // Default to coding for backwards compat featureId: message.featureId, + featureIds: message.featureIds || [message.featureId], featureName: message.featureName, state: message.state, thought: message.thought, diff --git a/ui/src/lib/types.ts b/ui/src/lib/types.ts index a90fc52..cec91ec 100644 --- a/ui/src/lib/types.ts +++ b/ui/src/lib/types.ts @@ -199,7 +199,8 @@ export interface ActiveAgent { agentIndex: number // -1 for synthetic completions agentName: AgentMascot | 'Unknown' agentType: AgentType // "coding" or "testing" - featureId: number + featureId: number // Current/primary feature (backward compat) + featureIds: number[] // All features in batch featureName: string state: AgentState thought?: string @@ -270,6 +271,7 @@ export interface WSAgentUpdateMessage { agentName: AgentMascot | 'Unknown' agentType: AgentType // "coding" or "testing" featureId: number + featureIds?: number[] // All features in batch (may be absent for backward compat) featureName: string state: AgentState thought?: string @@ -530,6 +532,7 @@ export interface Settings { ollama_mode: boolean testing_agent_ratio: number // Regression testing agents (0-3) playwright_headless: boolean + batch_size: number // Features per coding agent batch (1-3) } export interface SettingsUpdate { @@ -537,6 +540,7 @@ export interface SettingsUpdate { model?: string testing_agent_ratio?: number playwright_headless?: boolean + batch_size?: number } export interface ProjectSettingsUpdate { From 016eead8b417390d466762c874a7e23b8c07bbcb Mon Sep 17 00:00:00 2001 From: Auto Date: Sun, 1 Feb 2026 16:42:56 +0200 Subject: [PATCH 23/24] fix: update log prefix when batch agent moves between features The output reader was stamping every line with the primary feature ID (e.g., [Feature #24]) even after the agent claimed a new feature in the batch. Now parses feature_claim_and_get calls in the output stream and switches the prefix to the newly claimed feature ID, so logs correctly show [Feature #30] once the agent moves on. 
Co-Authored-By: Claude Opus 4.5 --- parallel_orchestrator.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/parallel_orchestrator.py b/parallel_orchestrator.py index 10d0923..d31db0b 100644 --- a/parallel_orchestrator.py +++ b/parallel_orchestrator.py @@ -22,6 +22,7 @@ import asyncio import atexit import logging import os +import re import signal import subprocess import sys @@ -1116,6 +1117,11 @@ class ParallelOrchestrator: return True + # Pattern to detect when a batch agent claims a new feature + _CLAIM_FEATURE_PATTERN = re.compile( + r"feature_claim_and_get\b.*?['\"]?feature_id['\"]?\s*[:=]\s*(\d+)" + ) + def _read_output( self, feature_id: int | None, @@ -1124,6 +1130,7 @@ class ParallelOrchestrator: agent_type: Literal["coding", "testing"] = "coding", ): """Read output from subprocess and emit events.""" + current_feature_id = feature_id try: if proc.stdout is None: proc.wait() @@ -1132,11 +1139,17 @@ class ParallelOrchestrator: if abort.is_set(): break line = line.rstrip() + # Detect when a batch agent claims a new feature + claim_match = self._CLAIM_FEATURE_PATTERN.search(line) + if claim_match: + claimed_id = int(claim_match.group(1)) + if claimed_id != current_feature_id: + current_feature_id = claimed_id if self.on_output is not None: - self.on_output(feature_id or 0, line) + self.on_output(current_feature_id or 0, line) else: # Both coding and testing agents now use [Feature #X] format - print(f"[Feature #{feature_id}] {line}", flush=True) + print(f"[Feature #{current_feature_id}] {line}", flush=True) proc.wait() finally: # CRITICAL: Kill the process tree to clean up any child processes (e.g., Claude CLI) From b2ab1ecc7ae34dc1863d3f7881980122c40fbe67 Mon Sep 17 00:00:00 2001 From: Auto Date: Sun, 1 Feb 2026 17:34:07 +0200 Subject: [PATCH 24/24] add documentation --- CLAUDE.md | 160 +++++++--- ui/src/App.tsx | 13 +- ui/src/components/docs/DocsContent.tsx | 130 ++++++++ ui/src/components/docs/DocsPage.tsx | 215 ++++++++++++++ ui/src/components/docs/DocsSearch.tsx | 78 +++++ ui/src/components/docs/DocsSidebar.tsx | 189 ++++++++++++ ui/src/components/docs/docsData.ts | 222 ++++++++++++++ .../components/docs/sections/AIAssistant.tsx | 75 +++++ .../docs/sections/AdvancedConfig.tsx | 220 ++++++++++++++ .../components/docs/sections/AgentSystem.tsx | 280 ++++++++++++++++++ .../components/docs/sections/AppSpecSetup.tsx | 130 ++++++++ .../docs/sections/AppearanceThemes.tsx | 185 ++++++++++++ .../docs/sections/DeveloperTools.tsx | 104 +++++++ ui/src/components/docs/sections/FAQ.tsx | 157 ++++++++++ .../docs/sections/FeaturesKanban.tsx | 182 ++++++++++++ .../docs/sections/GettingStarted.tsx | 134 +++++++++ .../docs/sections/ProjectStructure.tsx | 162 ++++++++++ .../components/docs/sections/Scheduling.tsx | 102 +++++++ ui/src/components/docs/sections/Security.tsx | 218 ++++++++++++++ .../docs/sections/SettingsConfig.tsx | 188 ++++++++++++ ui/src/hooks/useHashRoute.ts | 36 +++ ui/src/main.tsx | 10 +- ui/src/styles/globals.css | 137 +++++++++ 23 files changed, 3290 insertions(+), 37 deletions(-) create mode 100644 ui/src/components/docs/DocsContent.tsx create mode 100644 ui/src/components/docs/DocsPage.tsx create mode 100644 ui/src/components/docs/DocsSearch.tsx create mode 100644 ui/src/components/docs/DocsSidebar.tsx create mode 100644 ui/src/components/docs/docsData.ts create mode 100644 ui/src/components/docs/sections/AIAssistant.tsx create mode 100644 ui/src/components/docs/sections/AdvancedConfig.tsx create mode 100644 
ui/src/components/docs/sections/AgentSystem.tsx create mode 100644 ui/src/components/docs/sections/AppSpecSetup.tsx create mode 100644 ui/src/components/docs/sections/AppearanceThemes.tsx create mode 100644 ui/src/components/docs/sections/DeveloperTools.tsx create mode 100644 ui/src/components/docs/sections/FAQ.tsx create mode 100644 ui/src/components/docs/sections/FeaturesKanban.tsx create mode 100644 ui/src/components/docs/sections/GettingStarted.tsx create mode 100644 ui/src/components/docs/sections/ProjectStructure.tsx create mode 100644 ui/src/components/docs/sections/Scheduling.tsx create mode 100644 ui/src/components/docs/sections/Security.tsx create mode 100644 ui/src/components/docs/sections/SettingsConfig.tsx create mode 100644 ui/src/hooks/useHashRoute.ts diff --git a/CLAUDE.md b/CLAUDE.md index c9ef839..ef1d7d0 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -54,6 +54,12 @@ python autonomous_agent_demo.py --project-dir my-app --yolo # Parallel mode: run multiple agents concurrently (1-5 agents) python autonomous_agent_demo.py --project-dir my-app --parallel --max-concurrency 3 + +# Batch mode: implement multiple features per agent session (1-3) +python autonomous_agent_demo.py --project-dir my-app --batch-size 3 + +# Batch specific features by ID +python autonomous_agent_demo.py --project-dir my-app --batch-features 1,2,3 ``` ### YOLO Mode (Rapid Prototyping) @@ -68,7 +74,7 @@ python autonomous_agent_demo.py --project-dir my-app --yolo ``` **What's different in YOLO mode:** -- No regression testing (skips `feature_get_for_regression`) +- No regression testing - No Playwright MCP server (browser automation disabled) - Features marked passing after lint/type-check succeeds - Faster iteration for prototyping @@ -97,10 +103,13 @@ npm run lint # Run ESLint ### Python ```bash -ruff check . # Lint -mypy . # Type check -python test_security.py # Security unit tests (163 tests) -python test_security_integration.py # Integration tests (9 tests) +ruff check . # Lint +mypy . 
# Type check +python test_security.py # Security unit tests (12 tests) +python test_security_integration.py # Integration tests (9 tests) +python -m pytest test_client.py # Client tests (20 tests) +python -m pytest test_dependency_resolver.py # Dependency resolver tests (12 tests) +python -m pytest test_rate_limit_utils.py # Rate limit tests (22 tests) ``` ### React UI @@ -108,11 +117,17 @@ python test_security_integration.py # Integration tests (9 tests) ```bash cd ui npm run lint # ESLint -npm run build # Type check + build +npm run build # Type check + build (Vite 7) npm run test:e2e # Playwright end-to-end tests npm run test:e2e:ui # Playwright tests with UI ``` +### CI/CD + +GitHub Actions (`.github/workflows/ci.yml`) runs on push/PR to master: +- **Python job**: ruff lint + security tests +- **UI job**: ESLint + TypeScript build + ### Code Quality Configuration in `pyproject.toml`: @@ -124,16 +139,21 @@ Configuration in `pyproject.toml`: ### Core Python Modules - `start.py` - CLI launcher with project creation/selection menu -- `autonomous_agent_demo.py` - Entry point for running the agent +- `autonomous_agent_demo.py` - Entry point for running the agent (supports `--yolo`, `--parallel`, `--batch-size`, `--batch-features`) - `autocoder_paths.py` - Central path resolution with dual-path backward compatibility and migration - `agent.py` - Agent session loop using Claude Agent SDK -- `client.py` - ClaudeSDKClient configuration with security hooks and MCP servers +- `client.py` - ClaudeSDKClient configuration with security hooks, MCP servers, and Vertex AI support - `security.py` - Bash command allowlist validation (ALLOWED_COMMANDS whitelist) -- `prompts.py` - Prompt template loading with project-specific fallback +- `prompts.py` - Prompt template loading with project-specific fallback and batch feature prompts - `progress.py` - Progress tracking, database queries, webhook notifications -- `registry.py` - Project registry for mapping names to paths (cross-platform) +- `registry.py` - Project registry for mapping names to paths (cross-platform), global settings model - `parallel_orchestrator.py` - Concurrent agent execution with dependency-aware scheduling +- `auth.py` - Authentication error detection for Claude CLI +- `env_constants.py` - Shared environment variable constants (API_ENV_VARS) used by client.py and chat sessions +- `rate_limit_utils.py` - Rate limit detection, retry parsing, exponential backoff with jitter +- `api/database.py` - SQLAlchemy models (Feature, Schedule, ScheduleOverride) - `api/dependency_resolver.py` - Cycle detection (Kahn's algorithm + DFS) and dependency validation +- `api/migration.py` - JSON-to-SQLite migration utility ### Project Registry @@ -147,13 +167,36 @@ The registry uses: ### Server API (server/) -The FastAPI server provides REST endpoints for the UI: +The FastAPI server provides REST and WebSocket endpoints for the UI: -- `server/routers/projects.py` - Project CRUD with registry integration -- `server/routers/features.py` - Feature management -- `server/routers/agent.py` - Agent control (start/stop/pause/resume) -- `server/routers/filesystem.py` - Filesystem browser API with security controls -- `server/routers/spec_creation.py` - WebSocket for interactive spec creation +**Routers** (`server/routers/`): +- `projects.py` - Project CRUD with registry integration +- `features.py` - Feature management +- `agent.py` - Agent control (start/stop/pause/resume) +- `filesystem.py` - Filesystem browser API with security controls +- `spec_creation.py` - 
WebSocket for interactive spec creation +- `expand_project.py` - Interactive project expansion via natural language +- `assistant_chat.py` - Read-only project assistant chat (WebSocket/REST) +- `terminal.py` - Interactive terminal I/O with PTY support (WebSocket bidirectional) +- `devserver.py` - Dev server control (start/stop) and config +- `schedules.py` - CRUD for time-based agent scheduling +- `settings.py` - Global settings management (model selection, YOLO, batch size, headless browser) + +**Services** (`server/services/`): +- `process_manager.py` - Agent process lifecycle management +- `project_config.py` - Project type detection and dev command management +- `terminal_manager.py` - Terminal session management with PTY (`pywinpty` on Windows) +- `scheduler_service.py` - APScheduler-based automated agent scheduling +- `dev_server_manager.py` - Dev server lifecycle management +- `assistant_chat_session.py` / `assistant_database.py` - Assistant chat sessions with SQLite persistence +- `spec_chat_session.py` - Spec creation chat sessions +- `expand_chat_session.py` - Expand project chat sessions +- `chat_constants.py` - Shared constants for chat services + +**Utilities** (`server/utils/`): +- `process_utils.py` - Process management utilities +- `project_helpers.py` - Project path resolution helpers +- `validation.py` - Project name validation ### Feature Management @@ -164,18 +207,26 @@ Features are stored in SQLite (`features.db`) via SQLAlchemy. The agent interact MCP tools available to the agent: - `feature_get_stats` - Progress statistics -- `feature_get_next` - Get highest-priority pending feature (respects dependencies) -- `feature_claim_next` - Atomically claim next available feature (for parallel mode) -- `feature_get_for_regression` - Random passing features for regression testing +- `feature_get_by_id` - Get a single feature by ID +- `feature_get_summary` - Get summary of all features +- `feature_get_ready` - Get features ready to work on (dependencies met) +- `feature_get_blocked` - Get features blocked by unmet dependencies +- `feature_get_graph` - Get full dependency graph +- `feature_claim_and_get` - Atomically claim next available feature (for parallel mode) +- `feature_mark_in_progress` - Mark feature as in progress - `feature_mark_passing` - Mark feature complete +- `feature_mark_failing` - Mark feature as failing - `feature_skip` - Move feature to end of queue +- `feature_clear_in_progress` - Clear in-progress status - `feature_create_bulk` - Initialize all features (used by initializer) +- `feature_create` - Create a single feature - `feature_add_dependency` - Add dependency between features (with cycle detection) - `feature_remove_dependency` - Remove a dependency +- `feature_set_dependencies` - Set all dependencies for a feature at once ### React UI (ui/) -- Tech stack: React 19, TypeScript, TanStack Query, Tailwind CSS v4, Radix UI, dagre (graph layout) +- Tech stack: React 19, TypeScript, Vite 7, TanStack Query, Tailwind CSS v4, Radix UI, dagre (graph layout), xterm.js (terminal) - `src/App.tsx` - Main app with project selection, kanban board, agent controls - `src/hooks/useWebSocket.ts` - Real-time updates via WebSocket (progress, agent status, logs, agent updates) - `src/hooks/useProjects.ts` - React Query hooks for API calls @@ -187,6 +238,12 @@ Key components: - `DependencyGraph.tsx` - Interactive node graph visualization with dagre layout - `CelebrationOverlay.tsx` - Confetti animation on feature completion - `FolderBrowser.tsx` - Server-side filesystem 
browser for project folder selection +- `Terminal.tsx` / `TerminalTabs.tsx` - xterm.js-based multi-tab terminal +- `AssistantPanel.tsx` / `AssistantChat.tsx` - AI assistant for project Q&A +- `ExpandProjectModal.tsx` / `ExpandProjectChat.tsx` - Add features via natural language +- `DevServerControl.tsx` - Dev server start/stop control +- `ScheduleModal.tsx` - Schedule management UI +- `SettingsModal.tsx` - Global settings panel Keyboard shortcuts (press `?` for help): - `D` - Toggle debug panel @@ -248,15 +305,6 @@ The following directories (relative to home) are always blocked: - `.docker`, `.config/gcloud` - Container/cloud configs - `.npmrc`, `.pypirc`, `.netrc` - Package manager credentials -**Example Output:** - -``` -Created security settings at /path/to/project/.claude_settings.json - - Sandbox enabled (OS-level bash isolation) - - Filesystem restricted to: /path/to/project - - Extra read paths (validated): /Users/me/docs, /opt/shared-libs -``` - #### Per-Project Allowed Commands The agent's bash command access is controlled through a hierarchical configuration system: @@ -318,13 +366,29 @@ blocked_commands: **Files:** - `security.py` - Command validation logic and hardcoded blocklist -- `test_security.py` - Unit tests for security system (136 tests) -- `test_security_integration.py` - Integration tests with real hooks (9 tests) -- `TEST_SECURITY.md` - Quick testing reference guide +- `test_security.py` - Unit tests for security system +- `test_security_integration.py` - Integration tests with real hooks - `examples/project_allowed_commands.yaml` - Project config example (all commented by default) - `examples/org_config.yaml` - Org config example (all commented by default) - `examples/README.md` - Comprehensive guide with use cases, testing, and troubleshooting +### Vertex AI Configuration (Optional) + +Run coding agents via Google Cloud Vertex AI: + +1. Install and authenticate gcloud CLI: `gcloud auth application-default login` +2. Configure `.env`: + ``` + CLAUDE_CODE_USE_VERTEX=1 + CLOUD_ML_REGION=us-east5 + ANTHROPIC_VERTEX_PROJECT_ID=your-gcp-project-id + ANTHROPIC_DEFAULT_OPUS_MODEL=claude-opus-4-5@20251101 + ANTHROPIC_DEFAULT_SONNET_MODEL=claude-sonnet-4-5@20250929 + ANTHROPIC_DEFAULT_HAIKU_MODEL=claude-3-5-haiku@20241022 + ``` + +**Note:** Use `@` instead of `-` in model names for Vertex AI. 
+ ### Ollama Local Models (Optional) Run coding agents using local models via Ollama v0.14.0+: @@ -360,8 +424,24 @@ Run coding agents using local models via Ollama v0.14.0+: ## Claude Code Integration -- `.claude/commands/create-spec.md` - `/create-spec` slash command for interactive spec creation -- `.claude/skills/frontend-design/SKILL.md` - Skill for distinctive UI design +**Slash commands** (`.claude/commands/`): +- `/create-spec` - Interactive spec creation for new projects +- `/expand-project` - Expand existing project with new features +- `/gsd-to-autocoder-spec` - Convert GSD codebase mapping to app_spec.txt +- `/check-code` - Run lint and type-check for code quality +- `/checkpoint` - Create comprehensive checkpoint commit +- `/review-pr` - Review pull requests + +**Custom agents** (`.claude/agents/`): +- `coder.md` - Elite software architect agent for code implementation (Opus) +- `code-review.md` - Code review agent for quality/security/performance analysis (Opus) +- `deep-dive.md` - Technical investigator for deep analysis and debugging (Opus) + +**Skills** (`.claude/skills/`): +- `frontend-design` - Distinctive, production-grade UI design +- `gsd-to-autocoder-spec` - Convert GSD codebase mapping to Autocoder app_spec format + +**Other:** - `.claude/templates/` - Prompt templates copied to new projects - `examples/` - Configuration examples and documentation for security settings @@ -392,7 +472,7 @@ The UI receives updates via WebSocket (`/ws/projects/{project_name}`): When running with `--parallel`, the orchestrator: 1. Spawns multiple Claude agents as subprocesses (up to `--max-concurrency`) -2. Each agent claims features atomically via `feature_claim_next` +2. Each agent claims features atomically via `feature_claim_and_get` 3. Features blocked by unmet dependencies are skipped 4. Browser contexts are isolated per agent using `--isolated` flag 5. 
AgentTracker parses output and emits `agent_update` messages for UI @@ -405,6 +485,16 @@ The orchestrator enforces strict bounds on concurrent processes: - Testing agents are capped at `max_concurrency` (same as coding agents) - Total process count never exceeds 11 Python processes (1 orchestrator + 5 coding + 5 testing) +### Multi-Feature Batching + +Agents can implement multiple features per session using `--batch-size` (1-3, default: 3): +- `--batch-size N` - Max features per coding agent batch +- `--testing-batch-size N` - Features per testing batch (1-5, default: 3) +- `--batch-features 1,2,3` - Specific feature IDs for batch implementation +- `--testing-batch-features 1,2,3` - Specific feature IDs for batch regression testing +- `prompts.py` provides `get_batch_feature_prompt()` for multi-feature prompt generation +- Configurable in UI via settings panel + ### Design System The UI uses a **neobrutalism** design with Tailwind CSS v4: diff --git a/ui/src/App.tsx b/ui/src/App.tsx index 9842f9d..dcfe729 100644 --- a/ui/src/App.tsx +++ b/ui/src/App.tsx @@ -28,7 +28,7 @@ import { ThemeSelector } from './components/ThemeSelector' import { ResetProjectModal } from './components/ResetProjectModal' import { ProjectSetupRequired } from './components/ProjectSetupRequired' import { getDependencyGraph, startAgent } from './lib/api' -import { Loader2, Settings, Moon, Sun, RotateCcw } from 'lucide-react' +import { Loader2, Settings, Moon, Sun, RotateCcw, BookOpen } from 'lucide-react' import type { Feature } from './lib/types' import { Button } from '@/components/ui/button' import { Card, CardContent } from '@/components/ui/card' @@ -335,6 +335,17 @@ function App() { )} + {/* Docs link */} + + {/* Theme selector */} void +} + +/** + * Maps each section id from docsData to its corresponding React component. + * Order matches DOC_SECTIONS so we can iterate safely. + */ +const SECTION_COMPONENTS: Record = { + 'getting-started': GettingStarted, + 'app-spec-setup': AppSpecSetup, + 'project-structure': ProjectStructure, + 'features-kanban': FeaturesKanban, + 'agent-system': AgentSystem, + 'settings-config': SettingsConfig, + 'developer-tools': DeveloperTools, + 'ai-assistant': AIAssistant, + scheduling: Scheduling, + 'appearance-themes': AppearanceThemes, + security: Security, + 'advanced-config': AdvancedConfig, + faq: FAQ, +} + +export function DocsContent({ onSectionVisible }: DocsContentProps) { + const containerRef = useRef(null) + // Store refs to each section heading element so the observer can watch them + const headingRefs = useRef>(new Map()) + + // Stable callback ref setter -- avoids recreating refs on every render + const setHeadingRef = useCallback((id: string, element: HTMLElement | null) => { + if (element) { + headingRefs.current.set(id, element) + } else { + headingRefs.current.delete(id) + } + }, []) + + // IntersectionObserver: track which section heading is at or near the top of the viewport + useEffect(() => { + const headings = headingRefs.current + if (headings.size === 0) return + + // rootMargin: trigger when a heading enters the top 20% of the viewport. + // This ensures the sidebar updates *before* the user scrolls past the heading. 
+ const observer = new IntersectionObserver( + (entries) => { + // Find the topmost visible heading -- the one closest to the top of the viewport + const visible = entries + .filter((entry) => entry.isIntersecting) + .sort((a, b) => a.boundingClientRect.top - b.boundingClientRect.top) + + if (visible.length > 0) { + const topEntry = visible[0] + const sectionId = topEntry.target.getAttribute('data-section-id') + if (sectionId) { + onSectionVisible(sectionId) + } + } + }, + { + // Observe from the very top of the viewport down to -60% from the bottom, + // so headings are detected while in the upper portion of the screen. + rootMargin: '0px 0px -60% 0px', + threshold: 0, + }, + ) + + headings.forEach((element) => observer.observe(element)) + + return () => observer.disconnect() + }, [onSectionVisible]) + + return ( +
+ {DOC_SECTIONS.map((section) => { + const SectionComponent = SECTION_COMPONENTS[section.id] + if (!SectionComponent) return null + + const Icon = section.icon + + return ( +
+ {/* Section heading with anchor */} +

setHeadingRef(section.id, el)} + data-section-id={section.id} + className="font-display text-2xl font-bold tracking-tight mb-6 flex items-center gap-3 + text-foreground border-b-2 border-border pb-3" + > + + {section.title} +

+ + {/* Section body */} + +
+ ) + })} +
+ ) +} diff --git a/ui/src/components/docs/DocsPage.tsx b/ui/src/components/docs/DocsPage.tsx new file mode 100644 index 0000000..25288cc --- /dev/null +++ b/ui/src/components/docs/DocsPage.tsx @@ -0,0 +1,215 @@ +/** + * DocsPage Component + * + * Main layout for the documentation route (#/docs). + * Full-page layout with a sticky header, collapsible sidebar on the left, + * and scrollable content area on the right. + * + * Mobile-responsive: sidebar collapses behind a hamburger menu that + * opens as an overlay. + */ + +import { useState, useEffect, useCallback } from 'react' +import { ArrowLeft, Menu, X, Moon, Sun } from 'lucide-react' +import { useHashRoute } from '../../hooks/useHashRoute' +import { useTheme } from '../../hooks/useTheme' +import { ThemeSelector } from '../ThemeSelector' +import { Button } from '@/components/ui/button' +import { Badge } from '@/components/ui/badge' +import { DocsSidebar } from './DocsSidebar' +import { DocsSearch } from './DocsSearch' +import { DocsContent } from './DocsContent' + +export function DocsPage() { + const [activeSectionId, setActiveSectionId] = useState(null) + const [searchQuery, setSearchQuery] = useState('') + const [mobileSidebarOpen, setMobileSidebarOpen] = useState(false) + + const { section: initialSection } = useHashRoute() + const { theme, setTheme, darkMode, toggleDarkMode, themes } = useTheme() + + // On mount, if the hash includes a section id (e.g. #/docs/getting-started), + // scroll to it and set it as active + useEffect(() => { + if (initialSection) { + setActiveSectionId(initialSection) + // Delay scroll slightly so the DOM is rendered + requestAnimationFrame(() => { + const element = document.getElementById(initialSection) + if (element) { + element.scrollIntoView({ behavior: 'smooth', block: 'start' }) + } + }) + } + // eslint-disable-next-line react-hooks/exhaustive-deps + }, []) // Run only on mount + + // When a sidebar item is clicked, scroll the corresponding element into view + const handleSectionClick = useCallback((id: string) => { + setActiveSectionId(id) + + // Update hash for linkability (without triggering a route change) + history.replaceState(null, '', `#/docs/${id}`) + + const element = document.getElementById(id) + if (element) { + element.scrollIntoView({ behavior: 'smooth', block: 'start' }) + } + }, []) + + // Called by DocsContent's IntersectionObserver when a heading scrolls into view + const handleSectionVisible = useCallback((id: string) => { + setActiveSectionId(id) + }, []) + + // Close mobile sidebar when pressing Escape + useEffect(() => { + const handleKeyDown = (e: KeyboardEvent) => { + if (e.key === 'Escape' && mobileSidebarOpen) { + setMobileSidebarOpen(false) + } + } + + window.addEventListener('keydown', handleKeyDown) + return () => window.removeEventListener('keydown', handleKeyDown) + }, [mobileSidebarOpen]) + + // Prevent body scroll when mobile sidebar overlay is open + useEffect(() => { + if (mobileSidebarOpen) { + document.body.style.overflow = 'hidden' + } else { + document.body.style.overflow = '' + } + return () => { + document.body.style.overflow = '' + } + }, [mobileSidebarOpen]) + + return ( +
+ {/* Sticky header */} +
+
+
+ {/* Left side: hamburger (mobile) + title + badge */} +
+ {/* Mobile hamburger button -- only visible below lg breakpoint */} + + + + AutoCoder + + + + Documentation + +
+ + {/* Right side: theme controls + back button */} +
+ + + + + +
+
+
+
+ + {/* Body: sidebar + content */} +
+ {/* ---------------------------------------------------------------- + Desktop sidebar -- visible at lg and above + Fixed width, sticky below the header, independently scrollable + ---------------------------------------------------------------- */} + + + {/* ---------------------------------------------------------------- + Mobile sidebar overlay -- visible below lg breakpoint + ---------------------------------------------------------------- */} + {mobileSidebarOpen && ( + <> + {/* Backdrop */} +
setMobileSidebarOpen(false)} + aria-hidden="true" + /> + + {/* Sidebar panel */} + + + )} + + {/* ---------------------------------------------------------------- + Content area -- fills remaining space, scrollable + ---------------------------------------------------------------- */} +
+
+ +
+
+
+
+ ) +} diff --git a/ui/src/components/docs/DocsSearch.tsx b/ui/src/components/docs/DocsSearch.tsx new file mode 100644 index 0000000..896f3cd --- /dev/null +++ b/ui/src/components/docs/DocsSearch.tsx @@ -0,0 +1,78 @@ +/** + * DocsSearch Component + * + * Search input for the documentation sidebar. + * Supports Ctrl/Cmd+K keyboard shortcut to focus, + * and shows a keyboard hint when the input is empty. + */ + +import { useRef, useEffect } from 'react' +import { Search, X } from 'lucide-react' + +interface DocsSearchProps { + value: string + onChange: (value: string) => void +} + +export function DocsSearch({ value, onChange }: DocsSearchProps) { + const inputRef = useRef(null) + + // Global keyboard shortcut: Ctrl/Cmd+K focuses the search input + useEffect(() => { + const handleKeyDown = (e: KeyboardEvent) => { + if ((e.ctrlKey || e.metaKey) && e.key === 'k') { + e.preventDefault() + inputRef.current?.focus() + } + } + + window.addEventListener('keydown', handleKeyDown) + return () => window.removeEventListener('keydown', handleKeyDown) + }, []) + + return ( +
+ {/* Search icon */} + + + onChange(e.target.value)} + placeholder="Search docs..." + className="w-full pl-9 pr-16 py-2 text-sm bg-muted border border-border rounded-lg + text-foreground placeholder:text-muted-foreground + focus:outline-none focus:ring-2 focus:ring-ring/50 focus:border-ring + transition-colors" + /> + + {/* Right side: clear button when has value, otherwise Ctrl+K hint */} + {value ? ( + + ) : ( + + Ctrl+K + + )} +
+ ) +} diff --git a/ui/src/components/docs/DocsSidebar.tsx b/ui/src/components/docs/DocsSidebar.tsx new file mode 100644 index 0000000..4be7576 --- /dev/null +++ b/ui/src/components/docs/DocsSidebar.tsx @@ -0,0 +1,189 @@ +/** + * DocsSidebar Component + * + * Left sidebar navigation for the documentation page. + * Lists all sections from docsData with expandable subsections. + * Supports search filtering with auto-expansion of matching sections. + */ + +import { useState, useMemo } from 'react' +import { ChevronRight } from 'lucide-react' +import { DOC_SECTIONS, type DocSection } from './docsData' + +interface DocsSidebarProps { + activeSectionId: string | null + onSectionClick: (id: string) => void + searchQuery: string + onMobileClose?: () => void +} + +export function DocsSidebar({ + activeSectionId, + onSectionClick, + searchQuery, + onMobileClose, +}: DocsSidebarProps) { + // Track which top-level sections are manually expanded by the user + const [expandedSections, setExpandedSections] = useState>(() => { + // Start with the first section expanded so the sidebar is not fully collapsed + const initial = new Set() + if (DOC_SECTIONS.length > 0) { + initial.add(DOC_SECTIONS[0].id) + } + return initial + }) + + const normalizedQuery = searchQuery.trim().toLowerCase() + + // Filter sections based on search query, matching against section title, + // subsection titles, and keywords + const filteredSections = useMemo(() => { + if (!normalizedQuery) { + return DOC_SECTIONS + } + + return DOC_SECTIONS.filter((section) => { + // Check section title + if (section.title.toLowerCase().includes(normalizedQuery)) return true + + // Check keywords + if (section.keywords.some((kw) => kw.toLowerCase().includes(normalizedQuery))) return true + + // Check subsection titles + if (section.subsections.some((sub) => sub.title.toLowerCase().includes(normalizedQuery))) { + return true + } + + return false + }) + }, [normalizedQuery]) + + // Determine which sections should appear expanded: + // - When searching: auto-expand all matching sections + // - Otherwise: use manual expanded state, plus expand whichever section contains the active item + const isSectionExpanded = (sectionId: string): boolean => { + if (normalizedQuery) return true + + if (expandedSections.has(sectionId)) return true + + // Also expand the section that contains the currently active subsection + if (activeSectionId) { + const section = DOC_SECTIONS.find((s) => s.id === sectionId) + if (section) { + if (section.id === activeSectionId) return true + if (section.subsections.some((sub) => sub.id === activeSectionId)) return true + } + } + + return false + } + + const toggleSection = (sectionId: string) => { + setExpandedSections((prev) => { + const next = new Set(prev) + if (next.has(sectionId)) { + next.delete(sectionId) + } else { + next.add(sectionId) + } + return next + }) + } + + /** + * Checks whether a given id (section or subsection) is the currently active item. + * Active items get a highlighted visual treatment. + */ + const isActive = (id: string): boolean => activeSectionId === id + + /** + * Checks whether a section contains the active subsection. + * Used to highlight parent sections in a muted way. 
+ */ + const sectionContainsActive = (section: DocSection): boolean => { + if (!activeSectionId) return false + return section.subsections.some((sub) => sub.id === activeSectionId) + } + + const handleItemClick = (id: string) => { + onSectionClick(id) + // On mobile, close the sidebar after navigation + onMobileClose?.() + } + + return ( + + ) +} diff --git a/ui/src/components/docs/docsData.ts b/ui/src/components/docs/docsData.ts new file mode 100644 index 0000000..d7b2875 --- /dev/null +++ b/ui/src/components/docs/docsData.ts @@ -0,0 +1,222 @@ +import { + Rocket, + FileText, + FolderTree, + LayoutGrid, + Bot, + Settings, + Terminal, + MessageSquare, + Clock, + Palette, + Shield, + Wrench, + HelpCircle, + type LucideIcon, +} from 'lucide-react' + +export interface DocSubsection { + id: string + title: string +} + +export interface DocSection { + id: string + title: string + icon: LucideIcon + subsections: DocSubsection[] + keywords: string[] +} + +export const DOC_SECTIONS: DocSection[] = [ + { + id: 'getting-started', + title: 'Getting Started', + icon: Rocket, + subsections: [ + { id: 'what-is-autocoder', title: 'What is AutoCoder?' }, + { id: 'quick-start', title: 'Quick Start' }, + { id: 'creating-a-project', title: 'Creating a New Project' }, + { id: 'existing-project', title: 'Adding to an Existing Project' }, + { id: 'system-requirements', title: 'System Requirements' }, + ], + keywords: ['install', 'setup', 'start', 'begin', 'new', 'requirements', 'prerequisites'], + }, + { + id: 'app-spec-setup', + title: 'App Spec & Project Setup', + icon: FileText, + subsections: [ + { id: 'what-is-app-spec', title: 'What is an App Spec?' }, + { id: 'creating-spec-with-claude', title: 'Creating a Spec with Claude' }, + { id: 'writing-spec-manually', title: 'Writing a Spec Manually' }, + { id: 'initializer-agent', title: 'The Initializer Agent' }, + { id: 'starting-after-spec', title: 'Starting After Spec Creation' }, + ], + keywords: ['spec', 'specification', 'xml', 'app_spec', 'initializer', 'prompt', 'template'], + }, + { + id: 'project-structure', + title: 'Target Project Structure', + icon: FolderTree, + subsections: [ + { id: 'autocoder-directory', title: '.autocoder/ Directory Layout' }, + { id: 'features-db', title: 'Features Database' }, + { id: 'prompts-directory', title: 'Prompts Directory' }, + { id: 'allowed-commands-yaml', title: 'Allowed Commands Config' }, + { id: 'claude-md', title: 'CLAUDE.md Convention' }, + { id: 'legacy-migration', title: 'Legacy Layout Migration' }, + { id: 'claude-inheritance', title: 'Claude Inheritance' }, + ], + keywords: ['folder', 'directory', 'structure', 'layout', 'files', 'database', 'sqlite', 'migration'], + }, + { + id: 'features-kanban', + title: 'Features & Kanban Board', + icon: LayoutGrid, + subsections: [ + { id: 'kanban-overview', title: 'Kanban Board Overview' }, + { id: 'feature-cards', title: 'Feature Cards' }, + { id: 'dependency-graph', title: 'Dependency Graph View' }, + { id: 'adding-features', title: 'Adding Features' }, + { id: 'editing-features', title: 'Editing & Deleting Features' }, + { id: 'feature-dependencies', title: 'Feature Dependencies' }, + { id: 'expanding-with-ai', title: 'Expanding Project with AI' }, + { id: 'feature-priority', title: 'Priority & Ordering' }, + ], + keywords: ['kanban', 'board', 'feature', 'card', 'dependency', 'graph', 'priority', 'pending', 'progress', 'done'], + }, + { + id: 'agent-system', + title: 'Agent System', + icon: Bot, + subsections: [ + { id: 'maestro-orchestrator', title: 'Maestro: The 
Orchestrator' }, + { id: 'coding-agents', title: 'Coding Agents' }, + { id: 'testing-agents', title: 'Testing Agents' }, + { id: 'agent-lifecycle', title: 'Agent Lifecycle' }, + { id: 'concurrency', title: 'Concurrency Control' }, + { id: 'mission-control', title: 'Agent Mission Control' }, + { id: 'agent-mascots', title: 'Agent Mascots & States' }, + { id: 'agent-logs', title: 'Viewing Agent Logs' }, + { id: 'process-limits', title: 'Process Limits' }, + ], + keywords: ['agent', 'maestro', 'orchestrator', 'coding', 'testing', 'parallel', 'concurrency', 'mascot', 'spark', 'fizz', 'octo', 'batch'], + }, + { + id: 'settings-config', + title: 'Settings & Configuration', + icon: Settings, + subsections: [ + { id: 'opening-settings', title: 'Opening Settings' }, + { id: 'yolo-mode', title: 'YOLO Mode' }, + { id: 'headless-browser', title: 'Headless Browser' }, + { id: 'model-selection', title: 'Model Selection' }, + { id: 'regression-agents', title: 'Regression Agents' }, + { id: 'features-per-agent', title: 'Features per Agent (Batch Size)' }, + { id: 'concurrency-setting', title: 'Concurrency' }, + { id: 'settings-persistence', title: 'How Settings are Persisted' }, + ], + keywords: ['settings', 'config', 'yolo', 'headless', 'model', 'opus', 'sonnet', 'haiku', 'batch', 'regression'], + }, + { + id: 'developer-tools', + title: 'Developer Tools', + icon: Terminal, + subsections: [ + { id: 'debug-panel', title: 'Debug Panel' }, + { id: 'agent-logs-tab', title: 'Agent Logs Tab' }, + { id: 'dev-server-logs', title: 'Dev Server Logs Tab' }, + { id: 'terminal', title: 'Terminal' }, + { id: 'dev-server-control', title: 'Dev Server Control' }, + { id: 'per-agent-logs', title: 'Per-Agent Logs' }, + ], + keywords: ['debug', 'terminal', 'logs', 'dev server', 'console', 'xterm', 'shell'], + }, + { + id: 'ai-assistant', + title: 'AI Assistant', + icon: MessageSquare, + subsections: [ + { id: 'what-is-assistant', title: 'What is the Assistant?' 
}, + { id: 'opening-assistant', title: 'Opening the Assistant' }, + { id: 'assistant-capabilities', title: 'What It Can Do' }, + { id: 'assistant-limitations', title: 'What It Cannot Do' }, + { id: 'conversation-history', title: 'Conversation History' }, + ], + keywords: ['assistant', 'ai', 'chat', 'help', 'question', 'conversation'], + }, + { + id: 'scheduling', + title: 'Scheduling', + icon: Clock, + subsections: [ + { id: 'what-scheduling-does', title: 'What Scheduling Does' }, + { id: 'creating-schedule', title: 'Creating a Schedule' }, + { id: 'schedule-settings', title: 'Schedule Settings' }, + { id: 'schedule-overrides', title: 'Schedule Overrides' }, + { id: 'crash-recovery', title: 'Crash Recovery' }, + ], + keywords: ['schedule', 'timer', 'automated', 'cron', 'run', 'recurring', 'utc'], + }, + { + id: 'appearance-themes', + title: 'Appearance & Themes', + icon: Palette, + subsections: [ + { id: 'themes-overview', title: 'Themes Overview' }, + { id: 'dark-light-mode', title: 'Dark & Light Mode' }, + { id: 'theme-selector', title: 'Theme Selector' }, + { id: 'keyboard-shortcuts', title: 'Keyboard Shortcuts' }, + ], + keywords: ['theme', 'dark', 'light', 'color', 'appearance', 'twitter', 'claude', 'neo', 'brutalism', 'retro', 'aurora', 'business', 'keyboard', 'shortcut'], + }, + { + id: 'security', + title: 'Security', + icon: Shield, + subsections: [ + { id: 'command-validation', title: 'Command Validation Overview' }, + { id: 'command-hierarchy', title: 'Command Hierarchy' }, + { id: 'hardcoded-blocklist', title: 'Hardcoded Blocklist' }, + { id: 'global-allowlist', title: 'Global Allowlist' }, + { id: 'project-allowlist', title: 'Per-Project Allowed Commands' }, + { id: 'org-config', title: 'Organization Configuration' }, + { id: 'extra-read-paths', title: 'Extra Read Paths' }, + { id: 'filesystem-sandboxing', title: 'Filesystem Sandboxing' }, + ], + keywords: ['security', 'sandbox', 'allowlist', 'blocklist', 'command', 'bash', 'permission', 'filesystem'], + }, + { + id: 'advanced-config', + title: 'Advanced Configuration', + icon: Wrench, + subsections: [ + { id: 'vertex-ai', title: 'Vertex AI Setup' }, + { id: 'ollama', title: 'Ollama Local Models' }, + { id: 'env-variables', title: 'Environment Variables' }, + { id: 'cli-arguments', title: 'CLI Arguments' }, + { id: 'webhooks', title: 'Webhook Support' }, + { id: 'project-registry', title: 'Project Registry' }, + ], + keywords: ['vertex', 'gcloud', 'ollama', 'local', 'env', 'environment', 'cli', 'webhook', 'n8n', 'registry', 'api'], + }, + { + id: 'faq', + title: 'FAQ & Troubleshooting', + icon: HelpCircle, + subsections: [ + { id: 'faq-new-project', title: 'Starting a New Project' }, + { id: 'faq-existing-project', title: 'Adding to Existing Project' }, + { id: 'faq-agent-crash', title: 'Agent Crashes' }, + { id: 'faq-custom-commands', title: 'Custom Bash Commands' }, + { id: 'faq-blocked-features', title: 'Blocked Features' }, + { id: 'faq-parallel', title: 'Running in Parallel' }, + { id: 'faq-local-model', title: 'Using Local Models' }, + { id: 'faq-reset', title: 'Resetting a Project' }, + { id: 'faq-agent-types', title: 'Coding vs Testing Agents' }, + { id: 'faq-real-time', title: 'Monitoring in Real Time' }, + ], + keywords: ['faq', 'troubleshoot', 'help', 'problem', 'issue', 'fix', 'error', 'stuck', 'reset', 'crash'], + }, +] diff --git a/ui/src/components/docs/sections/AIAssistant.tsx b/ui/src/components/docs/sections/AIAssistant.tsx new file mode 100644 index 0000000..cceb297 --- /dev/null +++ 
b/ui/src/components/docs/sections/AIAssistant.tsx @@ -0,0 +1,75 @@ +/** + * AIAssistant Documentation Section + * + * Covers the project assistant: what it is, how to open it, + * its capabilities and limitations, and conversation history. + */ + +import { Badge } from '@/components/ui/badge' + +export function AIAssistant() { + return ( +
+ {/* What is the Assistant? */} +

+ What is the Assistant? +

+

+ The AI Assistant is a read-only project helper that can answer questions about your project, search + code, view progress, and help you understand what’s happening — without making any changes. +

+ + {/* Opening the Assistant */} +

+ Opening the Assistant +

+
    +
  • + Press A to toggle the assistant panel +
  • +
  • Or click the floating action button (chat bubble) in the bottom-right corner
  • +
  • The panel slides in from the right side
  • +
+ + {/* What It Can Do */} +

+ What It Can Do +

+
    +
  • Read and search your project’s source code
  • +
  • Answer questions about code architecture and implementation
  • +
  • View feature progress and status
  • +
  • Create new features based on your description
  • +
  • Explain what agents have done or are currently doing
  • +
  • Help debug issues by analyzing code and logs
  • +
+ + {/* What It Cannot Do */} +

+ What It Cannot Do +

+
    +
  • Modify files (read-only access)
  • +
  • Run bash commands
  • +
  • Mark features as passing/failing
  • +
  • Start or stop agents
  • +
  • Access external APIs or the internet
  • +
+
+ This is a deliberate security design — the assistant is a safe way to interact with your project + without risk of unintended changes. +
+ + {/* Conversation History */} +

+ Conversation History +

+
    +
  • Conversations are stored per project in a SQLite database
  • +
  • Multiple conversations supported — start new ones as needed
  • +
  • Switch between conversations using the conversation selector
  • +
  • History persists across browser sessions
  • +
+
+ ) +} diff --git a/ui/src/components/docs/sections/AdvancedConfig.tsx b/ui/src/components/docs/sections/AdvancedConfig.tsx new file mode 100644 index 0000000..2ed1584 --- /dev/null +++ b/ui/src/components/docs/sections/AdvancedConfig.tsx @@ -0,0 +1,220 @@ +/** + * AdvancedConfig Documentation Section + * + * Covers Vertex AI setup, Ollama local models, environment variables, + * CLI arguments, webhook support, and the project registry. + */ + +import { Badge } from '@/components/ui/badge' + +/** Environment variable descriptor for the reference table. */ +interface EnvVar { + name: string + description: string +} + +const ENV_VARS: EnvVar[] = [ + { name: 'CLAUDE_CODE_USE_VERTEX', description: 'Enable Vertex AI (1)' }, + { name: 'CLOUD_ML_REGION', description: 'GCP region' }, + { name: 'ANTHROPIC_VERTEX_PROJECT_ID', description: 'GCP project ID' }, + { name: 'ANTHROPIC_BASE_URL', description: 'Custom API base URL (for Ollama)' }, + { name: 'ANTHROPIC_AUTH_TOKEN', description: 'API auth token' }, + { name: 'API_TIMEOUT_MS', description: 'API timeout in milliseconds' }, + { name: 'EXTRA_READ_PATHS', description: 'Comma-separated extra read directories' }, + { name: 'ANTHROPIC_DEFAULT_OPUS_MODEL', description: 'Override Opus model name' }, + { name: 'ANTHROPIC_DEFAULT_SONNET_MODEL', description: 'Override Sonnet model name' }, + { name: 'ANTHROPIC_DEFAULT_HAIKU_MODEL', description: 'Override Haiku model name' }, +] + +/** CLI argument descriptor for the reference table. */ +interface CliArg { + name: string + description: string +} + +const CLI_ARGS: CliArg[] = [ + { name: '--project-dir', description: 'Project directory path or registered name' }, + { name: '--yolo', description: 'Enable YOLO mode' }, + { name: '--parallel', description: 'Enable parallel mode' }, + { name: '--max-concurrency N', description: 'Max concurrent agents (1-5)' }, + { name: '--batch-size N', description: 'Features per coding agent (1-3)' }, + { name: '--batch-features 1,2,3', description: 'Specific feature IDs to implement' }, + { name: '--testing-batch-size N', description: 'Features per testing batch (1-5)' }, + { name: '--testing-batch-features 1,2,3', description: 'Specific testing feature IDs' }, +] + +export function AdvancedConfig() { + return ( +
+ {/* Vertex AI Setup */} +

+ Vertex AI Setup +

+

+ Run coding agents via Google Cloud Vertex AI: +

+
    +
  1. + Install and authenticate the gcloud CLI:{' '} + + gcloud auth application-default login + +
  2. +
  3. + Configure your{' '} + .env file: +
  4. +
+
+
{`CLAUDE_CODE_USE_VERTEX=1
+CLOUD_ML_REGION=us-east5
+ANTHROPIC_VERTEX_PROJECT_ID=your-gcp-project-id
+ANTHROPIC_DEFAULT_OPUS_MODEL=claude-opus-4-5@20251101
+ANTHROPIC_DEFAULT_SONNET_MODEL=claude-sonnet-4-5@20250929
+ANTHROPIC_DEFAULT_HAIKU_MODEL=claude-3-5-haiku@20241022`}
+
+
+ Use @{' '} + instead of -{' '} + in model names for Vertex AI. +
+ + {/* Ollama Local Models */} +

+ Ollama Local Models +

+

+ Run coding agents using local models via Ollama v0.14.0+: +

+
    +
  1. + Install Ollama from{' '} + + ollama.com + +
  2. +
  3. + Start Ollama:{' '} + ollama serve +
  4. +
  5. + Pull a coding model:{' '} + ollama pull qwen3-coder +
  6. +
  7. + Configure your{' '} + .env: +
  8. +
+
+
{`ANTHROPIC_BASE_URL=http://localhost:11434
+ANTHROPIC_AUTH_TOKEN=ollama
+API_TIMEOUT_MS=3000000
+ANTHROPIC_DEFAULT_SONNET_MODEL=qwen3-coder`}
+
+

+ Recommended models: qwen3-coder, deepseek-coder-v2, codellama

+

+ Limitations: Smaller context windows than Claude + (model-dependent), extended context beta disabled (not supported by Ollama), and performance + depends on local hardware (GPU recommended). +

+ + {/* Environment Variables */} +

+ Environment Variables +

+

+ Key environment variables for configuring AutoCoder: +

+ + + + + + + + + {ENV_VARS.map((v) => ( + + + + + ))} + +
+ Variable + + Description +
+ {v.name} + {v.description}
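As a rough illustration of how a launcher might consume a few of the variables in this table, here is a hedged Python sketch; the defaults and parsing rules are assumptions, not AutoCoder's actual behavior:

```python
import os

# EXTRA_READ_PATHS is documented as comma-separated; strip blanks defensively.
extra_read_paths = [
    p.strip() for p in os.getenv("EXTRA_READ_PATHS", "").split(",") if p.strip()
]

# API_TIMEOUT_MS is a millisecond timeout; the fallback value is illustrative.
api_timeout_ms = int(os.getenv("API_TIMEOUT_MS", "600000"))

# Model overrides fall back to whatever the deployment's default model is.
opus_model = os.getenv("ANTHROPIC_DEFAULT_OPUS_MODEL", "<deployment-default>")

print(extra_read_paths, api_timeout_ms, opus_model)
```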
+ + {/* CLI Arguments */} +

+ CLI Arguments +

+

+ Command-line arguments for{' '} + + autonomous_agent_demo.py + + : +

+ + + + + + + + + {CLI_ARGS.map((arg) => ( + + + + + ))} + +
+ Argument + + Description +
+ {arg.name} + {arg.description}
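A minimal argparse sketch of the flags listed above, to show how they combine; the real defaults, validation, and the remaining testing flags may differ from what is assumed here:

```python
import argparse

parser = argparse.ArgumentParser(prog="autonomous_agent_demo.py")
parser.add_argument("--project-dir", required=True,
                    help="Project directory path or registered name")
parser.add_argument("--yolo", action="store_true", help="Enable YOLO mode")
parser.add_argument("--parallel", action="store_true", help="Enable parallel mode")
parser.add_argument("--max-concurrency", type=int, default=3,
                    help="Max concurrent agents (1-5); default assumed")
parser.add_argument("--batch-size", type=int, default=3,
                    help="Features per coding agent (1-3)")
parser.add_argument("--batch-features",
                    help="Comma-separated feature IDs, e.g. 1,2,3")

args = parser.parse_args(["--project-dir", "my-app", "--parallel", "--batch-size", "2"])
print(args.project_dir, args.max_concurrency, args.batch_size)
```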
+ + {/* Webhook Support */} +

+ Webhook Support +

+
    +
  • AutoCoder can send webhook notifications on feature completion
  • +
  • Compatible with N8N and similar automation tools
  • +
  • Configure the webhook URL in project settings
  • +
  • + Payload includes: feature name, status, and project info +
  • +
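Since the payload is only loosely specified here (feature name, status, project info), the following sketch shows one plausible shape; the URL and field names are placeholders, not AutoCoder's actual contract:

```python
import json
import urllib.request

WEBHOOK_URL = "https://n8n.example.com/webhook/autocoder"  # placeholder URL

payload = {
    "project": "my-app",
    "feature": "User authentication with login/signup",
    "status": "passing",
}
req = urllib.request.Request(
    WEBHOOK_URL,
    data=json.dumps(payload).encode(),
    headers={"Content-Type": "application/json"},
)
# urllib.request.urlopen(req)  # left commented so the sketch has no side effects
```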
+ + {/* Project Registry */} +

+ Project Registry +

+
    +
  • + All projects are registered in{' '} + ~/.autocoder/registry.db{' '} + (SQLite) +
  • +
  • Maps project names to filesystem paths
  • +
  • Uses POSIX path format (forward slashes) for cross-platform compatibility
  • +
  • SQLAlchemy ORM with SQLite's built-in transaction handling
  • +
+
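To make the POSIX-path point concrete, a small sketch of normalizing a native Windows path before storing it and reading it back; this shows the general technique, not the registry's actual code:

```python
from pathlib import Path, PureWindowsPath

native = PureWindowsPath(r"C:\Users\me\projects\my-app")
stored = native.as_posix()   # 'C:/Users/me/projects/my-app' (forward slashes)
loaded = Path(stored)        # pathlib resolves it again on the owning platform
print(stored, loaded)
```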
+ ) +} diff --git a/ui/src/components/docs/sections/AgentSystem.tsx b/ui/src/components/docs/sections/AgentSystem.tsx new file mode 100644 index 0000000..0edc2d3 --- /dev/null +++ b/ui/src/components/docs/sections/AgentSystem.tsx @@ -0,0 +1,280 @@ +/** + * AgentSystem Documentation Section + * + * Covers the orchestrator (Maestro), coding agents, testing agents, + * agent lifecycle, concurrency control, mission control dashboard, + * agent mascots and states, viewing logs, and process limits. + */ + +import { Badge } from '@/components/ui/badge' + +export function AgentSystem() { + return ( +
+ {/* Maestro: The Orchestrator */} +

+ Maestro: The Orchestrator +

+

+ Maestro is the central orchestrator that coordinates all agents. It acts as the conductor, + ensuring features are implemented efficiently and in the correct order. +

+
    +
  • Manages the full lifecycle of coding and testing agents
  • +
  • Schedules which features to work on based on dependencies and priority
  • +
  • Monitors agent health and restarts crashed agents automatically
  • +
  • Reports status to the UI in real time via WebSocket
  • +
+ + {/* Coding Agents */} +

+ Coding Agents +

+
    +
  • Implement features one at a time, or in batches of 1–3
  • +
  • + Claim features atomically via the{' '} + + feature_claim_and_get + {' '} + MCP tool — no two agents work on the same feature +
  • +
  • Run in isolated environments with their own browser context
  • +
  • + Use the Claude Code SDK with project-specific tools and{' '} + CLAUDE.md +
  • +
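The claim-then-work cycle described above can be sketched against the MCP tool names listed in CLAUDE.md; the argument and return shapes below are assumptions, and `implement` stands in for the actual coding session:

```python
from typing import Any, Callable

def implement(feature: dict) -> None:
    """Placeholder for the project-specific implementation work."""

def work_batch(call_tool: Callable[..., Any], batch_size: int = 3) -> None:
    for _ in range(batch_size):
        feature = call_tool("feature_claim_and_get")
        if not feature:
            break  # nothing claimable: done, or remaining features are blocked
        call_tool("feature_mark_in_progress", feature_id=feature["id"])
        try:
            implement(feature)
            call_tool("feature_mark_passing", feature_id=feature["id"])
        except Exception:
            call_tool("feature_mark_failing", feature_id=feature["id"])
```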
+ + {/* Testing Agents */} +

+ Testing Agents +

+
    +
  • Run regression tests after features are implemented
  • +
  • Verify that new code does not break existing features
  • +
  • Configurable ratio: 0–3 testing agents per coding agent
  • +
  • Can batch-test multiple features per session (1–5)
  • +
+ + {/* Agent Lifecycle */} +

+ Agent Lifecycle +

+

+ Agents are controlled through the UI or CLI. The lifecycle states are: +

+ + + + + + + + + + + + + + + + + + + + + + + + + +
+ Action + + Behavior +
Start + Click the Play button or run the CLI command +
Stop + Gracefully terminates all running agents +
Pause + Temporarily halts work (agents finish their current task first) +
Resume + Continues from where the agents were paused +
+

+ Agents auto-continue between sessions with a 3-second delay, so they keep working until + all features are complete or they are explicitly stopped. +
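A bare-bones sketch of that auto-continue loop; the three-second pause is the documented delay, while the callables are placeholders rather than the real agent API:

```python
import asyncio

async def run_until_done(run_session, all_features_passing, stop_requested):
    # Keep starting sessions until every feature passes or a stop is requested.
    while not stop_requested() and not all_features_passing():
        await run_session()
        await asyncio.sleep(3)  # documented delay between sessions
```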

+ + {/* Concurrency Control */} +

+ Concurrency Control +

+
    +
  • + A slider in the agent control bar sets the number of concurrent coding agents + (1–5) +
  • +
  • + More agents means faster progress, but also higher API usage +
  • +
  • Each agent runs as an independent subprocess
  • +
  • + Feature claiming is atomic — no two agents will ever work on the same feature + simultaneously +
  • +
+ + {/* Agent Mission Control */} +

+ Agent Mission Control +

+

+ The Mission Control dashboard provides a real-time overview of all active agents: +

+
    +
  • Active agent cards with mascot icons and current status
  • +
  • The feature each agent is currently working on
  • +
  • Agent state indicators (thinking, working, testing, etc.)
  • +
  • Orchestrator status and a recent activity feed
  • +
+ + {/* Agent Mascots & States */} +

+ Agent Mascots & States +

+

+ Each agent is assigned a unique mascot for easy identification:{' '} + Spark,{' '} + Fizz,{' '} + Octo,{' '} + Hoot,{' '} + Buzz, and more. Agent states include: +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ State + + Animation + + Description +
+ Thinking + BouncingAgent is planning its approach
+ Working + ShakeActively writing code
+ Testing + RotatingRunning tests
+ Success + CelebrationFeature completed
+ Error + Red shakeEncountered an issue
+ Struggling + Concerned expressionMultiple consecutive failures
+ + {/* Viewing Agent Logs */} +

+ Viewing Agent Logs +

+
    +
  • Click any agent card in Mission Control to see its log output
  • +
  • Logs are color-coded by level (info, warning, error)
  • +
  • Output streams in real time via WebSocket
  • +
  • Each agent's logs are isolated and filterable
  • +
+ + {/* Process Limits */} +

+ Process Limits +

+

+ The orchestrator enforces strict bounds on concurrent processes to prevent resource + exhaustion: +

+ + + + + + + + + + + + + + + + + + + + + + + + + +
+ Limit + + Value +
+ + MAX_PARALLEL_AGENTS + + 5 (maximum concurrent coding agents)
+ + MAX_TOTAL_AGENTS + + + 10 (hard limit on coding + testing combined) +
Testing agents + Capped at the same count as coding agents +
Total Python processes + Never exceeds 11 (1 orchestrator + 5 coding + 5 testing) +
+
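The worst case in that table is simple arithmetic over the documented limits:

```python
MAX_PARALLEL_AGENTS = 5                     # documented cap on coding agents
max_testing_agents = MAX_PARALLEL_AGENTS    # testing agents capped at the same count
worst_case_processes = 1 + MAX_PARALLEL_AGENTS + max_testing_agents
assert worst_case_processes == 11           # 1 orchestrator + 5 coding + 5 testing
```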
+ ) +} diff --git a/ui/src/components/docs/sections/AppSpecSetup.tsx b/ui/src/components/docs/sections/AppSpecSetup.tsx new file mode 100644 index 0000000..a4084a9 --- /dev/null +++ b/ui/src/components/docs/sections/AppSpecSetup.tsx @@ -0,0 +1,130 @@ +/** + * AppSpecSetup Documentation Section + * + * Explains what an app spec is, how to create one interactively + * or manually, the initializer agent, and starting after spec creation. + */ + +export function AppSpecSetup() { + return ( +
+ {/* What is an App Spec? */} +

+ What is an App Spec? +

+

+ The app spec is an XML document that describes the application to be built. It lives at{' '} + + .autocoder/prompts/app_spec.txt + {' '} + and tells the initializer agent what features to create. The spec defines your app's name, + description, tech stack, and the features that should be implemented. +

+
+
{`
+  My App
+  A task management app
+  
+    User authentication with login/signup
+    Task CRUD with categories
+  
+`}
+
+ + {/* Creating a Spec with Claude */} +

+ Creating a Spec with Claude +

+
    +
  • + In the UI, select your project and click{' '} + Create Spec +
  • +
  • + An interactive chat with Claude helps you define your app — it asks about + your app's purpose, features, and tech stack +
  • +
  • The spec is generated and saved automatically
  • +
  • After creation, the initializer agent can be started immediately
  • +
+ + {/* Writing a Spec Manually */} +

+ Writing a Spec Manually +

+
    +
  • + Create{' '} + + .autocoder/prompts/app_spec.txt + {' '} + in your project directory +
  • +
  • + Use XML format with app name, description, tech stack, and a feature list +
  • +
  • + Be specific about each feature — the initializer creates test cases from these + descriptions +
  • +
  • + Include technical constraints where needed (e.g.,{' '} + + "use PostgreSQL" + + ,{' '} + + "React with TypeScript" + + ) +
  • +
+ + {/* The Initializer Agent */} +

+ The Initializer Agent +

+

+ The initializer agent is the first agent to run on a new project. It bridges the gap between + your spec and the coding agents that implement features. +

+
    +
  • Runs automatically on first agent start when no features exist in the database
  • +
  • Reads the app spec and creates features with descriptions, steps, and priorities
  • +
  • + Sets up feature dependencies (e.g., "auth must be done before user profile") +
  • +
  • + Creates the feature database at{' '} + + .autocoder/features.db + +
  • +
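The "runs automatically when no features exist" behavior above can be pictured as a small first-run check. This is a plain sketch only: the real agents go through MCP tools rather than querying the database directly, and the features table name is an assumption.

```python
# Illustrative first-run check: run the initializer only when no features exist.
import sqlite3
from pathlib import Path

def needs_initializer(project_dir: str) -> bool:
    db = Path(project_dir) / ".autocoder" / "features.db"
    if not db.exists():
        return True
    conn = sqlite3.connect(db)
    try:
        (count,) = conn.execute("SELECT COUNT(*) FROM features").fetchone()
        return count == 0
    except sqlite3.OperationalError:
        return True   # database file exists but the table was never created
    finally:
        conn.close()
```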
+ + {/* Starting After Spec Creation */} +

+ Starting After Spec Creation +

+

+ Once your spec is ready, you can kick off the agents: +

+
    +
  • + From the UI, click the Play button to start + the agent +
  • +
  • + Or run from the CLI: +
  • +
+
+
python autonomous_agent_demo.py --project-dir your-project
+
+

+ The initializer runs first to create features, then coding agents take over to implement + them. Progress is shown in real time on the Kanban board. +

+
+ ) +} diff --git a/ui/src/components/docs/sections/AppearanceThemes.tsx b/ui/src/components/docs/sections/AppearanceThemes.tsx new file mode 100644 index 0000000..d713983 --- /dev/null +++ b/ui/src/components/docs/sections/AppearanceThemes.tsx @@ -0,0 +1,185 @@ +/** + * AppearanceThemes Documentation Section + * + * Covers built-in themes with color previews, dark/light mode toggling, + * the theme selector dropdown, and global keyboard shortcuts. + */ + +import { Badge } from '@/components/ui/badge' + +/** Theme descriptor used to render the preview rows. */ +interface ThemePreview { + name: string + description: string + colors: { label: string; hex: string }[] +} + +const THEMES: ThemePreview[] = [ + { + name: 'Twitter', + description: 'Clean, modern blue design. Primary: blue, Background: white/dark gray.', + colors: [ + { label: 'Background', hex: '#ffffff' }, + { label: 'Primary', hex: '#4a9eff' }, + { label: 'Accent', hex: '#e8f4ff' }, + ], + }, + { + name: 'Claude', + description: "Warm beige/cream tones with orange accents. Inspired by Anthropic's Claude brand.", + colors: [ + { label: 'Background', hex: '#faf6f0' }, + { label: 'Primary', hex: '#c75b2a' }, + { label: 'Accent', hex: '#f5ede4' }, + ], + }, + { + name: 'Neo Brutalism', + description: 'Bold colors, hard shadows, no border radius. High contrast, expressive design.', + colors: [ + { label: 'Background', hex: '#ffffff' }, + { label: 'Primary', hex: '#ff4d00' }, + { label: 'Accent', hex: '#ffeb00' }, + ], + }, + { + name: 'Retro Arcade', + description: 'Vibrant pink and teal with pixel-art inspired styling.', + colors: [ + { label: 'Background', hex: '#f0e6d3' }, + { label: 'Primary', hex: '#e8457c' }, + { label: 'Accent', hex: '#4eb8a5' }, + ], + }, + { + name: 'Aurora', + description: 'Deep violet and luminous teal, inspired by the northern lights.', + colors: [ + { label: 'Background', hex: '#faf8ff' }, + { label: 'Primary', hex: '#8b5cf6' }, + { label: 'Accent', hex: '#2dd4bf' }, + ], + }, + { + name: 'Business', + description: 'Professional deep navy and gray monochrome palette for corporate use.', + colors: [ + { label: 'Background', hex: '#eaecef' }, + { label: 'Primary', hex: '#000e4e' }, + { label: 'Accent', hex: '#6b7280' }, + ], + }, +] + +/** Keyboard shortcut descriptor for the shortcuts table. */ +interface Shortcut { + key: string + action: string +} + +const SHORTCUTS: Shortcut[] = [ + { key: '?', action: 'Show keyboard shortcuts help' }, + { key: 'D', action: 'Toggle debug panel' }, + { key: 'T', action: 'Toggle terminal' }, + { key: 'G', action: 'Toggle Kanban/Graph view' }, + { key: 'N', action: 'Add new feature' }, + { key: 'E', action: 'Expand project with AI' }, + { key: 'A', action: 'Toggle AI assistant' }, + { key: ',', action: 'Open settings' }, + { key: 'R', action: 'Reset project' }, + { key: 'Escape', action: 'Close current modal' }, +] + +export function AppearanceThemes() { + return ( +
+ {/* Themes Overview */} +

+ Themes Overview +

+

+ AutoCoder comes with 6 built-in themes. Each theme provides a complete visual identity including + colors, accents, and dark mode variants. +

+
+ {THEMES.map((theme) => ( +
+ {/* Color swatches */} +
+ {theme.colors.map((color) => ( +
+ ))} +
+ {/* Description */} +
+ {theme.name} + {theme.name === 'Twitter' && ( + <> + {' '} + Default + + )} + — {theme.description} +
+
+ ))} +
+ + {/* Dark & Light Mode */} +

+ Dark & Light Mode +

+
    +
  • Toggle with the sun/moon icon in the header
  • +
  • All 6 themes have dedicated dark mode variants
  • +
  • + Preference is saved in browser{' '} + localStorage +
  • +
  • Dark mode affects all UI elements including the docs page
  • +
+ + {/* Theme Selector */} +

+ Theme Selector +

+
    +
  • Hover over the palette icon in the header to open the theme dropdown
  • +
  • Preview themes by hovering over each option (live preview)
  • +
  • Click to select — the change is applied instantly
  • +
  • Theme preference persists across sessions
  • +
+ + {/* Keyboard Shortcuts */} +

+ Keyboard Shortcuts +

+

+ Press ? anywhere in the UI to see the shortcuts help overlay. +

+ + + + + + + + + {SHORTCUTS.map((shortcut) => ( + + + + + ))} + +
KeyAction
+ {shortcut.key} + {shortcut.action}
+
+ ) +} diff --git a/ui/src/components/docs/sections/DeveloperTools.tsx b/ui/src/components/docs/sections/DeveloperTools.tsx new file mode 100644 index 0000000..06a5999 --- /dev/null +++ b/ui/src/components/docs/sections/DeveloperTools.tsx @@ -0,0 +1,104 @@ +/** + * DeveloperTools Documentation Section + * + * Covers the debug panel, agent logs tab, dev server logs, + * terminal, dev server control, and per-agent logs. + */ + +import { Badge } from '@/components/ui/badge' + +export function DeveloperTools() { + return ( +
+ {/* Debug Panel */} +

+ Debug Panel +

+
    +
  • + Press D to toggle the debug panel at the bottom of the screen +
  • +
  • Resizable by dragging the top edge
  • +
  • + Three tabs: Agent Logs,{' '} + Dev Server Logs, and{' '} + Terminal +
  • +
  • Shows real-time output from agents and dev server
  • +
+ + {/* Agent Logs Tab */} +

+ Agent Logs Tab +

+
    +
  • + Color-coded log levels:{' '} + Error,{' '} + Warning,{' '} + Info,{' '} + Debug,{' '} + Success +
  • +
  • Timestamps on each log entry
  • +
  • Auto-scrolls to latest entry
  • +
  • Clear button to reset log view
  • +
+ + {/* Dev Server Logs Tab */} +

+ Dev Server Logs Tab +

+
    +
  • + Shows stdout/stderr from the project’s dev server (e.g.,{' '} + npm run dev) +
  • +
  • Useful for seeing compilation errors, hot reload status
  • +
  • Clear button available
  • +
+ + {/* Terminal */} +

+ Terminal +

+
    +
  • + Press T to open terminal (opens debug panel on the terminal tab) +
  • +
  • Full xterm.js terminal emulator with WebSocket backend
  • +
  • Multi-tab support: create multiple terminal sessions
  • +
  • Rename tabs by double-clicking the tab title
  • +
  • Each tab runs an independent PTY (pseudo-terminal) session
  • +
  • Supports standard terminal features: colors, cursor movement, history
  • +
+ + {/* Dev Server Control */} +

+ Dev Server Control +

+
    +
  • Start/stop button in the header bar
  • +
  • + Auto-detects project type (Next.js, Vite, CRA, etc.) and runs the appropriate dev command +
  • +
  • Shows the dev server URL when running
  • +
  • Automatic crash detection and restart option
  • +
  • Dev server output piped to the Dev Server Logs tab
  • +
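The auto-detection step above is not shown in this patch; one plausible approach, sketched with assumed marker files and commands, is to inspect the project's config files and pick a matching dev command:

```python
# Rough sketch of dev-command detection; the real detection logic may differ.
import json
from pathlib import Path

def detect_dev_command(project_dir: str) -> list[str]:
    root = Path(project_dir)
    pkg = root / "package.json"
    if pkg.exists():
        scripts = json.loads(pkg.read_text()).get("scripts", {})
        if "dev" in scripts:
            return ["npm", "run", "dev"]   # Next.js / Vite style projects
        if "start" in scripts:
            return ["npm", "start"]        # CRA style projects
    if (root / "manage.py").exists():
        return ["python", "manage.py", "runserver"]
    raise RuntimeError("No known dev server configuration found")
```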
+ + {/* Per-Agent Logs */} +

+ Per-Agent Logs +

+
    +
  • In Agent Mission Control, click any agent card to see its individual logs
  • +
  • + Logs include: what feature the agent is working on, code changes, test results +
  • +
  • Separate logs for coding agents and testing agents
  • +
  • Real-time streaming — see agent output as it happens
  • +
+
+ ) +} diff --git a/ui/src/components/docs/sections/FAQ.tsx b/ui/src/components/docs/sections/FAQ.tsx new file mode 100644 index 0000000..82b4f66 --- /dev/null +++ b/ui/src/components/docs/sections/FAQ.tsx @@ -0,0 +1,157 @@ +/** + * FAQ Documentation Section + * + * Covers frequently asked questions about project setup, agent behavior, + * customization, troubleshooting, and real-time monitoring. + */ + +export function FAQ() { + return ( +
+ {/* Starting a New Project */} +

+ Starting a New Project +

+

+ How do I use AutoCoder on a new project? +

+

+ From the UI, select "Create New Project" in the project dropdown. Choose a folder and + name. Then create an app spec using the interactive chat or write one manually. Click Start to run + the initializer agent, which creates features from your spec. Coding agents then implement features + automatically. +

+ + {/* Adding to Existing Project */} +

+ Adding to Existing Project +

+

+ How do I add AutoCoder to an existing project? +

+

+ Register the project folder through the UI project selector using "Add Existing". + AutoCoder creates a{' '} + .autocoder/ directory + alongside your existing code. Write an app spec describing what to build (new features), and the + agent works within your existing codebase. +

+ + {/* Agent Crashes */} +

+ Agent Crashes +

+

+ What happens if an agent crashes? +

+

+ The orchestrator (Maestro) automatically detects crashed agents and can restart them. Features + claimed by a crashed agent are released back to the pending queue. Scheduled runs use exponential + backoff with up to 3 retries. Check the agent logs in the debug panel for crash details. +

+ + {/* Custom Bash Commands */} +

+ Custom Bash Commands +

+

+ How do I customize which bash commands the agent can use? +

+

+ Create{' '} + + .autocoder/allowed_commands.yaml + {' '} + in your project with a list of allowed commands. Supports exact names, wildcards (e.g.,{' '} + swift*), and local + scripts. See the Security section for full details on the command hierarchy. +

+ + {/* Blocked Features */} +

+ Blocked Features +

+

+ Why are my features stuck in "blocked" status? +

+

+ Features with unmet dependencies show as blocked. Check the Dependency Graph view (press{' '} + G) to see which + features are waiting on others. A feature can only start when all its dependencies are marked as + "passing". Remove or reorder dependencies if needed. +

+ + {/* Running in Parallel */} +

+ Running in Parallel +

+

+ How do I run multiple agents in parallel? +

+

+ Use the concurrency slider in the agent control bar (1–5 agents) or pass{' '} + + --parallel --max-concurrency N + {' '} + on the CLI. Each agent claims features atomically, so there is no conflict. More agents means + faster progress but higher API cost. +

+ + {/* Using Local Models */} +

+ Using Local Models +

+

+ Can I use a local model instead of the Claude API? +

+

+ Yes, via Ollama v0.14.0+. Install Ollama, pull a coding model (e.g.,{' '} + qwen3-coder), and + configure your{' '} + .env to point to + localhost. See the Advanced Configuration section for full setup instructions. +

+ + {/* Resetting a Project */} +

+ Resetting a Project +

+

+ How do I reset a project and start over? +

+

+ Press R (when agents + are stopped) to open the Reset modal. Choose between: "Reset Features" (clears the + feature database, keeps the spec) or "Full Reset" (removes the spec too, starts fresh). + After a full reset, you will be prompted to create a new spec. +

+ + {/* Coding vs Testing Agents */} +

+ Coding vs Testing Agents +

+

+ What's the difference between coding and testing agents? +

+

+ Coding agents implement features — they write code, create files, and run feature-specific + tests. Testing agents run regression tests across completed features to ensure new code does not + break existing functionality. Configure the testing agent ratio (0–3) in settings. +

+ + {/* Monitoring in Real Time */} +

+ Monitoring in Real Time +

+

+ How do I view what an agent is doing in real time? +

+

+ Multiple ways: (1) Watch the Kanban board for feature status changes. (2) Open the debug panel + (D key) for live + agent logs. (3) Click agent cards in Mission Control for per-agent logs. (4) The progress bar + updates in real time via WebSocket. +

+
+ ) +} diff --git a/ui/src/components/docs/sections/FeaturesKanban.tsx b/ui/src/components/docs/sections/FeaturesKanban.tsx new file mode 100644 index 0000000..4076af8 --- /dev/null +++ b/ui/src/components/docs/sections/FeaturesKanban.tsx @@ -0,0 +1,182 @@ +/** + * FeaturesKanban Documentation Section + * + * Covers the Kanban board, feature cards, dependency graph view, + * adding/editing features, dependencies, expanding with AI, + * and priority ordering. + */ + +import { Badge } from '@/components/ui/badge' + +export function FeaturesKanban() { + return ( +
+ {/* Kanban Board Overview */} +

+ Kanban Board Overview +

+

+ The main view organizes features into three columns representing their current status: +

+ + + + + + + + + + + + + + + + + + + + + + + + + +
+ Column + + Color + + Meaning +
Pending + Yellow + Waiting to be picked up
In Progress + Cyan + An agent is actively working on it
Done + Green + Implemented and passing
+

+ Each feature appears as a card showing its name, priority, and category. The board updates + in real time as agents work. +

+ + {/* Feature Cards */} +

+ Feature Cards +

+
    +
  • + Each card displays a priority badge (P1 through{' '} + P5), a category tag, and the feature name +
  • +
  • Status icons indicate the current state of the feature
  • +
  • Click a card to open the detail modal with the full description and test steps
  • +
  • + Cards in the "In Progress" column show which agent is currently working on them +
  • +
+ + {/* Dependency Graph View */} +

+ Dependency Graph View +

+

+ An alternative to the Kanban board that visualizes feature relationships as a directed graph. +

+
    +
  • + Press G to toggle between Kanban and Graph view +
  • +
  • Uses the dagre layout engine for automatic node positioning
  • +
  • + Nodes are colored by status — pending, in-progress, and done each have + distinct colors +
  • +
  • Arrows show dependency relationships between features
  • +
  • Click any node to open the feature detail modal
  • +
  • Supports both horizontal and vertical layout orientations
  • +
+ + {/* Adding Features */} +

+ Adding Features +

+
    +
  • + Press N to open the Add Feature form +
  • +
  • Fill in: name, description, category, and priority
  • +
  • Optionally define steps (test criteria the agent must pass to complete the feature)
  • +
  • New features are added to the Pending column immediately
  • +
+ + {/* Editing & Deleting Features */} +

+ Editing & Deleting Features +

+
    +
  • Click a feature card to open the detail modal
  • +
  • + Click Edit to modify the name, description, + category, priority, or steps +
  • +
  • + Delete removes the feature permanently +
  • +
  • + Skip moves a feature to the end of the queue + without deleting it +
  • +
+ + {/* Feature Dependencies */} +

+ Feature Dependencies +

+

+ Features can declare dependencies on other features, ensuring they are implemented in the + correct order. +

+
    +
  • Set dependencies in the feature edit modal
  • +
  • + Cycle detection prevents circular dependencies (uses Kahn's algorithm combined + with DFS) +
  • +
  • + Blocked features display a lock icon and cannot be claimed by agents until their + dependencies are met +
  • +
  • The Dependency Graph view makes these relationships easy to visualize
  • +
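The cycle check mentioned in the list above reduces to a standard property of Kahn's algorithm: if a topological sort cannot consume every node, the leftover nodes sit on a cycle. A self-contained sketch follows (the feature/dependency shapes are assumptions, not the project's data model):

```python
# Kahn's algorithm sketch: True if the dependency graph contains a cycle.
# deps maps a feature id to the ids it depends on; shapes are illustrative.
from collections import deque

def has_cycle(deps: dict[int, list[int]]) -> bool:
    nodes = set(deps) | {p for prereqs in deps.values() for p in prereqs}
    indegree = {n: 0 for n in nodes}
    dependents: dict[int, list[int]] = {n: [] for n in nodes}
    for node, prereqs in deps.items():
        for prereq in prereqs:
            dependents[prereq].append(node)
            indegree[node] += 1

    queue = deque(n for n in nodes if indegree[n] == 0)
    visited = 0
    while queue:
        n = queue.popleft()
        visited += 1
        for d in dependents[n]:
            indegree[d] -= 1
            if indegree[d] == 0:
                queue.append(d)

    return visited != len(nodes)   # unreached nodes form at least one cycle

assert has_cycle({1: [], 2: [1], 3: [2]}) is False
assert has_cycle({1: [2], 2: [1]}) is True
```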
+ + {/* Expanding Project with AI */} +

+ Expanding Project with AI +

+
    +
  • + Press E to open the Expand Project modal +
  • +
  • Chat with Claude to describe the new features you want to add
  • +
  • Supports image attachments for UI mockups or design references
  • +
  • Claude creates properly structured features with appropriate dependencies
  • +
  • New features appear on the board immediately after creation
  • +
+ + {/* Priority & Ordering */} +

+ Priority & Ordering +

+
    +
  • + Features are ordered by priority: P1 is the highest + and P5 is the lowest +
  • +
  • Within the same priority level, features are ordered by creation time
  • +
  • Agents always pick up the highest-priority ready feature first
  • +
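Expressed as a query, the pickup order above amounts to sorting pending features by priority and then by creation time. This is an illustrative sketch with assumed column names, and it omits the dependency (readiness) filter for brevity:

```python
# Illustrative "next feature to pick up" query; column names are assumptions.
import sqlite3

def next_pending_feature(db_path: str):
    conn = sqlite3.connect(db_path)
    try:
        return conn.execute(
            "SELECT id, name FROM features "
            "WHERE status = 'pending' "
            "ORDER BY priority ASC, created_at ASC "  # P1 first, then oldest
            "LIMIT 1"
        ).fetchone()
    finally:
        conn.close()
```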
+
+ ) +} diff --git a/ui/src/components/docs/sections/GettingStarted.tsx b/ui/src/components/docs/sections/GettingStarted.tsx new file mode 100644 index 0000000..1ee560b --- /dev/null +++ b/ui/src/components/docs/sections/GettingStarted.tsx @@ -0,0 +1,134 @@ +/** + * GettingStarted Documentation Section + * + * Covers what AutoCoder is, quick start commands, + * creating and adding projects, and system requirements. + */ + +import { Badge } from '@/components/ui/badge' + +export function GettingStarted() { + return ( +
+ {/* What is AutoCoder? */} +

+ What is AutoCoder? +

+

+ AutoCoder is an autonomous coding agent system that builds complete applications over multiple + sessions using a two-agent pattern: +

+
    +
  1. + Initializer Agent — reads your app spec + and creates features in a SQLite database +
  2. +
  3. + Coding Agent — implements features one by + one, marking each as passing when complete +
  4. +
+

+ It comes with a React-based UI for monitoring progress, managing features, and controlling agents + in real time. +

+ + {/* Quick Start */} +

+ Quick Start +

+

+ Launch AutoCoder with a single command. The CLI menu lets you create or select a project, + while the Web UI provides a full dashboard experience. +

+
+
{`# Windows
+start.bat          # CLI menu
+start_ui.bat       # Web UI
+
+# macOS/Linux
+./start.sh         # CLI menu
+./start_ui.sh      # Web UI`}
+
+ + {/* Creating a New Project */} +

+ Creating a New Project +

+
    +
  • + From the UI, click the project dropdown and select{' '} + Create New Project +
  • +
  • Enter a name and select or browse to a folder for the project
  • +
  • + Create an app spec interactively with Claude, or write one manually in XML format +
  • +
  • + The initializer agent reads your spec and creates features automatically +
  • +
+ + {/* Adding to an Existing Project */} +

+ Adding to an Existing Project +

+
    +
  • Register the project folder via the UI project selector
  • +
  • + AutoCoder creates a{' '} + .autocoder/{' '} + directory inside your project +
  • +
  • + Existing code is preserved — AutoCoder adds its configuration alongside it +
  • +
  • Write or generate an app spec describing what to build
  • +
+ + {/* System Requirements */} +

+ System Requirements +

+ + + + + + + + + + + + + + + + + + + + + + + + + +
+ Requirement + + Details +
Python + 3.11+ +
Node.js + 20+{' '} + (for UI development) +
Claude Code CLI + Required for running agents +
Operating System + Windows, macOS, or Linux +
+
+ ) +} diff --git a/ui/src/components/docs/sections/ProjectStructure.tsx b/ui/src/components/docs/sections/ProjectStructure.tsx new file mode 100644 index 0000000..0b6aa6b --- /dev/null +++ b/ui/src/components/docs/sections/ProjectStructure.tsx @@ -0,0 +1,162 @@ +/** + * ProjectStructure Documentation Section + * + * Covers the .autocoder/ directory layout, features database, + * prompts directory, allowed commands, CLAUDE.md convention, + * legacy migration, and Claude inheritance. + */ + +export function ProjectStructure() { + return ( +
+ {/* .autocoder/ Directory Layout */} +

+ .autocoder/ Directory Layout +

+

+ Every AutoCoder project stores its configuration and runtime files in a{' '} + .autocoder/{' '} + directory at the project root. +

+
+
{`your-project/
+\u251C\u2500\u2500 .autocoder/
+\u2502   \u251C\u2500\u2500 features.db              # SQLite feature database
+\u2502   \u251C\u2500\u2500 .agent.lock              # Lock file (prevents multiple instances)
+\u2502   \u251C\u2500\u2500 .gitignore               # Ignores runtime files
+\u2502   \u251C\u2500\u2500 allowed_commands.yaml    # Per-project bash command allowlist
+\u2502   \u2514\u2500\u2500 prompts/
+\u2502       \u251C\u2500\u2500 app_spec.txt         # Application specification (XML)
+\u2502       \u251C\u2500\u2500 initializer_prompt.md # First session prompt
+\u2502       \u2514\u2500\u2500 coding_prompt.md     # Continuation session prompt
+\u251C\u2500\u2500 CLAUDE.md                    # Claude Code convention file
+\u2514\u2500\u2500 app_spec.txt                 # Root copy for template compatibility`}
+
+ + {/* Features Database */} +

+ Features Database +

+
    +
  • + SQLite database managed by SQLAlchemy, stored at{' '} + + .autocoder/features.db + +
  • +
  • + Each feature record includes: id, priority, category, name, description, steps, status + (pending,{' '} + in_progress,{' '} + passing,{' '} + failing), + and dependencies +
  • +
  • Agents interact with features through MCP server tools, not direct database access
  • +
  • Viewable in the UI via the Kanban board or the Dependency Graph view
  • +
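The record fields listed above map naturally onto a SQLAlchemy model; the following is a sketch only, and the actual schema inside features.db may name or type things differently:

```python
# Hypothetical SQLAlchemy 2.0 model mirroring the fields described above.
from sqlalchemy import Integer, String, Text
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column

class Base(DeclarativeBase):
    pass

class Feature(Base):
    __tablename__ = "features"

    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    priority: Mapped[int] = mapped_column(Integer, default=3)
    category: Mapped[str] = mapped_column(String(100))
    name: Mapped[str] = mapped_column(String(255))
    description: Mapped[str] = mapped_column(Text)
    steps: Mapped[str] = mapped_column(Text, default="")          # serialized test steps
    status: Mapped[str] = mapped_column(String(20), default="pending")
    dependencies: Mapped[str] = mapped_column(Text, default="")   # e.g. comma-separated ids
```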
+ + {/* Prompts Directory */} +

+ Prompts Directory +

+

+ Prompts control how agents behave during each session: +

+
    +
  • + app_spec.txt{' '} + — your application specification in XML format +
  • +
  • + + initializer_prompt.md + {' '} + — prompt for the initializer agent (creates features from the spec) +
  • +
  • + + coding_prompt.md + {' '} + — prompt for coding agents (implements features) +
  • +
+

+ These can be customized per project. If not present, defaults from{' '} + + .claude/templates/ + {' '} + are used as a fallback. +

+ + {/* Allowed Commands Config */} +

+ Allowed Commands Config +

+

+ The optional{' '} + + .autocoder/allowed_commands.yaml + {' '} + file lets you grant project-specific bash commands to the agent. This is useful when your + project requires tools beyond the default allowlist (e.g., language-specific compilers or + custom build scripts). +

+

+ See the Security section for full details on + the command hierarchy and how project-level commands interact with global and organization + policies. +

+ + {/* CLAUDE.md Convention */} +

+ CLAUDE.md Convention +

+
    +
  • + CLAUDE.md{' '} + lives at the project root, as required by the Claude Code SDK +
  • +
  • + Contains project-specific instructions that the agent follows during every coding session +
  • +
  • + Automatically inherited by all agents working on the project — no additional + configuration needed +
  • +
+ + {/* Legacy Layout Migration */} +

+ Legacy Layout Migration +

+

+ Older projects stored configuration files directly at the project root (e.g.,{' '} + features.db,{' '} + prompts/). +

+
    +
  • + On the next agent start, these files are automatically migrated into{' '} + .autocoder/ +
  • +
  • Dual-path resolution ensures both old and new layouts work transparently
  • +
  • No manual migration is needed — it happens seamlessly
  • +
+ + {/* Claude Inheritance */} +

+ Claude Inheritance +

+

+ Agents inherit all MCP servers, tools, skills, custom commands, and{' '} + CLAUDE.md{' '} + from the target project folder. +

+
+ If your project has its own MCP servers or Claude commands, the coding agent can use them. + The agent essentially runs as if Claude Code were opened in your project directory. +
+
+ ) +} diff --git a/ui/src/components/docs/sections/Scheduling.tsx b/ui/src/components/docs/sections/Scheduling.tsx new file mode 100644 index 0000000..913bb78 --- /dev/null +++ b/ui/src/components/docs/sections/Scheduling.tsx @@ -0,0 +1,102 @@ +/** + * Scheduling Documentation Section + * + * Covers schedule creation, per-schedule settings, + * overrides, and crash recovery with exponential backoff. + */ + +import { Badge } from '@/components/ui/badge' + +export function Scheduling() { + return ( +
+ {/* What Scheduling Does */} +

+ What Scheduling Does +

+

+ Scheduling automates agent runs at specific times. Set up a schedule and AutoCoder will automatically + start agents on your project — useful for overnight builds, periodic maintenance, or continuous + development. +

+ + {/* Creating a Schedule */} +

+ Creating a Schedule +

+
    +
  • Click the clock icon in the header to open the Schedule modal
  • +
  • Set: start time, duration (how long agents run), days of the week
  • +
  • Optionally configure: YOLO mode, concurrency, model selection
  • +
  • Schedule is saved and starts at the next matching time
  • +
+ + {/* Schedule Settings */} +

+ Schedule Settings +

+

+ Each schedule can override global settings: +

+ + + + + + + + + + + + + + + + + + + + + + + + + +
SettingDetails
YOLO modeOn/off per schedule
Concurrency + 1–5 agents +
Model tierOpus / Sonnet / Haiku
DurationHow long the session runs before auto-stopping
+
+ All schedule times are in UTC. +
+ + {/* Schedule Overrides */} +

+ Schedule Overrides +

+
    +
  • Manually skip a scheduled run (one-time override)
  • +
  • Pause a schedule temporarily (resumes on next period)
  • +
  • + View upcoming runs with{' '} + Running until /{' '} + Next run indicators +
  • +
  • Override without deleting the schedule
  • +
+ + {/* Crash Recovery */} +

+ Crash Recovery +

+
    +
  • If a scheduled agent crashes, it uses exponential backoff for retries
  • +
  • + Maximum 3 retry attempts per scheduled run +
  • +
  • Backoff prevents rapid restart loops
  • +
  • Failed runs are logged for troubleshooting
  • +
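A minimal sketch of the retry behavior described above, assuming up to 3 attempts and a base delay of 60 seconds (both the delay and the function names are assumptions, not values from this patch):

```python
# Illustrative retry wrapper for a scheduled run with exponential backoff.
import time

def run_scheduled_with_retries(start_agents, base_delay: int = 60, max_retries: int = 3) -> bool:
    for attempt in range(1, max_retries + 1):
        try:
            start_agents()
            return True
        except Exception as exc:   # crashed run
            if attempt == max_retries:
                print(f"Scheduled run failed after {attempt} attempts: {exc!r}")
                return False       # failure is logged for troubleshooting
            delay = base_delay * (2 ** (attempt - 1))   # 60s, 120s, ...
            print(f"Scheduled run crashed ({exc!r}); retrying in {delay}s")
            time.sleep(delay)
    return False
```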
+
+ ) +} diff --git a/ui/src/components/docs/sections/Security.tsx b/ui/src/components/docs/sections/Security.tsx new file mode 100644 index 0000000..2e4e09a --- /dev/null +++ b/ui/src/components/docs/sections/Security.tsx @@ -0,0 +1,218 @@ +/** + * Security Documentation Section + * + * Covers the defense-in-depth security model: command validation layers, + * the hierarchical allowlist/blocklist system, per-project and org-level + * configuration, extra read paths, and filesystem sandboxing. + */ + +import { Badge } from '@/components/ui/badge' + +export function Security() { + return ( +
+ {/* Command Validation Overview */} +

+ Command Validation Overview +

+

+ AutoCoder uses a defense-in-depth approach for security. All three layers must pass before any + command is executed: +

+
    +
  1. + OS-level sandbox — bash commands run inside + a restricted sandbox environment +
  2. +
  3. + Filesystem restriction — agents can only + access the project directory (plus configured extra read paths) +
  4. +
  5. + Hierarchical allowlist — every bash command + is validated against a multi-level allowlist system +
  6. +
+ + {/* Command Hierarchy */} +

+ Command Hierarchy +

+

+ Commands are evaluated against a 5-level hierarchy, from highest to lowest priority: +

+
    +
  1. + Hardcoded Blocklist{' '} + security.py{' '} + — NEVER allowed, cannot be overridden +
  2. +
  3. + Org Blocklist{' '} + ~/.autocoder/config.yaml{' '} + — org-wide blocks, cannot be project-overridden +
  4. +
  5. + Org Allowlist{' '} + ~/.autocoder/config.yaml{' '} + — available to all projects +
  6. +
  7. + Global Allowlist{' '} + security.py{' '} + — default commands (npm, git, curl, etc.) +
  8. +
  9. + Project Allowlist{' '} + + .autocoder/allowed_commands.yaml + {' '} + — project-specific additions +
  10. +
+
+ Higher priority levels always win. A command blocked at level 1 or 2 can never be allowed by + lower levels. +
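Condensed into code, that precedence could be checked in order from level 1 down. This is purely illustrative, since the real logic lives in security.py and is not part of this patch; the set names mirror the hierarchy, the contents are examples, and wildcard matching is omitted.

```python
# Illustrative precedence check, evaluated from highest priority to lowest.
HARDCODED_BLOCKLIST = {"sudo", "dd", "shutdown"}   # level 1 examples

def is_command_allowed(
    cmd: str,
    org_blocklist: set[str],      # level 2
    org_allowlist: set[str],      # level 3
    global_allowlist: set[str],   # level 4
    project_allowlist: set[str],  # level 5
) -> bool:
    if cmd in HARDCODED_BLOCKLIST:
        return False              # never allowed, cannot be overridden
    if cmd in org_blocklist:
        return False              # org-wide block beats any allowlist below
    if cmd in org_allowlist or cmd in global_allowlist or cmd in project_allowlist:
        return True
    return False                  # unknown commands are rejected by default
```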
+ + {/* Hardcoded Blocklist */} +

+ Hardcoded Blocklist +

+

+ The following commands can never be allowed, regardless + of any configuration. They are hardcoded in{' '} + security.py and + cannot be overridden: +

+
+ {['dd', 'sudo', 'su', 'shutdown', 'reboot', 'poweroff', 'mkfs', 'fdisk', 'mount', 'umount', 'systemctl'].map( + (cmd) => ( + + {cmd} + + ), + )} +
+ + {/* Global Allowlist */} +

+ Global Allowlist +

+

+ Default commands available to all projects out of the box. These are the standard development + commands needed for most projects: +

+
+ {['npm', 'npx', 'node', 'git', 'curl', 'python', 'pip', 'cat', 'ls', 'mkdir', 'cp', 'mv', 'rm', 'grep', 'find'].map( + (cmd) => ( + + {cmd} + + ), + )} +
+ + {/* Per-Project Allowed Commands */} +

+ Per-Project Allowed Commands +

+

+ Each project can define additional allowed commands in{' '} + + .autocoder/allowed_commands.yaml + + : +

+
+
{`# .autocoder/allowed_commands.yaml
+version: 1
+commands:
+  # Exact command name
+  - name: swift
+    description: Swift compiler
+
+  # Wildcard - matches swiftc, swiftlint, swiftformat
+  - name: swift*
+    description: All Swift tools (wildcard)
+
+  # Local project scripts
+  - name: ./scripts/build.sh
+    description: Project build script`}
+
+

+ Pattern matching: exact match ( + swift), wildcard ( + swift* matches swiftc, + swiftlint, etc.), and scripts ( + ./scripts/build.sh). + Limit: 100 commands per project. +

+ + {/* Organization Configuration */} +

+ Organization Configuration +

+

+ System administrators can set org-wide policies in{' '} + ~/.autocoder/config.yaml: +

+
+
{`# ~/.autocoder/config.yaml
+version: 1
+
+# Commands available to ALL projects
+allowed_commands:
+  - name: jq
+    description: JSON processor
+
+# Commands blocked across ALL projects (cannot be overridden)
+blocked_commands:
+  - aws        # Prevent accidental cloud operations
+  - kubectl    # Block production deployments`}
+
+

+ Org-level blocked commands cannot be overridden by any project configuration. +

+ + {/* Extra Read Paths */} +

+ Extra Read Paths +

+

+ Allow agents to read files from directories outside the project folder via the{' '} + EXTRA_READ_PATHS{' '} + environment variable: +

+
+
EXTRA_READ_PATHS=/path/to/docs,/path/to/shared-libs
+
+
    +
  • Must be absolute paths and must exist as directories
  • +
  • Only read operations allowed (Read, Glob, Grep — no Write/Edit)
  • +
  • + Sensitive directories are always blocked:{' '} + .ssh,{' '} + .aws,{' '} + .gnupg,{' '} + .docker,{' '} + .kube, etc. +
  • +
+ + {/* Filesystem Sandboxing */} +

+ Filesystem Sandboxing +

+
    +
  • Agents can only write to the project directory
  • +
  • Read access is limited to the project directory plus configured extra read paths
  • +
  • + Path traversal attacks are prevented via canonicalization ( + Path.resolve()) +
  • +
  • File operations are validated before execution
  • +
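The canonicalization step mentioned above can be illustrated with pathlib: resolve symlinks and .. segments first, then require the result to stay under the project root. A sketch, not the project's actual validator:

```python
# Sketch of a write-path check based on canonicalization (Path.resolve()).
from pathlib import Path

def is_write_allowed(project_dir: str, candidate: str) -> bool:
    root = Path(project_dir).resolve()
    target = (root / candidate).resolve()
    try:
        target.relative_to(root)   # raises ValueError if target escaped the root
        return True
    except ValueError:
        return False

assert is_write_allowed("/tmp/proj", "src/app.py") is True
assert is_write_allowed("/tmp/proj", "../etc/passwd") is False
```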
+
+ ) +} diff --git a/ui/src/components/docs/sections/SettingsConfig.tsx b/ui/src/components/docs/sections/SettingsConfig.tsx new file mode 100644 index 0000000..6045c5a --- /dev/null +++ b/ui/src/components/docs/sections/SettingsConfig.tsx @@ -0,0 +1,188 @@ +/** + * SettingsConfig Documentation Section + * + * Covers global settings: opening the modal, YOLO mode, headless browser, + * model selection, regression agents, batch size, concurrency, and persistence. + */ + +import { Badge } from '@/components/ui/badge' + +export function SettingsConfig() { + return ( +
+ {/* Opening Settings */} +

+ Opening Settings +

+

+ Press the , (comma) key or click the gear icon in the header bar to + open the Settings modal. Settings are global and apply to all projects. +

+ + {/* YOLO Mode */} +

+ YOLO Mode +

+

+ YOLO mode is for rapid prototyping — it skips testing for faster iteration: +

+
    +
  • + What’s skipped: Regression testing, Playwright MCP + server (browser automation disabled) +
  • +
  • + What still runs: Lint and type-check (to verify code + compiles), Feature MCP server for tracking +
  • +
  • + Toggle via the lightning bolt button in the UI or the{' '} + --yolo CLI flag +
  • +
  • + When to use: Early prototyping when you want to scaffold + features quickly without verification overhead +
  • +
  • Switch back to standard mode for production-quality development
  • +
+ + {/* Headless Browser */} +

+ Headless Browser +

+
    +
  • When enabled, Playwright runs without a visible browser window
  • +
  • Saves CPU/GPU resources on machines running multiple agents
  • +
  • Tests still run fully — just no visible browser UI
  • +
  • Toggle in settings or via the UI button
  • +
+ + {/* Model Selection */} +

+ Model Selection +

+

+ Choose which Claude model tier to use for your agents: +

+ + + + + + + + + + + + + + + + + + + + + +
Tier + Characteristics +
+ Opus + Most capable, highest quality
+ Sonnet + Balanced speed and quality
+ Haiku + Fastest, most economical
+
    +
  • Model can be set globally in settings
  • +
  • Per-schedule model override is also available
  • +
  • + When using Vertex AI, model names use{' '} + @ instead of{' '} + - (e.g.,{' '} + + claude-opus-4-5@20251101 + + ) +
  • +
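The Vertex naming difference above is only a separator swap before the date suffix; a tiny hypothetical helper (not part of this patch) could translate the usual Anthropic id into the Vertex form:

```python
# Hypothetical helper: swap the final '-' before an 8-digit date for '@'.
def to_vertex_model_id(model_id: str) -> str:
    base, sep, date = model_id.rpartition("-")
    if sep and len(date) == 8 and date.isdigit():
        return f"{base}@{date}"
    return model_id   # already Vertex-style or no date suffix

assert to_vertex_model_id("claude-opus-4-5-20251101") == "claude-opus-4-5@20251101"
```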
+ + {/* Regression Agents */} +

+ Regression Agents +

+

+ Controls how many testing agents run alongside coding agents (0–3): +

+
    +
  • + 0: No regression testing (like YOLO but coding agents + still test their own feature) +
  • +
  • + 1: One testing agent runs in background verifying + completed features +
  • +
  • + 2–3: Multiple testing agents for thorough + verification +
  • +
  • Testing agents batch-test 1–5 features per session
  • +
+ + {/* Features per Agent / Batch Size */} +

+ Features per Agent (Batch Size) +

+

+ Controls how many features each coding agent implements per session (1–3): +

+
    +
  • + 1: One feature per session (most focused, lower risk of + conflicts) +
  • +
  • + 2–3: Multiple features per session (more efficient, + fewer session startups) +
  • +
  • + Set via settings UI or the{' '} + --batch-size CLI flag +
  • +
  • + Can also target specific features:{' '} + --batch-features 1,2,3 +
  • +
+ + {/* Concurrency */} +

+ Concurrency +

+
    +
  • Per-project default concurrency saved in project settings
  • +
  • Override at runtime with the concurrency slider in agent controls
  • +
  • + Range: 1–5 concurrent coding agents +
  • +
  • Higher concurrency = faster progress but more API cost
  • +
+ + {/* How Settings are Persisted */} +

+ How Settings are Persisted +

+
    +
  • + Global settings stored in SQLite registry at{' '} + ~/.autocoder/registry.db +
  • +
  • Per-project settings (like default concurrency) stored in the project registry entry
  • +
  • UI settings (theme, dark mode) stored in browser localStorage
  • +
  • Settings survive app restarts and are shared across UI sessions
  • +
+
+ ) +} diff --git a/ui/src/hooks/useHashRoute.ts b/ui/src/hooks/useHashRoute.ts new file mode 100644 index 0000000..1482199 --- /dev/null +++ b/ui/src/hooks/useHashRoute.ts @@ -0,0 +1,36 @@ +import { useState, useEffect, useCallback } from 'react' + +export type Route = 'app' | 'docs' + +interface HashRouteState { + route: Route + section: string | null + navigate: (hash: string) => void +} + +function parseHash(hash: string): { route: Route; section: string | null } { + const cleaned = hash.replace(/^#\/?/, '') + if (cleaned === 'docs' || cleaned.startsWith('docs/')) { + const section = cleaned.slice(5) || null // Remove 'docs/' prefix + return { route: 'docs', section } + } + return { route: 'app', section: null } +} + +export function useHashRoute(): HashRouteState { + const [state, setState] = useState(() => parseHash(window.location.hash)) + + useEffect(() => { + const handleHashChange = () => { + setState(parseHash(window.location.hash)) + } + window.addEventListener('hashchange', handleHashChange) + return () => window.removeEventListener('hashchange', handleHashChange) + }, []) + + const navigate = useCallback((hash: string) => { + window.location.hash = hash + }, []) + + return { ...state, navigate } +} diff --git a/ui/src/main.tsx b/ui/src/main.tsx index fa4dad9..b4d89a2 100644 --- a/ui/src/main.tsx +++ b/ui/src/main.tsx @@ -1,7 +1,9 @@ import { StrictMode } from 'react' import { createRoot } from 'react-dom/client' import { QueryClient, QueryClientProvider } from '@tanstack/react-query' +import { useHashRoute } from './hooks/useHashRoute' import App from './App' +import { DocsPage } from './components/docs/DocsPage' import './styles/globals.css' // Note: Custom theme removed - using shadcn/ui theming instead @@ -14,10 +16,16 @@ const queryClient = new QueryClient({ }, }) +function Router() { + const { route } = useHashRoute() + if (route === 'docs') return + return +} + createRoot(document.getElementById('root')!).render( - + , ) diff --git a/ui/src/styles/globals.css b/ui/src/styles/globals.css index e927b54..035bffe 100644 --- a/ui/src/styles/globals.css +++ b/ui/src/styles/globals.css @@ -1134,6 +1134,143 @@ } } +/* ============================================================================ + Documentation Prose Typography + ============================================================================ */ + +.docs-prose { + line-height: 1.7; + color: var(--muted-foreground); +} + +.docs-prose h2 { + font-size: 1.5rem; + font-weight: 700; + color: var(--foreground); + margin-top: 3rem; + margin-bottom: 1rem; + padding-bottom: 0.5rem; + border-bottom: 2px solid var(--border); + scroll-margin-top: 5rem; +} + +.docs-prose h2:first-child { + margin-top: 0; +} + +.docs-prose h3 { + font-size: 1.15rem; + font-weight: 600; + color: var(--foreground); + margin-top: 2rem; + margin-bottom: 0.75rem; + scroll-margin-top: 5rem; +} + +.docs-prose p { + margin-bottom: 1rem; + max-width: 65ch; +} + +.docs-prose ul, +.docs-prose ol { + margin-bottom: 1rem; + padding-left: 1.5rem; +} + +.docs-prose ul { + list-style-type: disc; +} + +.docs-prose ol { + list-style-type: decimal; +} + +.docs-prose li { + margin-bottom: 0.375rem; +} + +.docs-prose li > ul, +.docs-prose li > ol { + margin-top: 0.375rem; + margin-bottom: 0; +} + +.docs-prose pre { + background: var(--muted); + border: 1px solid var(--border); + border-radius: var(--radius); + padding: 1rem; + overflow-x: auto; + margin-bottom: 1rem; + font-family: var(--font-mono); + font-size: 0.8125rem; + line-height: 1.6; +} + +.docs-prose 
code:not(pre code) { + background: var(--muted); + padding: 0.125rem 0.375rem; + border-radius: 0.25rem; + font-family: var(--font-mono); + font-size: 0.8125rem; +} + +.docs-prose table { + width: 100%; + border-collapse: collapse; + margin-bottom: 1rem; + font-size: 0.875rem; +} + +.docs-prose th { + background: var(--muted); + font-weight: 600; + color: var(--foreground); + text-align: left; + padding: 0.5rem 0.75rem; + border: 1px solid var(--border); +} + +.docs-prose td { + padding: 0.5rem 0.75rem; + border: 1px solid var(--border); +} + +.docs-prose tr:nth-child(even) td { + background: var(--muted); + opacity: 0.5; +} + +.docs-prose blockquote { + border-left: 4px solid var(--primary); + padding-left: 1rem; + margin-bottom: 1rem; + font-style: italic; + color: var(--muted-foreground); +} + +.docs-prose a { + color: var(--primary); + text-decoration: underline; + text-underline-offset: 2px; +} + +.docs-prose a:hover { + opacity: 0.8; +} + +.docs-prose strong { + color: var(--foreground); + font-weight: 600; +} + +.docs-prose hr { + border: none; + border-top: 1px solid var(--border); + margin: 2rem 0; +} + /* ============================================================================ Scrollbar Styling ============================================================================ */