mirror of
https://github.com/leonvanzyl/autocoder.git
synced 2026-01-30 06:12:06 +00:00
Introduce a new testing agent architecture that runs regression tests independently from coding agents, improving quality assurance in parallel mode. Key changes: Testing Agent System: - Add testing_prompt.template.md for dedicated testing agent role - Add feature_mark_failing MCP tool for regression detection - Add --agent-type flag to select initializer/coding/testing mode - Remove regression testing from coding prompt (now handled by testing agents) Parallel Orchestrator Enhancements: - Add testing agent spawning with configurable ratio (--testing-agent-ratio) - Add comprehensive debug logging system (DebugLog class) - Improve database session management to prevent stale reads - Add engine.dispose() calls to refresh connections after subprocess commits - Fix f-string linting issues (remove unnecessary f-prefixes) UI Improvements: - Add testing agent mascot (Chip) to AgentAvatar - Enhance AgentCard to display testing agent status - Add testing agent ratio slider in SettingsModal - Update WebSocket handling for testing agent updates - Improve ActivityFeed to show testing agent activity API & Server Updates: - Add testing_agent_ratio to settings schema and endpoints - Update process manager to support testing agent type - Enhance WebSocket messages for agent_update events Template Changes: - Delete coding_prompt_yolo.template.md (consolidated into main prompt) - Update initializer_prompt.template.md with improved structure - Streamline coding_prompt.template.md workflow Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
284 lines
11 KiB
Python
284 lines
11 KiB
Python
"""
|
|
Claude SDK Client Configuration
|
|
===============================
|
|
|
|
Functions for creating and configuring the Claude Agent SDK client.
|
|
"""
|
|
|
|
import json
|
|
import os
|
|
import shutil
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
from claude_agent_sdk import ClaudeAgentOptions, ClaudeSDKClient
|
|
from claude_agent_sdk.types import HookMatcher
|
|
from dotenv import load_dotenv
|
|
|
|
from security import bash_security_hook
|
|
|
|
# Load environment variables from .env file if present
|
|
load_dotenv()
|
|
|
|
# Default Playwright headless mode - can be overridden via PLAYWRIGHT_HEADLESS env var
|
|
# When True, browser runs invisibly in background
|
|
# When False, browser window is visible (default - useful for monitoring agent progress)
|
|
DEFAULT_PLAYWRIGHT_HEADLESS = False
|
|
|
|
# Environment variables to pass through to Claude CLI for API configuration
|
|
# These allow using alternative API endpoints (e.g., GLM via z.ai) without
|
|
# affecting the user's global Claude Code settings
|
|
API_ENV_VARS = [
|
|
"ANTHROPIC_BASE_URL", # Custom API endpoint (e.g., https://api.z.ai/api/anthropic)
|
|
"ANTHROPIC_AUTH_TOKEN", # API authentication token
|
|
"API_TIMEOUT_MS", # Request timeout in milliseconds
|
|
"ANTHROPIC_DEFAULT_SONNET_MODEL", # Model override for Sonnet
|
|
"ANTHROPIC_DEFAULT_OPUS_MODEL", # Model override for Opus
|
|
"ANTHROPIC_DEFAULT_HAIKU_MODEL", # Model override for Haiku
|
|
]
|
|
|
|
|
|
def get_playwright_headless() -> bool:
|
|
"""
|
|
Get the Playwright headless mode setting.
|
|
|
|
Reads from PLAYWRIGHT_HEADLESS environment variable, defaults to False.
|
|
Returns True for headless mode (invisible browser), False for visible browser.
|
|
"""
|
|
value = os.getenv("PLAYWRIGHT_HEADLESS", "false").lower()
|
|
# Accept various truthy/falsy values
|
|
return value in ("true", "1", "yes", "on")
|
|
|
|
|
|
# Feature MCP tools for feature/test management
|
|
FEATURE_MCP_TOOLS = [
|
|
# Core feature operations
|
|
"mcp__features__feature_get_stats",
|
|
"mcp__features__feature_get_next",
|
|
"mcp__features__feature_claim_next", # Atomic get+claim for parallel execution
|
|
"mcp__features__feature_get_for_regression",
|
|
"mcp__features__feature_mark_in_progress",
|
|
"mcp__features__feature_mark_passing",
|
|
"mcp__features__feature_mark_failing", # Mark regression detected
|
|
"mcp__features__feature_skip",
|
|
"mcp__features__feature_create_bulk",
|
|
"mcp__features__feature_create",
|
|
"mcp__features__feature_clear_in_progress",
|
|
# Dependency management
|
|
"mcp__features__feature_add_dependency",
|
|
"mcp__features__feature_remove_dependency",
|
|
"mcp__features__feature_set_dependencies",
|
|
# Parallel execution support
|
|
"mcp__features__feature_get_ready",
|
|
"mcp__features__feature_get_blocked",
|
|
"mcp__features__feature_get_graph",
|
|
]
|
|
|
|
# Playwright MCP tools for browser automation
|
|
PLAYWRIGHT_TOOLS = [
|
|
# Core navigation & screenshots
|
|
"mcp__playwright__browser_navigate",
|
|
"mcp__playwright__browser_navigate_back",
|
|
"mcp__playwright__browser_take_screenshot",
|
|
"mcp__playwright__browser_snapshot",
|
|
|
|
# Element interaction
|
|
"mcp__playwright__browser_click",
|
|
"mcp__playwright__browser_type",
|
|
"mcp__playwright__browser_fill_form",
|
|
"mcp__playwright__browser_select_option",
|
|
"mcp__playwright__browser_hover",
|
|
"mcp__playwright__browser_drag",
|
|
"mcp__playwright__browser_press_key",
|
|
|
|
# JavaScript & debugging
|
|
"mcp__playwright__browser_evaluate",
|
|
# "mcp__playwright__browser_run_code", # REMOVED - causes Playwright MCP server crash
|
|
"mcp__playwright__browser_console_messages",
|
|
"mcp__playwright__browser_network_requests",
|
|
|
|
# Browser management
|
|
"mcp__playwright__browser_close",
|
|
"mcp__playwright__browser_resize",
|
|
"mcp__playwright__browser_tabs",
|
|
"mcp__playwright__browser_wait_for",
|
|
"mcp__playwright__browser_handle_dialog",
|
|
"mcp__playwright__browser_file_upload",
|
|
"mcp__playwright__browser_install",
|
|
]
|
|
|
|
# Built-in tools
|
|
BUILTIN_TOOLS = [
|
|
"Read",
|
|
"Write",
|
|
"Edit",
|
|
"Glob",
|
|
"Grep",
|
|
"Bash",
|
|
"WebFetch",
|
|
"WebSearch",
|
|
]
|
|
|
|
|
|
def create_client(
|
|
project_dir: Path,
|
|
model: str,
|
|
yolo_mode: bool = False,
|
|
agent_id: str | None = None,
|
|
):
|
|
"""
|
|
Create a Claude Agent SDK client with multi-layered security.
|
|
|
|
Args:
|
|
project_dir: Directory for the project
|
|
model: Claude model to use
|
|
yolo_mode: If True, skip Playwright MCP server for rapid prototyping
|
|
agent_id: Optional unique identifier for browser isolation in parallel mode.
|
|
When provided, each agent gets its own browser profile.
|
|
|
|
Returns:
|
|
Configured ClaudeSDKClient (from claude_agent_sdk)
|
|
|
|
Security layers (defense in depth):
|
|
1. Sandbox - OS-level bash command isolation prevents filesystem escape
|
|
2. Permissions - File operations restricted to project_dir only
|
|
3. Security hooks - Bash commands validated against an allowlist
|
|
(see security.py for ALLOWED_COMMANDS)
|
|
|
|
Note: Authentication is handled by start.bat/start.sh before this runs.
|
|
The Claude SDK auto-detects credentials from the Claude CLI configuration
|
|
"""
|
|
# Build allowed tools list based on mode
|
|
# In YOLO mode, exclude Playwright tools for faster prototyping
|
|
allowed_tools = [*BUILTIN_TOOLS, *FEATURE_MCP_TOOLS]
|
|
if not yolo_mode:
|
|
allowed_tools.extend(PLAYWRIGHT_TOOLS)
|
|
|
|
# Build permissions list
|
|
permissions_list = [
|
|
# Allow all file operations within the project directory
|
|
"Read(./**)",
|
|
"Write(./**)",
|
|
"Edit(./**)",
|
|
"Glob(./**)",
|
|
"Grep(./**)",
|
|
# Bash permission granted here, but actual commands are validated
|
|
# by the bash_security_hook (see security.py for allowed commands)
|
|
"Bash(*)",
|
|
# Allow web tools for documentation lookup
|
|
"WebFetch",
|
|
"WebSearch",
|
|
# Allow Feature MCP tools for feature management
|
|
*FEATURE_MCP_TOOLS,
|
|
]
|
|
if not yolo_mode:
|
|
# Allow Playwright MCP tools for browser automation (standard mode only)
|
|
permissions_list.extend(PLAYWRIGHT_TOOLS)
|
|
|
|
# Create comprehensive security settings
|
|
# Note: Using relative paths ("./**") restricts access to project directory
|
|
# since cwd is set to project_dir
|
|
security_settings = {
|
|
"sandbox": {"enabled": True, "autoAllowBashIfSandboxed": True},
|
|
"permissions": {
|
|
"defaultMode": "acceptEdits", # Auto-approve edits within allowed directories
|
|
"allow": permissions_list,
|
|
},
|
|
}
|
|
|
|
# Ensure project directory exists before creating settings file
|
|
project_dir.mkdir(parents=True, exist_ok=True)
|
|
|
|
# Write settings to a file in the project directory
|
|
settings_file = project_dir / ".claude_settings.json"
|
|
with open(settings_file, "w") as f:
|
|
json.dump(security_settings, f, indent=2)
|
|
|
|
print(f"Created security settings at {settings_file}")
|
|
print(" - Sandbox enabled (OS-level bash isolation)")
|
|
print(f" - Filesystem restricted to: {project_dir.resolve()}")
|
|
print(" - Bash commands restricted to allowlist (see security.py)")
|
|
if yolo_mode:
|
|
print(" - MCP servers: features (database) - YOLO MODE (no Playwright)")
|
|
else:
|
|
print(" - MCP servers: playwright (browser), features (database)")
|
|
print(" - Project settings enabled (skills, commands, CLAUDE.md)")
|
|
print()
|
|
|
|
# Use system Claude CLI instead of bundled one (avoids Bun runtime crash on Windows)
|
|
system_cli = shutil.which("claude")
|
|
if system_cli:
|
|
print(f" - Using system CLI: {system_cli}")
|
|
else:
|
|
print(" - Warning: System 'claude' CLI not found, using bundled CLI")
|
|
|
|
# Build MCP servers config - features is always included, playwright only in standard mode
|
|
mcp_servers = {
|
|
"features": {
|
|
"command": sys.executable, # Use the same Python that's running this script
|
|
"args": ["-m", "mcp_server.feature_mcp"],
|
|
"env": {
|
|
# Only specify variables the MCP server needs
|
|
# (subprocess inherits parent environment automatically)
|
|
"PROJECT_DIR": str(project_dir.resolve()),
|
|
"PYTHONPATH": str(Path(__file__).parent.resolve()),
|
|
},
|
|
},
|
|
}
|
|
if not yolo_mode:
|
|
# Include Playwright MCP server for browser automation (standard mode only)
|
|
# Headless mode is configurable via PLAYWRIGHT_HEADLESS environment variable
|
|
playwright_args = ["@playwright/mcp@latest", "--viewport-size", "1280x720"]
|
|
if get_playwright_headless():
|
|
playwright_args.append("--headless")
|
|
|
|
# Browser isolation for parallel execution
|
|
# Each agent gets its own isolated browser context to prevent tab conflicts
|
|
if agent_id:
|
|
# Use --isolated for ephemeral browser context
|
|
# This creates a fresh, isolated context without persistent state
|
|
# Note: --isolated and --user-data-dir are mutually exclusive
|
|
playwright_args.append("--isolated")
|
|
print(f" - Browser isolation enabled for agent: {agent_id}")
|
|
|
|
mcp_servers["playwright"] = {
|
|
"command": "npx",
|
|
"args": playwright_args,
|
|
}
|
|
|
|
# Build environment overrides for API endpoint configuration
|
|
# These override system env vars for the Claude CLI subprocess,
|
|
# allowing AutoCoder to use alternative APIs (e.g., GLM) without
|
|
# affecting the user's global Claude Code settings
|
|
sdk_env = {}
|
|
for var in API_ENV_VARS:
|
|
value = os.getenv(var)
|
|
if value:
|
|
sdk_env[var] = value
|
|
|
|
if sdk_env:
|
|
print(f" - API overrides: {', '.join(sdk_env.keys())}")
|
|
if "ANTHROPIC_BASE_URL" in sdk_env:
|
|
print(f" - GLM Mode: Using {sdk_env['ANTHROPIC_BASE_URL']}")
|
|
|
|
return ClaudeSDKClient(
|
|
options=ClaudeAgentOptions(
|
|
model=model,
|
|
cli_path=system_cli, # Use system CLI to avoid bundled Bun crash (exit code 3)
|
|
system_prompt="You are an expert full-stack developer building a production-quality web application.",
|
|
setting_sources=["project"], # Enable skills, commands, and CLAUDE.md from project dir
|
|
max_buffer_size=10 * 1024 * 1024, # 10MB for large Playwright screenshots
|
|
allowed_tools=allowed_tools,
|
|
mcp_servers=mcp_servers,
|
|
hooks={
|
|
"PreToolUse": [
|
|
HookMatcher(matcher="Bash", hooks=[bash_security_hook]),
|
|
],
|
|
},
|
|
max_turns=1000,
|
|
cwd=str(project_dir.resolve()),
|
|
settings=str(settings_file.resolve()), # Use absolute path
|
|
env=sdk_env, # Pass API configuration overrides to CLI subprocess
|
|
)
|
|
)
|