mirror of
https://github.com/leonvanzyl/autocoder.git
synced 2026-02-01 23:13:36 +00:00
refactor: optimize token usage, deduplicate code, fix bugs across agents
Token reduction (~40% per session, ~2.3M fewer tokens per 200-feature project):
- Agent-type-specific tool lists: coding 9, testing 5, init 5 (was 19 for all)
- Right-sized max_turns: coding 300, testing 100 (was 1000 for all)
- Trimmed coding prompt template (~150 lines removed)
- Streamlined testing prompt with batch support
- YOLO mode now strips browser testing instructions from prompt
- Added Grep, WebFetch, WebSearch to expand project session

Performance improvements:
- Rate limit retries start at ~15s with jitter (was fixed 60s)
- Post-spawn delay reduced to 0.5s (was 2s)
- Orchestrator consolidated to 1 DB query per loop (was 5-7)
- Testing agents batch 3 features per session (was 1)
- Smart context compaction preserves critical state, discards noise

Bug fixes:
- Removed ghost feature_release_testing MCP tool (wasted tokens every test session)
- Forward all 9 Vertex AI env vars to chat sessions (was missing 3)
- Fix DetachedInstanceError risk in test batch ORM access
- Prevent duplicate testing of same features in parallel mode

Code deduplication:
- _get_project_path(): 9 copies -> 1 shared utility (project_helpers.py)
- validate_project_name(): 9 copies -> 2 variants in 1 file (validation.py)
- ROOT_DIR: 10 copies -> 1 definition (chat_constants.py)
- API_ENV_VARS: 4 copies -> 1 source of truth (env_constants.py)

Security hardening:
- Unified sensitive directory blocklist (14 dirs, was two divergent lists)
- Cached get_blocked_paths() for O(1) directory listing checks
- Terminal security warning when ALLOW_REMOTE=1 exposes WebSocket
- 20 new security tests for EXTRA_READ_PATHS blocking
- Extracted _validate_command_list() and _validate_pkill_processes() helpers

Type safety:
- 87 mypy errors -> 0 across 58 source files
- Installed types-PyYAML for proper yaml stub types
- Fixed SQLAlchemy Column[T] coercions across all routers

Dead code removed:
- 13 files deleted (~2,679 lines): unused UI components, debug logs, outdated docs
- 7 unused npm packages removed (Radix UI components with 0 imports)
- AgentAvatar.tsx reduced from 615 -> 119 lines (SVGs extracted to mascotData.tsx)

New CLI options:
- --testing-batch-size (1-5) for parallel mode test batching
- --testing-feature-ids for direct multi-feature testing

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
225
client.py
225
client.py
@@ -16,7 +16,8 @@ from claude_agent_sdk import ClaudeAgentOptions, ClaudeSDKClient
|
||||
from claude_agent_sdk.types import HookContext, HookInput, HookMatcher, SyncHookJSONOutput
|
||||
from dotenv import load_dotenv
|
||||
|
||||
from security import bash_security_hook
|
||||
from env_constants import API_ENV_VARS
|
||||
from security import SENSITIVE_DIRECTORIES, bash_security_hook
|
||||
|
||||
# Load environment variables from .env file if present
|
||||
load_dotenv()
|
||||
@@ -31,43 +32,15 @@ DEFAULT_PLAYWRIGHT_HEADLESS = True
|
||||
# Firefox is recommended for lower CPU usage
|
||||
DEFAULT_PLAYWRIGHT_BROWSER = "firefox"
|
||||
|
||||
# Environment variables to pass through to Claude CLI for API configuration
|
||||
# These allow using alternative API endpoints (e.g., GLM via z.ai, Vertex AI) without
|
||||
# affecting the user's global Claude Code settings
|
||||
API_ENV_VARS = [
|
||||
"ANTHROPIC_BASE_URL", # Custom API endpoint (e.g., https://api.z.ai/api/anthropic)
|
||||
"ANTHROPIC_AUTH_TOKEN", # API authentication token
|
||||
"API_TIMEOUT_MS", # Request timeout in milliseconds
|
||||
"ANTHROPIC_DEFAULT_SONNET_MODEL", # Model override for Sonnet
|
||||
"ANTHROPIC_DEFAULT_OPUS_MODEL", # Model override for Opus
|
||||
"ANTHROPIC_DEFAULT_HAIKU_MODEL", # Model override for Haiku
|
||||
# Vertex AI configuration
|
||||
"CLAUDE_CODE_USE_VERTEX", # Enable Vertex AI mode (set to "1")
|
||||
"CLOUD_ML_REGION", # GCP region (e.g., us-east5)
|
||||
"ANTHROPIC_VERTEX_PROJECT_ID", # GCP project ID
|
||||
]
|
||||
|
||||
# Extra read paths for cross-project file access (read-only)
|
||||
# Set EXTRA_READ_PATHS environment variable with comma-separated absolute paths
|
||||
# Example: EXTRA_READ_PATHS=/Volumes/Data/dev,/Users/shared/libs
|
||||
EXTRA_READ_PATHS_VAR = "EXTRA_READ_PATHS"
|
||||
|
||||
# Sensitive directories that should never be allowed via EXTRA_READ_PATHS
|
||||
# These contain credentials, keys, or system-critical files
|
||||
EXTRA_READ_PATHS_BLOCKLIST = {
|
||||
".ssh",
|
||||
".aws",
|
||||
".azure",
|
||||
".kube",
|
||||
".gnupg",
|
||||
".gpg",
|
||||
".password-store",
|
||||
".docker",
|
||||
".config/gcloud",
|
||||
".npmrc",
|
||||
".pypirc",
|
||||
".netrc",
|
||||
}
|
||||
# Sensitive directories that should never be allowed via EXTRA_READ_PATHS.
|
||||
# Delegates to the canonical SENSITIVE_DIRECTORIES set in security.py so that
|
||||
# this blocklist and the filesystem browser API share a single source of truth.
|
||||
EXTRA_READ_PATHS_BLOCKLIST = SENSITIVE_DIRECTORIES
|
||||
|
||||
def convert_model_for_vertex(model: str) -> str:
|
||||
"""
|
||||
@@ -209,32 +182,55 @@ def get_extra_read_paths() -> list[Path]:
|
||||
return validated_paths
|
||||
|
||||
|
||||
# Feature MCP tools for feature/test management
|
||||
FEATURE_MCP_TOOLS = [
|
||||
# Core feature operations
|
||||
# Per-agent-type MCP tool lists.
|
||||
# Only expose the tools each agent type actually needs, reducing tool schema
|
||||
# overhead and preventing agents from calling tools meant for other roles.
|
||||
#
|
||||
# Tools intentionally omitted from ALL agent lists (UI/orchestrator only):
|
||||
# feature_get_ready, feature_get_blocked, feature_get_graph,
|
||||
# feature_remove_dependency
|
||||
#
|
||||
# The ghost tool "feature_release_testing" was removed entirely -- it was
|
||||
# listed here but never implemented in mcp_server/feature_mcp.py.
|
||||
|
||||
CODING_AGENT_TOOLS = [
|
||||
"mcp__features__feature_get_stats",
|
||||
"mcp__features__feature_get_by_id", # Get assigned feature details
|
||||
"mcp__features__feature_get_summary", # Lightweight: id, name, status, deps only
|
||||
"mcp__features__feature_get_by_id",
|
||||
"mcp__features__feature_get_summary",
|
||||
"mcp__features__feature_claim_and_get",
|
||||
"mcp__features__feature_mark_in_progress",
|
||||
"mcp__features__feature_claim_and_get", # Atomic claim + get details
|
||||
"mcp__features__feature_mark_passing",
|
||||
"mcp__features__feature_mark_failing", # Mark regression detected
|
||||
"mcp__features__feature_mark_failing",
|
||||
"mcp__features__feature_skip",
|
||||
"mcp__features__feature_create_bulk",
|
||||
"mcp__features__feature_create",
|
||||
"mcp__features__feature_clear_in_progress",
|
||||
"mcp__features__feature_release_testing", # Release testing claim
|
||||
# Dependency management
|
||||
"mcp__features__feature_add_dependency",
|
||||
"mcp__features__feature_remove_dependency",
|
||||
"mcp__features__feature_set_dependencies",
|
||||
# Query tools
|
||||
"mcp__features__feature_get_ready",
|
||||
"mcp__features__feature_get_blocked",
|
||||
"mcp__features__feature_get_graph",
|
||||
]
|
||||
|
||||
# Playwright MCP tools for browser automation
|
||||
TESTING_AGENT_TOOLS = [
|
||||
"mcp__features__feature_get_stats",
|
||||
"mcp__features__feature_get_by_id",
|
||||
"mcp__features__feature_get_summary",
|
||||
"mcp__features__feature_mark_passing",
|
||||
"mcp__features__feature_mark_failing",
|
||||
]
|
||||
|
||||
INITIALIZER_AGENT_TOOLS = [
|
||||
"mcp__features__feature_get_stats",
|
||||
"mcp__features__feature_create_bulk",
|
||||
"mcp__features__feature_create",
|
||||
"mcp__features__feature_add_dependency",
|
||||
"mcp__features__feature_set_dependencies",
|
||||
]
|
||||
|
||||
# Union of all agent tool lists -- used for permissions (all tools remain
|
||||
# *permitted* so the MCP server can respond, but only the agent-type-specific
|
||||
# list is included in allowed_tools, which controls what the LLM sees).
|
||||
ALL_FEATURE_MCP_TOOLS = sorted(
|
||||
set(CODING_AGENT_TOOLS) | set(TESTING_AGENT_TOOLS) | set(INITIALIZER_AGENT_TOOLS)
|
||||
)
|
||||
|
||||
# Playwright MCP tools for browser automation.
|
||||
# Full set of tools for comprehensive UI testing including drag-and-drop,
|
||||
# hover menus, file uploads, tab management, etc.
|
||||
PLAYWRIGHT_TOOLS = [
|
||||
# Core navigation & screenshots
|
||||
"mcp__playwright__browser_navigate",
|
||||
@@ -247,9 +243,10 @@ PLAYWRIGHT_TOOLS = [
|
||||
"mcp__playwright__browser_type",
|
||||
"mcp__playwright__browser_fill_form",
|
||||
"mcp__playwright__browser_select_option",
|
||||
"mcp__playwright__browser_hover",
|
||||
"mcp__playwright__browser_drag",
|
||||
"mcp__playwright__browser_press_key",
|
||||
"mcp__playwright__browser_drag",
|
||||
"mcp__playwright__browser_hover",
|
||||
"mcp__playwright__browser_file_upload",
|
||||
|
||||
# JavaScript & debugging
|
||||
"mcp__playwright__browser_evaluate",
|
||||
@@ -258,16 +255,17 @@ PLAYWRIGHT_TOOLS = [
|
||||
"mcp__playwright__browser_network_requests",
|
||||
|
||||
# Browser management
|
||||
"mcp__playwright__browser_close",
|
||||
"mcp__playwright__browser_resize",
|
||||
"mcp__playwright__browser_tabs",
|
||||
"mcp__playwright__browser_wait_for",
|
||||
"mcp__playwright__browser_handle_dialog",
|
||||
"mcp__playwright__browser_file_upload",
|
||||
"mcp__playwright__browser_install",
|
||||
"mcp__playwright__browser_close",
|
||||
"mcp__playwright__browser_tabs",
|
||||
]
|
||||
|
||||
# Built-in tools
|
||||
# Built-in tools available to agents.
|
||||
# WebFetch and WebSearch are included so coding agents can look up current
|
||||
# documentation for frameworks and libraries they are implementing.
|
||||
BUILTIN_TOOLS = [
|
||||
"Read",
|
||||
"Write",
|
||||
@@ -285,6 +283,7 @@ def create_client(
|
||||
model: str,
|
||||
yolo_mode: bool = False,
|
||||
agent_id: str | None = None,
|
||||
agent_type: str = "coding",
|
||||
):
|
||||
"""
|
||||
Create a Claude Agent SDK client with multi-layered security.
|
||||
@@ -295,6 +294,8 @@ def create_client(
|
||||
yolo_mode: If True, skip Playwright MCP server for rapid prototyping
|
||||
agent_id: Optional unique identifier for browser isolation in parallel mode.
|
||||
When provided, each agent gets its own browser profile.
|
||||
agent_type: One of "coding", "testing", or "initializer". Controls which
|
||||
MCP tools are exposed and the max_turns limit.
|
||||
|
||||
Returns:
|
||||
Configured ClaudeSDKClient (from claude_agent_sdk)
|
||||
@@ -308,13 +309,34 @@ def create_client(
|
||||
Note: Authentication is handled by start.bat/start.sh before this runs.
|
||||
The Claude SDK auto-detects credentials from the Claude CLI configuration
|
||||
"""
|
||||
# Build allowed tools list based on mode
|
||||
# In YOLO mode, exclude Playwright tools for faster prototyping
|
||||
allowed_tools = [*BUILTIN_TOOLS, *FEATURE_MCP_TOOLS]
|
||||
# Select the feature MCP tools appropriate for this agent type
|
||||
feature_tools_map = {
|
||||
"coding": CODING_AGENT_TOOLS,
|
||||
"testing": TESTING_AGENT_TOOLS,
|
||||
"initializer": INITIALIZER_AGENT_TOOLS,
|
||||
}
|
||||
feature_tools = feature_tools_map.get(agent_type, CODING_AGENT_TOOLS)
|
||||
|
||||
# Select max_turns based on agent type:
|
||||
# - coding/initializer: 300 turns (complex multi-step implementation)
|
||||
# - testing: 100 turns (focused verification of a small batch of features)
|
||||
max_turns_map = {
|
||||
"coding": 300,
|
||||
"testing": 100,
|
||||
"initializer": 300,
|
||||
}
|
||||
max_turns = max_turns_map.get(agent_type, 300)
|
||||
|
||||
# Build allowed tools list based on mode and agent type.
|
||||
# In YOLO mode, exclude Playwright tools for faster prototyping.
|
||||
allowed_tools = [*BUILTIN_TOOLS, *feature_tools]
|
||||
if not yolo_mode:
|
||||
allowed_tools.extend(PLAYWRIGHT_TOOLS)
|
||||
|
||||
# Build permissions list
|
||||
# Build permissions list.
|
||||
# We permit ALL feature MCP tools at the security layer (so the MCP server
|
||||
# can respond if called), but the LLM only *sees* the agent-type-specific
|
||||
# subset via allowed_tools above.
|
||||
permissions_list = [
|
||||
# Allow all file operations within the project directory
|
||||
"Read(./**)",
|
||||
@@ -325,11 +347,11 @@ def create_client(
|
||||
# Bash permission granted here, but actual commands are validated
|
||||
# by the bash_security_hook (see security.py for allowed commands)
|
||||
"Bash(*)",
|
||||
# Allow web tools for documentation lookup
|
||||
"WebFetch",
|
||||
"WebSearch",
|
||||
# Allow web tools for looking up framework/library documentation
|
||||
"WebFetch(*)",
|
||||
"WebSearch(*)",
|
||||
# Allow Feature MCP tools for feature management
|
||||
*FEATURE_MCP_TOOLS,
|
||||
*ALL_FEATURE_MCP_TOOLS,
|
||||
]
|
||||
|
||||
# Add extra read paths from environment variable (read-only access)
|
||||
@@ -461,9 +483,10 @@ def create_client(
|
||||
context["project_dir"] = str(project_dir.resolve())
|
||||
return await bash_security_hook(input_data, tool_use_id, context)
|
||||
|
||||
# PreCompact hook for logging and customizing context compaction
|
||||
# PreCompact hook for logging and customizing context compaction.
|
||||
# Compaction is handled automatically by Claude Code CLI when context approaches limits.
|
||||
# This hook allows us to log when compaction occurs and optionally provide custom instructions.
|
||||
# This hook provides custom instructions that guide the summarizer to preserve
|
||||
# critical workflow state while discarding verbose/redundant content.
|
||||
async def pre_compact_hook(
|
||||
input_data: HookInput,
|
||||
tool_use_id: str | None,
|
||||
@@ -476,8 +499,9 @@ def create_client(
|
||||
- "auto": Automatic compaction when context approaches token limits
|
||||
- "manual": User-initiated compaction via /compact command
|
||||
|
||||
The hook can customize compaction via hookSpecificOutput:
|
||||
- customInstructions: String with focus areas for summarization
|
||||
Returns custom instructions that guide the compaction summarizer to:
|
||||
1. Preserve critical workflow state (feature ID, modified files, test results)
|
||||
2. Discard verbose content (screenshots, long grep outputs, repeated reads)
|
||||
"""
|
||||
trigger = input_data.get("trigger", "auto")
|
||||
custom_instructions = input_data.get("custom_instructions")
|
||||
@@ -488,18 +512,53 @@ def create_client(
|
||||
print("[Context] Manual compaction requested")
|
||||
|
||||
if custom_instructions:
|
||||
print(f"[Context] Custom instructions: {custom_instructions}")
|
||||
print(f"[Context] Custom instructions provided: {custom_instructions}")
|
||||
|
||||
# Return empty dict to allow compaction to proceed with default behavior
|
||||
# To customize, return:
|
||||
# {
|
||||
# "hookSpecificOutput": {
|
||||
# "hookEventName": "PreCompact",
|
||||
# "customInstructions": "Focus on preserving file paths and test results"
|
||||
# }
|
||||
# }
|
||||
return SyncHookJSONOutput()
|
||||
# Build compaction instructions that preserve workflow-critical context
|
||||
# while discarding verbose content that inflates token usage.
|
||||
#
|
||||
# The summarizer receives these instructions and uses them to decide
|
||||
# what to keep vs. discard during context compaction.
|
||||
compaction_guidance = "\n".join([
|
||||
"## PRESERVE (critical workflow state)",
|
||||
"- Current feature ID, feature name, and feature status (pending/in_progress/passing/failing)",
|
||||
"- List of all files created or modified during this session, with their paths",
|
||||
"- Last test/lint/type-check results: command run, pass/fail status, and key error messages",
|
||||
"- Current step in the workflow (e.g., implementing, testing, fixing lint errors)",
|
||||
"- Any dependency information (which features block this one)",
|
||||
"- Git operations performed (commits, branches created)",
|
||||
"- MCP tool call results (feature_claim_and_get, feature_mark_passing, etc.)",
|
||||
"- Key architectural decisions made during this session",
|
||||
"",
|
||||
"## DISCARD (verbose content safe to drop)",
|
||||
"- Full screenshot base64 data (just note that a screenshot was taken and what it showed)",
|
||||
"- Long grep/find/glob output listings (summarize to: searched for X, found Y relevant files)",
|
||||
"- Repeated file reads of the same file (keep only the latest read or a summary of changes)",
|
||||
"- Full file contents from Read tool (summarize to: read file X, key sections were Y)",
|
||||
"- Verbose npm/pip install output (just note: dependencies installed successfully/failed)",
|
||||
"- Full lint/type-check output when passing (just note: lint passed with no errors)",
|
||||
"- Browser console message dumps (summarize to: N errors found, key error was X)",
|
||||
"- Redundant tool result confirmations ([Done] markers)",
|
||||
])
|
||||
|
||||
print("[Context] Applying custom compaction instructions (preserve workflow state, discard verbose content)")
|
||||
|
||||
# The SDK's HookSpecificOutput union type does not yet include a
|
||||
# PreCompactHookSpecificOutput variant, but the CLI protocol accepts
|
||||
# {"hookEventName": "PreCompact", "customInstructions": "..."}.
|
||||
# The dict is serialized to JSON and sent to the CLI process directly,
|
||||
# so the runtime behavior is correct despite the type mismatch.
|
||||
return SyncHookJSONOutput(
|
||||
hookSpecificOutput={ # type: ignore[typeddict-item]
|
||||
"hookEventName": "PreCompact",
|
||||
"customInstructions": compaction_guidance,
|
||||
}
|
||||
)
|
||||
|
||||
# PROMPT CACHING: The Claude Code CLI applies cache_control breakpoints internally.
|
||||
# Our system_prompt benefits from automatic caching without explicit configuration.
|
||||
# If explicit cache_control is needed, the SDK would need to accept content blocks
|
||||
# with cache_control fields (not currently supported in v0.1.x).
|
||||
return ClaudeSDKClient(
|
||||
options=ClaudeAgentOptions(
|
||||
model=model,
|
||||
@@ -508,7 +567,7 @@ def create_client(
|
||||
setting_sources=["project"], # Enable skills, commands, and CLAUDE.md from project dir
|
||||
max_buffer_size=10 * 1024 * 1024, # 10MB for large Playwright screenshots
|
||||
allowed_tools=allowed_tools,
|
||||
mcp_servers=mcp_servers,
|
||||
mcp_servers=mcp_servers, # type: ignore[arg-type] # SDK accepts dict config at runtime
|
||||
hooks={
|
||||
"PreToolUse": [
|
||||
HookMatcher(matcher="Bash", hooks=[bash_hook_with_context]),
|
||||
@@ -520,7 +579,7 @@ def create_client(
|
||||
HookMatcher(hooks=[pre_compact_hook]),
|
||||
],
|
||||
},
|
||||
max_turns=1000,
|
||||
max_turns=max_turns,
|
||||
cwd=str(project_dir.resolve()),
|
||||
settings=str(settings_file.resolve()), # Use absolute path
|
||||
env=sdk_env, # Pass API configuration overrides to CLI subprocess
|
||||
@@ -538,7 +597,7 @@ def create_client(
|
||||
# parameters. Instead, context is managed via:
|
||||
# 1. betas=["context-1m-2025-08-07"] - Extended context window
|
||||
# 2. PreCompact hook - Intercept and customize compaction behavior
|
||||
# 3. max_turns - Limit conversation turns (set to 1000 for long sessions)
|
||||
# 3. max_turns - Limit conversation turns (per agent type: coding=300, testing=100)
|
||||
#
|
||||
# Future SDK versions may add explicit compaction controls. When available,
|
||||
# consider adding:
|
||||
|
||||
Reference in New Issue
Block a user