Mirror of https://github.com/leonvanzyl/autocoder.git, synced 2026-02-01 15:03:36 +00:00
Token reduction (~40% per session, ~2.3M fewer tokens per 200-feature project):
- Agent-type-specific tool lists: coding 9, testing 5, init 5 (was 19 for all)
- Right-sized max_turns: coding 300, testing 100 (was 1000 for all)
- Trimmed coding prompt template (~150 lines removed)
- Streamlined testing prompt with batch support
- YOLO mode now strips browser testing instructions from prompt
- Added Grep, WebFetch, WebSearch to expand project session

Performance improvements:
- Rate limit retries start at ~15s with jitter (was fixed 60s)
- Post-spawn delay reduced to 0.5s (was 2s)
- Orchestrator consolidated to 1 DB query per loop (was 5-7)
- Testing agents batch 3 features per session (was 1)
- Smart context compaction preserves critical state, discards noise

Bug fixes:
- Removed ghost feature_release_testing MCP tool (wasted tokens every test session)
- Forward all 9 Vertex AI env vars to chat sessions (was missing 3)
- Fix DetachedInstanceError risk in test batch ORM access
- Prevent duplicate testing of same features in parallel mode

Code deduplication:
- _get_project_path(): 9 copies -> 1 shared utility (project_helpers.py)
- validate_project_name(): 9 copies -> 2 variants in 1 file (validation.py)
- ROOT_DIR: 10 copies -> 1 definition (chat_constants.py)
- API_ENV_VARS: 4 copies -> 1 source of truth (env_constants.py)

Security hardening:
- Unified sensitive directory blocklist (14 dirs, was two divergent lists)
- Cached get_blocked_paths() for O(1) directory listing checks
- Terminal security warning when ALLOW_REMOTE=1 exposes WebSocket
- 20 new security tests for EXTRA_READ_PATHS blocking
- Extracted _validate_command_list() and _validate_pkill_processes() helpers

Type safety:
- 87 mypy errors -> 0 across 58 source files
- Installed types-PyYAML for proper yaml stub types
- Fixed SQLAlchemy Column[T] coercions across all routers

Dead code removed:
- 13 files deleted (~2,679 lines): unused UI components, debug logs, outdated docs
- 7 unused npm packages removed (Radix UI components with 0 imports)
- AgentAvatar.tsx reduced from 615 -> 119 lines (SVGs extracted to mascotData.tsx)

New CLI options:
- --testing-batch-size (1-5) for parallel mode test batching
- --testing-feature-ids for direct multi-feature testing

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
608 lines
26 KiB
Python
"""
|
|
Claude SDK Client Configuration
|
|
===============================
|
|
|
|
Functions for creating and configuring the Claude Agent SDK client.
|
|
"""
|
|
|
|
import json
|
|
import os
|
|
import re
|
|
import shutil
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
from claude_agent_sdk import ClaudeAgentOptions, ClaudeSDKClient
|
|
from claude_agent_sdk.types import HookContext, HookInput, HookMatcher, SyncHookJSONOutput
|
|
from dotenv import load_dotenv
|
|
|
|
from env_constants import API_ENV_VARS
|
|
from security import SENSITIVE_DIRECTORIES, bash_security_hook
|
|
|
|
# Load environment variables from .env file if present
|
|
load_dotenv()
|
|
|
|
# Default Playwright headless mode - can be overridden via PLAYWRIGHT_HEADLESS env var
|
|
# When True, browser runs invisibly in background (default - saves CPU)
|
|
# When False, browser window is visible (useful for monitoring agent progress)
|
|
DEFAULT_PLAYWRIGHT_HEADLESS = True
|
|
|
|
# Default browser for Playwright - can be overridden via PLAYWRIGHT_BROWSER env var
|
|
# Options: chrome, firefox, webkit, msedge
|
|
# Firefox is recommended for lower CPU usage
|
|
DEFAULT_PLAYWRIGHT_BROWSER = "firefox"
|
|
|
|
# Extra read paths for cross-project file access (read-only)
|
|
# Set EXTRA_READ_PATHS environment variable with comma-separated absolute paths
|
|
# Example: EXTRA_READ_PATHS=/Volumes/Data/dev,/Users/shared/libs
|
|
EXTRA_READ_PATHS_VAR = "EXTRA_READ_PATHS"
|
|
|
|
# Sensitive directories that should never be allowed via EXTRA_READ_PATHS.
|
|
# Delegates to the canonical SENSITIVE_DIRECTORIES set in security.py so that
|
|
# this blocklist and the filesystem browser API share a single source of truth.
|
|
EXTRA_READ_PATHS_BLOCKLIST = SENSITIVE_DIRECTORIES
|
|
|
|
def convert_model_for_vertex(model: str) -> str:
    """
    Convert model name format for Vertex AI compatibility.

    Vertex AI uses @ to separate model name from version (e.g., claude-opus-4-5@20251101)
    while the Anthropic API uses - (e.g., claude-opus-4-5-20251101).

    Args:
        model: Model name in Anthropic format (with hyphens)

    Returns:
        Model name in Vertex AI format (with @ before date) if Vertex AI is enabled,
        otherwise returns the model unchanged.
    """
    # Only convert if Vertex AI is enabled
    if os.getenv("CLAUDE_CODE_USE_VERTEX") != "1":
        return model

    # Pattern: claude-{name}-{version}-{date} -> claude-{name}-{version}@{date}
    # Example: claude-opus-4-5-20251101 -> claude-opus-4-5@20251101
    # The date is always 8 digits at the end
    match = re.match(r'^(claude-.+)-(\d{8})$', model)
    if match:
        base_name, date = match.groups()
        return f"{base_name}@{date}"

    # If already in @ format or doesn't match expected pattern, return as-is
    return model

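# Illustrative behavior of convert_model_for_vertex (a sketch based on the
# docstring above, assuming CLAUDE_CODE_USE_VERTEX=1 in the environment;
# with the flag unset the input is returned unchanged):
#
#     >>> convert_model_for_vertex("claude-opus-4-5-20251101")
#     'claude-opus-4-5@20251101'
#     >>> convert_model_for_vertex("claude-opus-4-5@20251101")  # already in Vertex format
#     'claude-opus-4-5@20251101'
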
def get_playwright_headless() -> bool:
    """
    Get the Playwright headless mode setting.

    Reads from PLAYWRIGHT_HEADLESS environment variable, defaults to True.
    Returns True for headless mode (invisible browser), False for visible browser.
    """
    value = os.getenv("PLAYWRIGHT_HEADLESS", str(DEFAULT_PLAYWRIGHT_HEADLESS).lower()).strip().lower()
    truthy = {"true", "1", "yes", "on"}
    falsy = {"false", "0", "no", "off"}
    if value not in truthy | falsy:
        print(f" - Warning: Invalid PLAYWRIGHT_HEADLESS='{value}', defaulting to {DEFAULT_PLAYWRIGHT_HEADLESS}")
        return DEFAULT_PLAYWRIGHT_HEADLESS
    return value in truthy


# Valid browsers supported by Playwright MCP
VALID_PLAYWRIGHT_BROWSERS = {"chrome", "firefox", "webkit", "msedge"}


def get_playwright_browser() -> str:
    """
    Get the browser to use for Playwright.

    Reads from PLAYWRIGHT_BROWSER environment variable, defaults to firefox.
    Options: chrome, firefox, webkit, msedge
    Firefox is recommended for lower CPU usage.
    """
    value = os.getenv("PLAYWRIGHT_BROWSER", DEFAULT_PLAYWRIGHT_BROWSER).strip().lower()
    if value not in VALID_PLAYWRIGHT_BROWSERS:
        print(f" - Warning: Invalid PLAYWRIGHT_BROWSER='{value}', "
              f"valid options: {', '.join(sorted(VALID_PLAYWRIGHT_BROWSERS))}. "
              f"Defaulting to {DEFAULT_PLAYWRIGHT_BROWSER}")
        return DEFAULT_PLAYWRIGHT_BROWSER
    return value

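# Illustrative environment handling for the two Playwright helpers above
# (a sketch of expected results, not executed at import time):
#
#     PLAYWRIGHT_HEADLESS=off     -> get_playwright_headless() returns False
#     PLAYWRIGHT_HEADLESS=yes     -> returns True
#     PLAYWRIGHT_HEADLESS=maybe   -> warning printed, falls back to True (the default)
#
#     PLAYWRIGHT_BROWSER=WebKit   -> get_playwright_browser() returns "webkit" (lowercased)
#     PLAYWRIGHT_BROWSER=safari   -> warning printed, falls back to "firefox"
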
def get_extra_read_paths() -> list[Path]:
    """
    Get extra read-only paths from EXTRA_READ_PATHS environment variable.

    Parses comma-separated absolute paths and validates each one:
    - Must be an absolute path
    - Must exist and be a directory
    - Cannot be or contain sensitive directories (e.g., .ssh, .aws)

    Returns:
        List of validated, canonicalized Path objects.
    """
    raw_value = os.getenv(EXTRA_READ_PATHS_VAR, "").strip()
    if not raw_value:
        return []

    validated_paths: list[Path] = []
    home_dir = Path.home()

    for path_str in raw_value.split(","):
        path_str = path_str.strip()
        if not path_str:
            continue

        # Parse and canonicalize the path
        try:
            path = Path(path_str).resolve()
        except (OSError, ValueError) as e:
            print(f" - Warning: Invalid EXTRA_READ_PATHS path '{path_str}': {e}")
            continue

        # Must be absolute (resolve() makes it absolute, but check original input)
        if not Path(path_str).is_absolute():
            print(f" - Warning: EXTRA_READ_PATHS requires absolute paths, skipping: {path_str}")
            continue

        # Must exist
        if not path.exists():
            print(f" - Warning: EXTRA_READ_PATHS path does not exist, skipping: {path_str}")
            continue

        # Must be a directory
        if not path.is_dir():
            print(f" - Warning: EXTRA_READ_PATHS path is not a directory, skipping: {path_str}")
            continue

        # Check against sensitive directory blocklist
        is_blocked = False
        for sensitive in EXTRA_READ_PATHS_BLOCKLIST:
            sensitive_path = (home_dir / sensitive).resolve()
            try:
                # Block if path IS the sensitive dir or is INSIDE it
                if path == sensitive_path or path.is_relative_to(sensitive_path):
                    print(f" - Warning: EXTRA_READ_PATHS blocked sensitive path: {path_str}")
                    is_blocked = True
                    break
                # Also block if sensitive dir is INSIDE the requested path
                if sensitive_path.is_relative_to(path):
                    print(f" - Warning: EXTRA_READ_PATHS path contains sensitive directory ({sensitive}): {path_str}")
                    is_blocked = True
                    break
            except (OSError, ValueError):
                # is_relative_to can raise on some edge cases
                continue

        if is_blocked:
            continue

        validated_paths.append(path)

    return validated_paths

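# Illustrative behavior of get_extra_read_paths() (hypothetical values; assumes
# /Volumes/Data/dev exists and the home directory is /Users/me):
#
#     EXTRA_READ_PATHS="/Volumes/Data/dev, relative/path, /Users/me/.ssh"
#       -> [Path("/Volumes/Data/dev")]
#
# "relative/path" is skipped (not absolute) and "/Users/me/.ssh" is blocked via
# the SENSITIVE_DIRECTORIES blocklist; a warning is printed for each rejection.
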
# Per-agent-type MCP tool lists.
# Only expose the tools each agent type actually needs, reducing tool schema
# overhead and preventing agents from calling tools meant for other roles.
#
# Tools intentionally omitted from ALL agent lists (UI/orchestrator only):
#   feature_get_ready, feature_get_blocked, feature_get_graph,
#   feature_remove_dependency
#
# The ghost tool "feature_release_testing" was removed entirely -- it was
# listed here but never implemented in mcp_server/feature_mcp.py.

CODING_AGENT_TOOLS = [
    "mcp__features__feature_get_stats",
    "mcp__features__feature_get_by_id",
    "mcp__features__feature_get_summary",
    "mcp__features__feature_claim_and_get",
    "mcp__features__feature_mark_in_progress",
    "mcp__features__feature_mark_passing",
    "mcp__features__feature_mark_failing",
    "mcp__features__feature_skip",
    "mcp__features__feature_clear_in_progress",
]

TESTING_AGENT_TOOLS = [
    "mcp__features__feature_get_stats",
    "mcp__features__feature_get_by_id",
    "mcp__features__feature_get_summary",
    "mcp__features__feature_mark_passing",
    "mcp__features__feature_mark_failing",
]

INITIALIZER_AGENT_TOOLS = [
    "mcp__features__feature_get_stats",
    "mcp__features__feature_create_bulk",
    "mcp__features__feature_create",
    "mcp__features__feature_add_dependency",
    "mcp__features__feature_set_dependencies",
]

# Union of all agent tool lists -- used for permissions (all tools remain
# *permitted* so the MCP server can respond, but only the agent-type-specific
# list is included in allowed_tools, which controls what the LLM sees).
ALL_FEATURE_MCP_TOOLS = sorted(
    set(CODING_AGENT_TOOLS) | set(TESTING_AGENT_TOOLS) | set(INITIALIZER_AGENT_TOOLS)
)

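# Sanity note (illustrative, not executed at import): the per-agent lists above
# are deliberately small -- 9 tools for coding, 5 for testing, 5 for the
# initializer -- versus the 19 tools previously exposed to every agent type.
#
#     >>> len(CODING_AGENT_TOOLS), len(TESTING_AGENT_TOOLS), len(INITIALIZER_AGENT_TOOLS)
#     (9, 5, 5)
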
# Playwright MCP tools for browser automation.
# Full set of tools for comprehensive UI testing including drag-and-drop,
# hover menus, file uploads, tab management, etc.
PLAYWRIGHT_TOOLS = [
    # Core navigation & screenshots
    "mcp__playwright__browser_navigate",
    "mcp__playwright__browser_navigate_back",
    "mcp__playwright__browser_take_screenshot",
    "mcp__playwright__browser_snapshot",

    # Element interaction
    "mcp__playwright__browser_click",
    "mcp__playwright__browser_type",
    "mcp__playwright__browser_fill_form",
    "mcp__playwright__browser_select_option",
    "mcp__playwright__browser_press_key",
    "mcp__playwright__browser_drag",
    "mcp__playwright__browser_hover",
    "mcp__playwright__browser_file_upload",

    # JavaScript & debugging
    "mcp__playwright__browser_evaluate",
    # "mcp__playwright__browser_run_code",  # REMOVED - causes Playwright MCP server crash
    "mcp__playwright__browser_console_messages",
    "mcp__playwright__browser_network_requests",

    # Browser management
    "mcp__playwright__browser_resize",
    "mcp__playwright__browser_wait_for",
    "mcp__playwright__browser_handle_dialog",
    "mcp__playwright__browser_install",
    "mcp__playwright__browser_close",
    "mcp__playwright__browser_tabs",
]

# Built-in tools available to agents.
# WebFetch and WebSearch are included so coding agents can look up current
# documentation for frameworks and libraries they are implementing.
BUILTIN_TOOLS = [
    "Read",
    "Write",
    "Edit",
    "Glob",
    "Grep",
    "Bash",
    "WebFetch",
    "WebSearch",
]

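# Illustrative composition (a sketch of what create_client() below assembles;
# counts follow from the lists above):
#
#     # testing agent in YOLO mode -> no Playwright tools
#     allowed_tools = [*BUILTIN_TOOLS, *TESTING_AGENT_TOOLS]                    # 8 + 5 = 13 tools
#     # coding agent in standard mode -> Playwright tools included as well
#     allowed_tools = [*BUILTIN_TOOLS, *CODING_AGENT_TOOLS, *PLAYWRIGHT_TOOLS]
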
def create_client(
    project_dir: Path,
    model: str,
    yolo_mode: bool = False,
    agent_id: str | None = None,
    agent_type: str = "coding",
):
    """
    Create a Claude Agent SDK client with multi-layered security.

    Args:
        project_dir: Directory for the project
        model: Claude model to use
        yolo_mode: If True, skip Playwright MCP server for rapid prototyping
        agent_id: Optional unique identifier for browser isolation in parallel mode.
            When provided, each agent gets its own isolated browser context.
        agent_type: One of "coding", "testing", or "initializer". Controls which
            MCP tools are exposed and the max_turns limit.

    Returns:
        Configured ClaudeSDKClient (from claude_agent_sdk)

    Security layers (defense in depth):
    1. Sandbox - OS-level bash command isolation prevents filesystem escape
    2. Permissions - File operations restricted to project_dir only
    3. Security hooks - Bash commands validated against an allowlist
       (see security.py for ALLOWED_COMMANDS)

    Note: Authentication is handled by start.bat/start.sh before this runs.
    The Claude SDK auto-detects credentials from the Claude CLI configuration.
    """
    # Select the feature MCP tools appropriate for this agent type
    feature_tools_map = {
        "coding": CODING_AGENT_TOOLS,
        "testing": TESTING_AGENT_TOOLS,
        "initializer": INITIALIZER_AGENT_TOOLS,
    }
    feature_tools = feature_tools_map.get(agent_type, CODING_AGENT_TOOLS)

    # Select max_turns based on agent type:
    # - coding/initializer: 300 turns (complex multi-step implementation)
    # - testing: 100 turns (focused verification of a single feature)
    max_turns_map = {
        "coding": 300,
        "testing": 100,
        "initializer": 300,
    }
    max_turns = max_turns_map.get(agent_type, 300)

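    # Illustrative selection (sketch of the two lookups above): agent_type="testing"
    # yields TESTING_AGENT_TOOLS and max_turns=100, "initializer" yields
    # INITIALIZER_AGENT_TOOLS and max_turns=300, and any unrecognized value falls
    # back to the coding defaults (CODING_AGENT_TOOLS, 300 turns).
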
    # Build allowed tools list based on mode and agent type.
    # In YOLO mode, exclude Playwright tools for faster prototyping.
    allowed_tools = [*BUILTIN_TOOLS, *feature_tools]
    if not yolo_mode:
        allowed_tools.extend(PLAYWRIGHT_TOOLS)

    # Build permissions list.
    # We permit ALL feature MCP tools at the security layer (so the MCP server
    # can respond if called), but the LLM only *sees* the agent-type-specific
    # subset via allowed_tools above.
    permissions_list = [
        # Allow all file operations within the project directory
        "Read(./**)",
        "Write(./**)",
        "Edit(./**)",
        "Glob(./**)",
        "Grep(./**)",
        # Bash permission granted here, but actual commands are validated
        # by the bash_security_hook (see security.py for allowed commands)
        "Bash(*)",
        # Allow web tools for looking up framework/library documentation
        "WebFetch(*)",
        "WebSearch(*)",
        # Allow Feature MCP tools for feature management
        *ALL_FEATURE_MCP_TOOLS,
    ]

    # Add extra read paths from environment variable (read-only access)
    # Paths are validated, canonicalized, and checked against sensitive blocklist
    extra_read_paths = get_extra_read_paths()
    for path in extra_read_paths:
        # Add read-only permissions for each validated path
        permissions_list.append(f"Read({path}/**)")
        permissions_list.append(f"Glob({path}/**)")
        permissions_list.append(f"Grep({path}/**)")

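    # Illustrative expansion (hypothetical path, mirroring the EXTRA_READ_PATHS
    # example near the top of this module): one validated /Volumes/Data/dev entry
    # appends exactly three read-only rules:
    #
    #     "Read(/Volumes/Data/dev/**)", "Glob(/Volumes/Data/dev/**)", "Grep(/Volumes/Data/dev/**)"
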
    if not yolo_mode:
        # Allow Playwright MCP tools for browser automation (standard mode only)
        permissions_list.extend(PLAYWRIGHT_TOOLS)

    # Create comprehensive security settings
    # Note: Using relative paths ("./**") restricts access to project directory
    # since cwd is set to project_dir
    security_settings = {
        "sandbox": {"enabled": True, "autoAllowBashIfSandboxed": True},
        "permissions": {
            "defaultMode": "acceptEdits",  # Auto-approve edits within allowed directories
            "allow": permissions_list,
        },
    }

    # Ensure project directory exists before creating settings file
    project_dir.mkdir(parents=True, exist_ok=True)

    # Write settings to a file in the project directory
    from autocoder_paths import get_claude_settings_path
    settings_file = get_claude_settings_path(project_dir)
    settings_file.parent.mkdir(parents=True, exist_ok=True)
    with open(settings_file, "w") as f:
        json.dump(security_settings, f, indent=2)

    print(f"Created security settings at {settings_file}")
    print(" - Sandbox enabled (OS-level bash isolation)")
    print(f" - Filesystem restricted to: {project_dir.resolve()}")
    if extra_read_paths:
        print(f" - Extra read paths (validated): {', '.join(str(p) for p in extra_read_paths)}")
    print(" - Bash commands restricted to allowlist (see security.py)")
    if yolo_mode:
        print(" - MCP servers: features (database) - YOLO MODE (no Playwright)")
    else:
        print(" - MCP servers: playwright (browser), features (database)")
    print(" - Project settings enabled (skills, commands, CLAUDE.md)")
    print()

    # Use system Claude CLI instead of bundled one (avoids Bun runtime crash on Windows)
    system_cli = shutil.which("claude")
    if system_cli:
        print(f" - Using system CLI: {system_cli}")
    else:
        print(" - Warning: System 'claude' CLI not found, using bundled CLI")

    # Build MCP servers config - features is always included, playwright only in standard mode
    mcp_servers = {
        "features": {
            "command": sys.executable,  # Use the same Python that's running this script
            "args": ["-m", "mcp_server.feature_mcp"],
            "env": {
                # Only specify variables the MCP server needs
                # (subprocess inherits parent environment automatically)
                "PROJECT_DIR": str(project_dir.resolve()),
                "PYTHONPATH": str(Path(__file__).parent.resolve()),
            },
        },
    }
    if not yolo_mode:
        # Include Playwright MCP server for browser automation (standard mode only)
        # Browser and headless mode configurable via environment variables
        browser = get_playwright_browser()
        playwright_args = [
            "@playwright/mcp@latest",
            "--viewport-size", "1280x720",
            "--browser", browser,
        ]
        if get_playwright_headless():
            playwright_args.append("--headless")
        print(f" - Browser: {browser} (headless={get_playwright_headless()})")

        # Browser isolation for parallel execution
        # Each agent gets its own isolated browser context to prevent tab conflicts
        if agent_id:
            # Use --isolated for ephemeral browser context
            # This creates a fresh, isolated context without persistent state
            # Note: --isolated and --user-data-dir are mutually exclusive
            playwright_args.append("--isolated")
            print(f" - Browser isolation enabled for agent: {agent_id}")

        mcp_servers["playwright"] = {
            "command": "npx",
            "args": playwright_args,
        }

    # Build environment overrides for API endpoint configuration
    # These override system env vars for the Claude CLI subprocess,
    # allowing AutoCoder to use alternative APIs (e.g., GLM) without
    # affecting the user's global Claude Code settings
    sdk_env = {}
    for var in API_ENV_VARS:
        value = os.getenv(var)
        if value:
            sdk_env[var] = value

    # Detect alternative API mode (Ollama, GLM, or Vertex AI)
    base_url = sdk_env.get("ANTHROPIC_BASE_URL", "")
    is_vertex = sdk_env.get("CLAUDE_CODE_USE_VERTEX") == "1"
    is_alternative_api = bool(base_url) or is_vertex
    is_ollama = "localhost:11434" in base_url or "127.0.0.1:11434" in base_url
    model = convert_model_for_vertex(model)
    if sdk_env:
        print(f" - API overrides: {', '.join(sdk_env.keys())}")
        if is_vertex:
            project_id = sdk_env.get("ANTHROPIC_VERTEX_PROJECT_ID", "unknown")
            region = sdk_env.get("CLOUD_ML_REGION", "unknown")
            print(f" - Vertex AI Mode: Using GCP project '{project_id}' with model '{model}' in region '{region}'")
        elif is_ollama:
            print(" - Ollama Mode: Using local models")
        elif "ANTHROPIC_BASE_URL" in sdk_env:
            print(f" - GLM Mode: Using {sdk_env['ANTHROPIC_BASE_URL']}")

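    # Illustrative detection outcomes for the block above (hypothetical env values):
    #
    #     CLAUDE_CODE_USE_VERTEX=1                           -> Vertex AI mode
    #     ANTHROPIC_BASE_URL=http://localhost:11434          -> Ollama mode
    #     ANTHROPIC_BASE_URL=<any other non-empty URL>       -> GLM mode
    #     (no overrides set)                                 -> standard Anthropic API;
    #                                                           is_alternative_api stays False
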
    # Create a wrapper for bash_security_hook that passes project_dir via context
    async def bash_hook_with_context(input_data, tool_use_id=None, context=None):
        """Wrapper that injects project_dir into context for security hook."""
        if context is None:
            context = {}
        context["project_dir"] = str(project_dir.resolve())
        return await bash_security_hook(input_data, tool_use_id, context)

    # PreCompact hook for logging and customizing context compaction.
    # Compaction is handled automatically by Claude Code CLI when context approaches limits.
    # This hook provides custom instructions that guide the summarizer to preserve
    # critical workflow state while discarding verbose/redundant content.
    async def pre_compact_hook(
        input_data: HookInput,
        tool_use_id: str | None,
        context: HookContext,
    ) -> SyncHookJSONOutput:
        """
        Hook called before context compaction occurs.

        Compaction triggers:
        - "auto": Automatic compaction when context approaches token limits
        - "manual": User-initiated compaction via /compact command

        Returns custom instructions that guide the compaction summarizer to:
        1. Preserve critical workflow state (feature ID, modified files, test results)
        2. Discard verbose content (screenshots, long grep outputs, repeated reads)
        """
        trigger = input_data.get("trigger", "auto")
        custom_instructions = input_data.get("custom_instructions")

        if trigger == "auto":
            print("[Context] Auto-compaction triggered (context approaching limit)")
        else:
            print("[Context] Manual compaction requested")

        if custom_instructions:
            print(f"[Context] Custom instructions provided: {custom_instructions}")

        # Build compaction instructions that preserve workflow-critical context
        # while discarding verbose content that inflates token usage.
        #
        # The summarizer receives these instructions and uses them to decide
        # what to keep vs. discard during context compaction.
        compaction_guidance = "\n".join([
            "## PRESERVE (critical workflow state)",
            "- Current feature ID, feature name, and feature status (pending/in_progress/passing/failing)",
            "- List of all files created or modified during this session, with their paths",
            "- Last test/lint/type-check results: command run, pass/fail status, and key error messages",
            "- Current step in the workflow (e.g., implementing, testing, fixing lint errors)",
            "- Any dependency information (which features block this one)",
            "- Git operations performed (commits, branches created)",
            "- MCP tool call results (feature_claim_and_get, feature_mark_passing, etc.)",
            "- Key architectural decisions made during this session",
            "",
            "## DISCARD (verbose content safe to drop)",
            "- Full screenshot base64 data (just note that a screenshot was taken and what it showed)",
            "- Long grep/find/glob output listings (summarize to: searched for X, found Y relevant files)",
            "- Repeated file reads of the same file (keep only the latest read or a summary of changes)",
            "- Full file contents from Read tool (summarize to: read file X, key sections were Y)",
            "- Verbose npm/pip install output (just note: dependencies installed successfully/failed)",
            "- Full lint/type-check output when passing (just note: lint passed with no errors)",
            "- Browser console message dumps (summarize to: N errors found, key error was X)",
            "- Redundant tool result confirmations ([Done] markers)",
        ])

        print("[Context] Applying custom compaction instructions (preserve workflow state, discard verbose content)")

        # The SDK's HookSpecificOutput union type does not yet include a
        # PreCompactHookSpecificOutput variant, but the CLI protocol accepts
        # {"hookEventName": "PreCompact", "customInstructions": "..."}.
        # The dict is serialized to JSON and sent to the CLI process directly,
        # so the runtime behavior is correct despite the type mismatch.
        return SyncHookJSONOutput(
            hookSpecificOutput={  # type: ignore[typeddict-item]
                "hookEventName": "PreCompact",
                "customInstructions": compaction_guidance,
            }
        )

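    # For reference, the hook's return value serializes to roughly the following
    # JSON before the SDK forwards it to the CLI process (sketch only; the exact
    # envelope is handled by the SDK):
    #
    #     {"hookSpecificOutput": {"hookEventName": "PreCompact",
    #                             "customInstructions": "## PRESERVE ..."}}
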
    # PROMPT CACHING: The Claude Code CLI applies cache_control breakpoints internally.
    # Our system_prompt benefits from automatic caching without explicit configuration.
    # If explicit cache_control is needed, the SDK would need to accept content blocks
    # with cache_control fields (not currently supported in v0.1.x).
    return ClaudeSDKClient(
        options=ClaudeAgentOptions(
            model=model,
            cli_path=system_cli,  # Use system CLI to avoid bundled Bun crash (exit code 3)
            system_prompt="You are an expert full-stack developer building a production-quality web application.",
            setting_sources=["project"],  # Enable skills, commands, and CLAUDE.md from project dir
            max_buffer_size=10 * 1024 * 1024,  # 10MB for large Playwright screenshots
            allowed_tools=allowed_tools,
            mcp_servers=mcp_servers,  # type: ignore[arg-type]  # SDK accepts dict config at runtime
            hooks={
                "PreToolUse": [
                    HookMatcher(matcher="Bash", hooks=[bash_hook_with_context]),
                ],
                # PreCompact hook for context management during long sessions.
                # Compaction is automatic when context approaches token limits.
                # This hook logs compaction events and can customize summarization.
                "PreCompact": [
                    HookMatcher(hooks=[pre_compact_hook]),
                ],
            },
            max_turns=max_turns,
            cwd=str(project_dir.resolve()),
            settings=str(settings_file.resolve()),  # Use absolute path
            env=sdk_env,  # Pass API configuration overrides to CLI subprocess
            # Enable extended context beta for better handling of long sessions.
            # This provides up to 1M tokens of context with automatic compaction.
            # See: https://docs.anthropic.com/en/api/beta-headers
            # Disabled for alternative APIs (Ollama, GLM, Vertex AI) as they don't support this beta.
            betas=[] if is_alternative_api else ["context-1m-2025-08-07"],
            # Note on context management:
            # The Claude Agent SDK handles context management automatically through the
            # underlying Claude Code CLI. When context approaches limits, the CLI
            # automatically compacts/summarizes previous messages.
            #
            # The SDK does NOT expose explicit compaction_control or context_management
            # parameters. Instead, context is managed via:
            # 1. betas=["context-1m-2025-08-07"] - Extended context window
            # 2. PreCompact hook - Intercept and customize compaction behavior
            # 3. max_turns - Limit conversation turns (per agent type: coding=300, testing=100)
            #
            # Future SDK versions may add explicit compaction controls. When available,
            # consider adding:
            # - compaction_control={"enabled": True, "context_token_threshold": 80000}
            # - context_management={"edits": [...]} for tool use clearing
        )
    )
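
# Illustrative usage (hypothetical values; in practice the orchestrator supplies
# these when it spawns an agent session):
#
#     from pathlib import Path
#
#     client = create_client(
#         project_dir=Path("projects/my-app"),   # hypothetical project directory
#         model="claude-opus-4-5-20251101",      # example model name from the docstring above
#         yolo_mode=False,                       # standard mode: Playwright MCP enabled
#         agent_id="agent-1",                    # hypothetical id -> isolated browser context
#         agent_type="testing",                  # exposes TESTING_AGENT_TOOLS, max_turns=100
#     )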