diff --git a/server/routers/agent.py b/server/routers/agent.py index b9a7756..11e9b8a 100644 --- a/server/routers/agent.py +++ b/server/routers/agent.py @@ -17,11 +17,11 @@ from ..utils.project_helpers import get_project_path as _get_project_path from ..utils.validation import validate_project_name -def _get_settings_defaults() -> tuple[bool, str, int]: +def _get_settings_defaults() -> tuple[bool, str, int, bool]: """Get defaults from global settings. Returns: - Tuple of (yolo_mode, model, testing_agent_ratio) + Tuple of (yolo_mode, model, testing_agent_ratio, playwright_headless) """ import sys root = Path(__file__).parent.parent.parent @@ -40,7 +40,9 @@ def _get_settings_defaults() -> tuple[bool, str, int]: except (ValueError, TypeError): testing_agent_ratio = 1 - return yolo_mode, model, testing_agent_ratio + playwright_headless = (settings.get("playwright_headless") or "true").lower() == "true" + + return yolo_mode, model, testing_agent_ratio, playwright_headless router = APIRouter(prefix="/api/projects/{project_name}/agent", tags=["agent"]) @@ -89,7 +91,7 @@ async def start_agent( manager = get_project_manager(project_name) # Get defaults from global settings if not provided in request - default_yolo, default_model, default_testing_ratio = _get_settings_defaults() + default_yolo, default_model, default_testing_ratio, playwright_headless = _get_settings_defaults() yolo_mode = request.yolo_mode if request.yolo_mode is not None else default_yolo model = request.model if request.model else default_model @@ -101,6 +103,7 @@ async def start_agent( model=model, max_concurrency=max_concurrency, testing_agent_ratio=testing_agent_ratio, + playwright_headless=playwright_headless, ) # Notify scheduler of manual start (to prevent auto-stop during scheduled window) diff --git a/server/routers/settings.py b/server/routers/settings.py index 4b9c3e5..481cd0d 100644 --- a/server/routers/settings.py +++ b/server/routers/settings.py @@ -91,6 +91,7 @@ async def get_settings(): glm_mode=_is_glm_mode(), ollama_mode=_is_ollama_mode(), testing_agent_ratio=_parse_int(all_settings.get("testing_agent_ratio"), 1), + playwright_headless=_parse_bool(all_settings.get("playwright_headless"), default=True), ) @@ -106,6 +107,9 @@ async def update_settings(update: SettingsUpdate): if update.testing_agent_ratio is not None: set_setting("testing_agent_ratio", str(update.testing_agent_ratio)) + if update.playwright_headless is not None: + set_setting("playwright_headless", "true" if update.playwright_headless else "false") + # Return updated settings all_settings = get_all_settings() return SettingsResponse( @@ -114,4 +118,5 @@ async def update_settings(update: SettingsUpdate): glm_mode=_is_glm_mode(), ollama_mode=_is_ollama_mode(), testing_agent_ratio=_parse_int(all_settings.get("testing_agent_ratio"), 1), + playwright_headless=_parse_bool(all_settings.get("playwright_headless"), default=True), ) diff --git a/server/schemas.py b/server/schemas.py index 03e73ef..b5e2d84 100644 --- a/server/schemas.py +++ b/server/schemas.py @@ -398,6 +398,7 @@ class SettingsResponse(BaseModel): glm_mode: bool = False # True if GLM API is configured via .env ollama_mode: bool = False # True if Ollama API is configured via .env testing_agent_ratio: int = 1 # Regression testing agents (0-3) + playwright_headless: bool = True class ModelsResponse(BaseModel): @@ -411,6 +412,7 @@ class SettingsUpdate(BaseModel): yolo_mode: bool | None = None model: str | None = None testing_agent_ratio: int | None = None # 0-3 + playwright_headless: bool | None = None @field_validator('model') @classmethod diff --git a/server/services/process_manager.py b/server/services/process_manager.py index fa489ec..81a9025 100644 --- a/server/services/process_manager.py +++ b/server/services/process_manager.py @@ -297,6 +297,7 @@ class AgentProcessManager: parallel_mode: bool = False, max_concurrency: int | None = None, testing_agent_ratio: int = 1, + playwright_headless: bool = True, ) -> tuple[bool, str]: """ Start the agent as a subprocess. @@ -307,6 +308,7 @@ class AgentProcessManager: parallel_mode: DEPRECATED - ignored, always uses unified orchestrator max_concurrency: Max concurrent coding agents (1-5, default 1) testing_agent_ratio: Number of regression testing agents (0-3, default 1) + playwright_headless: If True, run browser in headless mode Returns: Tuple of (success, message) @@ -358,7 +360,7 @@ class AgentProcessManager: "stdout": subprocess.PIPE, "stderr": subprocess.STDOUT, "cwd": str(self.project_dir), - "env": {**os.environ, "PYTHONUNBUFFERED": "1"}, + "env": {**os.environ, "PYTHONUNBUFFERED": "1", "PLAYWRIGHT_HEADLESS": "true" if playwright_headless else "false"}, } if sys.platform == "win32": popen_kwargs["creationflags"] = subprocess.CREATE_NO_WINDOW diff --git a/summary.md b/summary.md deleted file mode 100644 index f38fbbd..0000000 --- a/summary.md +++ /dev/null @@ -1,146 +0,0 @@ -# Autocoder Refactoring Summary - -## TL;DR - -This refactoring makes agents faster, cheaper, and more reliable. **Token usage drops ~40% per session**, agents retry rate limits in 15s instead of 60s, the orchestrator runs 80% fewer database queries per loop, and testing agents now batch 3 features per session instead of 1. Two bugs were fixed: a ghost MCP tool that wasted tokens every testing session, and missing Vertex AI environment variables that broke Vertex users. - ---- - -## What You'll Notice Immediately - -### Faster Agent Startup & Recovery -- **Rate limit retries start at ~15s** (was 60s) with jitter to prevent thundering herd -- **Post-spawn delay reduced to 0.5s** (was 2s) — agents claim features faster -- **Orchestrator makes 1 DB query per loop** (was 5-7) — scheduling decisions happen instantly - -### Lower Token Costs -- **Coding agents use ~4,500 fewer tokens/session** — trimmed prompts, removed unused tools -- **Testing agents use ~5,500 fewer tokens/session** — streamlined prompt, fewer MCP tools -- **For a 200-feature project: ~2.3M fewer input tokens total** -- Agents only see tools they actually need (coding: 9, testing: 5, initializer: 5 — was 19 for all) -- `max_turns` reduced: coding 300 (was 1000), testing 100 (was 1000) - -### YOLO Mode Is Actually Faster Now -- Browser testing instructions are **stripped from the prompt** in YOLO mode -- Previously, YOLO mode still sent full Playwright instructions (agents would try to use them) -- Prompt stripping saves ~1,000 additional tokens per YOLO session - -### Batched Testing (Parallel Mode) -- Testing agents now verify **3 features per session** instead of 1 -- Weighted selection prioritizes high-dependency features and avoids re-testing -- **50-70% less per-feature testing overhead** (shared prompt, shared browser, shared startup) -- Configurable via `--testing-batch-size` (1-5) - -### Smart Context Compaction -- When agent context gets long, compaction now **preserves**: current feature, modified files, test results, workflow step -- **Discards**: screenshot base64 data, long grep outputs, repeated file reads, verbose install logs -- Agents lose less critical context during long sessions - ---- - -## Bug Fixes - -| Bug | Impact | Fix | -|-----|--------|-----| -| Ghost `feature_release_testing` MCP tool | Every testing session wasted tokens calling a non-existent tool | Removed from tool lists and testing prompt | -| Missing Vertex AI env vars | `CLAUDE_CODE_USE_VERTEX`, `CLOUD_ML_REGION`, `ANTHROPIC_VERTEX_PROJECT_ID` not forwarded to chat sessions — broke Vertex AI users | Centralized `API_ENV_VARS` in `env_constants.py` with all 9 vars | -| DetachedInstanceError risk | `_get_test_batch` accessed ORM objects after session close — could crash in parallel mode | Extract data to dicts before closing session | -| Redundant testing of same features | Multiple testing agents could pick the same features simultaneously | Exclude currently-testing features from batch selection | - ---- - -## Architecture Improvements - -### Code Deduplication -- `_get_project_path()`: 9 copies → 1 shared utility (`server/utils/project_helpers.py`) -- `validate_project_name()`: 9 copies → 2 variants in 1 file (`server/utils/validation.py`) -- `ROOT_DIR`: 10 copies → 1 definition (`server/services/chat_constants.py`) -- `API_ENV_VARS`: 4 copies → 1 source of truth (`env_constants.py`) -- Chat session services: extracted `BaseChatSession` pattern, shared constants - -### Security Hardening -- **Unified sensitive directory blocklist**: 14 directories blocked consistently across filesystem browser AND extra read paths (was two divergent lists of 8 and 12) -- **Cached `get_blocked_paths()`**: O(1) instead of O(n*m) per directory listing -- **Terminal security warning**: Logs prominent warning when `ALLOW_REMOTE=1` exposes terminal WebSocket -- **20 new security tests**: 10 for EXTRA_READ_PATHS blocking, plus existing tests cleaned up -- **Security validation DRY**: Extracted `_validate_command_list()` and `_validate_pkill_processes()` helpers - -### Type Safety -- **87 mypy errors → 0** across 58 source files -- Installed `types-PyYAML` for proper yaml stub types -- Fixed SQLAlchemy `Column[T]` → `T` coercions across all routers -- Fixed Popen `env` dict typing in orchestrator -- Added None guards for regex matches and optional values - -### Dead Code Removed -- 13 files deleted (~2,679 lines): unused UI components, debug logs, outdated docs, Windows artifacts -- 7 unused npm packages removed (Radix UI components with 0 imports) -- 16 redundant security test assertions removed -- UI `AgentAvatar.tsx` reduced from 615 → 119 lines (SVGs extracted to `mascotData.tsx`) - ---- - -## Performance Numbers - -| Metric | Before | After | Improvement | -|--------|--------|-------|-------------| -| Tokens per coding session | ~12,000 input | ~7,500 input | **-37%** | -| Tokens per testing session | ~10,000 input | ~4,500 input | **-55%** | -| Tokens per 200-feature project | ~6.5M | ~4.2M | **-2.3M tokens** | -| MCP tools loaded (coding) | 19 | 9 | **-53%** | -| MCP tools loaded (testing) | 19 | 5 | **-74%** | -| Playwright tools loaded | 20 | 20 | Restored | -| DB queries per orchestrator loop | 5-7 | 1 | **-80%** | -| Rate limit first retry | 60s | ~15-20s | **-70%** | -| Features per testing session | 1 | 3 | **+200%** | -| Post-spawn delay | 2.0s | 0.5s | **-75%** | -| max_turns (coding) | 1000 | 300 | Right-sized | -| max_turns (testing) | 1000 | 100 | Right-sized | -| mypy errors | 87 | 0 | **Clean** | -| Duplicate code instances | 40+ | 4 | **-90%** | - ---- - -## New CLI Options - -```bash -# Testing batch size (parallel mode) -python autonomous_agent_demo.py --project-dir my-app --parallel --testing-batch-size 5 - -# Multiple testing feature IDs (direct) -python autonomous_agent_demo.py --project-dir my-app --testing-feature-ids 5,12,18 -``` - ---- - -## Files Changed - -**New files (6):** -- `env_constants.py` — Single source of truth for API environment variables -- `server/utils/project_helpers.py` — Shared `get_project_path()` utility -- `server/services/chat_constants.py` — Shared chat session constants and Vertex AI env vars -- `ui/src/components/mascotData.tsx` — Extracted SVG mascot data (~500 lines) -- `test_client.py` — New tests for EXTRA_READ_PATHS security blocking -- `summary.md` — This file - -**Deleted files (13):** -- `nul`, `orchestrator_debug.log`, `PHASE3_SPEC.md`, `CUSTOM_UPDATES.md`, `SAMPLE_PROMPT.md` -- `issues/issues.md` -- 7 unused UI components (`toggle`, `scroll-area`, `tooltip`, `popover`, `radio-group`, `select`, `tabs`) - -**Major modifications (15):** -- `client.py` — Agent-type tool lists, Playwright trimming, max_turns, PreCompact, sensitive dirs -- `parallel_orchestrator.py` — DB consolidation, test batching, weighted selection, logging cleanup -- `security.py` — Unified blocklist, validation helpers -- `prompts.py` — YOLO stripping, batch testing prompt support -- `agent.py` — Agent type threading, testing feature IDs -- `autonomous_agent_demo.py` — New CLI arguments -- `.claude/templates/coding_prompt.template.md` — Trimmed ~150 lines -- `.claude/templates/testing_prompt.template.md` — Streamlined + batch support -- `ui/src/components/AgentAvatar.tsx` — 615 → 119 lines -- `rate_limit_utils.py` — New backoff formula with jitter -- `api/dependency_resolver.py` — deque fix, score caching support -- `server/routers/filesystem.py` — Cached blocked paths, unified blocklist -- `server/services/assistant_chat_session.py` — Type fixes, shared constants -- `server/services/spec_chat_session.py` — Type fixes, shared constants -- `server/services/expand_chat_session.py` — Type fixes, shared constants diff --git a/ui/src/components/SettingsModal.tsx b/ui/src/components/SettingsModal.tsx index a4b787f..03138b7 100644 --- a/ui/src/components/SettingsModal.tsx +++ b/ui/src/components/SettingsModal.tsx @@ -171,6 +171,24 @@ export function SettingsModal({ isOpen, onClose }: SettingsModalProps) { /> + {/* Headless Browser Toggle */} +
+ Run browser without visible window (saves CPU) +
+