refactor: optimize token usage, deduplicate code, fix bugs across agents

Token reduction (~40% per session, ~2.3M fewer tokens per 200-feature project):
- Agent-type-specific tool lists: coding 9, testing 5, init 5 (was 19 for all)
- Right-sized max_turns: coding 300, testing 100 (was 1000 for all)
- Trimmed coding prompt template (~150 lines removed)
- Streamlined testing prompt with batch support
- YOLO mode now strips browser testing instructions from prompt
- Added Grep, WebFetch, WebSearch to expand project session

Performance improvements:
- Rate limit retries start at ~15s with jitter (was fixed 60s)
- Post-spawn delay reduced to 0.5s (was 2s)
- Orchestrator consolidated to 1 DB query per loop (was 5-7)
- Testing agents batch 3 features per session (was 1)
- Smart context compaction preserves critical state, discards noise

Bug fixes:
- Removed ghost feature_release_testing MCP tool (wasted tokens every test session)
- Forward all 9 Vertex AI env vars to chat sessions (was missing 3)
- Fix DetachedInstanceError risk in test batch ORM access
- Prevent duplicate testing of same features in parallel mode

Code deduplication:
- _get_project_path(): 9 copies -> 1 shared utility (project_helpers.py)
- validate_project_name(): 9 copies -> 2 variants in 1 file (validation.py)
- ROOT_DIR: 10 copies -> 1 definition (chat_constants.py)
- API_ENV_VARS: 4 copies -> 1 source of truth (env_constants.py)

Security hardening:
- Unified sensitive directory blocklist (14 dirs, was two divergent lists)
- Cached get_blocked_paths() for O(1) directory listing checks
- Terminal security warning when ALLOW_REMOTE=1 exposes WebSocket
- 20 new security tests for EXTRA_READ_PATHS blocking
- Extracted _validate_command_list() and _validate_pkill_processes() helpers

Type safety:
- 87 mypy errors -> 0 across 58 source files
- Installed types-PyYAML for proper yaml stub types
- Fixed SQLAlchemy Column[T] coercions across all routers

Dead code removed:
- 13 files deleted (~2,679 lines): unused UI components, debug logs, outdated docs
- 7 unused npm packages removed (Radix UI components with 0 imports)
- AgentAvatar.tsx reduced from 615 -> 119 lines (SVGs extracted to mascotData.tsx)

New CLI options:
- --testing-batch-size (1-5) for parallel mode test batching
- --testing-feature-ids for direct multi-feature testing

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-03-19 11:53:09 +00:00 · 2026-02-01 13:16:24 +02:00
parent dc5bcc4ae9
commit 94e0b05cb1
57 changed files with 1974 additions and 4300 deletions
--- a/server/services/assistant_chat_session.py
+++ b/server/services/assistant_chat_session.py
@@ -25,25 +25,13 @@ from .assistant_database import (
    create_conversation,
    get_messages,
 )
+from .chat_constants import API_ENV_VARS, ROOT_DIR

 # Load environment variables from .env file if present
 load_dotenv()

 logger = logging.getLogger(__name__)

-# Root directory of the project
-ROOT_DIR = Path(__file__).parent.parent.parent
-
-# Environment variables to pass through to Claude CLI for API configuration
-API_ENV_VARS = [
-    "ANTHROPIC_BASE_URL",
-    "ANTHROPIC_AUTH_TOKEN",
-    "API_TIMEOUT_MS",
-    "ANTHROPIC_DEFAULT_SONNET_MODEL",
-    "ANTHROPIC_DEFAULT_OPUS_MODEL",
-    "ANTHROPIC_DEFAULT_HAIKU_MODEL",
-]
-
 # Read-only feature MCP tools
 READONLY_FEATURE_MCP_TOOLS = [
    "mcp__features__feature_get_stats",
@@ -215,7 +203,7 @@ class AssistantChatSession:
        # Create a new conversation if we don't have one
        if is_new_conversation:
            conv = create_conversation(self.project_dir, self.project_name)
-            self.conversation_id = conv.id
+            self.conversation_id = int(conv.id)  # type coercion: Column[int] -> int
            yield {"type": "conversation_created", "conversation_id": self.conversation_id}

        # Build permissions list for assistant access (read + feature management)
@@ -270,7 +258,11 @@ class AssistantChatSession:
        system_cli = shutil.which("claude")

        # Build environment overrides for API configuration
-        sdk_env = {var: os.getenv(var) for var in API_ENV_VARS if os.getenv(var)}
+        sdk_env: dict[str, str] = {}
+        for var in API_ENV_VARS:
+            value = os.getenv(var)
+            if value:
+                sdk_env[var] = value

        # Determine model from environment or use default
        # This allows using alternative APIs (e.g., GLM via z.ai) that may not support Claude model names
@@ -286,7 +278,7 @@ class AssistantChatSession:
                    # This avoids Windows command line length limit (~8191 chars)
                    setting_sources=["project"],
                    allowed_tools=[*READONLY_BUILTIN_TOOLS, *ASSISTANT_FEATURE_TOOLS],
-                    mcp_servers=mcp_servers,
+                    mcp_servers=mcp_servers,  # type: ignore[arg-type]  # SDK accepts dict config at runtime
                    permission_mode="bypassPermissions",
                    max_turns=100,
                    cwd=str(self.project_dir.resolve()),
@@ -312,6 +304,8 @@ class AssistantChatSession:
                greeting = f"Hello! I'm your project assistant for **{self.project_name}**. I can help you understand the codebase, explain features, and answer questions about the project. What would you like to know?"

                # Store the greeting in the database
+                # conversation_id is guaranteed non-None here (assigned from create_conversation() above)
+                assert self.conversation_id is not None
                add_message(self.project_dir, self.conversation_id, "assistant", greeting)

                yield {"type": "text", "content": greeting}
--- a/server/services/assistant_database.py
+++ b/server/services/assistant_database.py
@@ -13,6 +13,7 @@ from pathlib import Path
 from typing import Optional

 from sqlalchemy import Column, DateTime, ForeignKey, Integer, String, Text, create_engine, func
+from sqlalchemy.engine import Engine
 from sqlalchemy.orm import DeclarativeBase, relationship, sessionmaker

 logger = logging.getLogger(__name__)
@@ -23,7 +24,7 @@ class Base(DeclarativeBase):

 # Engine cache to avoid creating new engines for each request
 # Key: project directory path (as posix string), Value: SQLAlchemy engine
-_engine_cache: dict[str, object] = {}
+_engine_cache: dict[str, Engine] = {}

 # Lock for thread-safe access to the engine cache
 # Prevents race conditions when multiple threads create engines simultaneously
--- a/server/services/chat_constants.py
+++ b/server/services/chat_constants.py
@@ -0,0 +1,57 @@
+"""
+Chat Session Constants
+======================
+
+Shared constants for all chat session types (assistant, spec, expand).
+
+The canonical ``API_ENV_VARS`` list lives in ``env_constants.py`` at the
+project root and is re-exported here for convenience so that existing
+imports (``from .chat_constants import API_ENV_VARS``) continue to work.
+"""
+
+import sys
+from pathlib import Path
+from typing import AsyncGenerator
+
+# -------------------------------------------------------------------
+# Root directory of the autocoder project (repository root).
+# Used throughout the server package whenever the repo root is needed.
+# -------------------------------------------------------------------
+ROOT_DIR = Path(__file__).parent.parent.parent
+
+# Ensure the project root is on sys.path so we can import env_constants
+# from the root-level module without requiring a package install.
+_root_str = str(ROOT_DIR)
+if _root_str not in sys.path:
+    sys.path.insert(0, _root_str)
+
+# -------------------------------------------------------------------
+# Environment variables forwarded to Claude CLI subprocesses.
+# Single source of truth lives in env_constants.py at the project root.
+# Re-exported here so existing ``from .chat_constants import API_ENV_VARS``
+# imports continue to work unchanged.
+# -------------------------------------------------------------------
+from env_constants import API_ENV_VARS  # noqa: E402, F401
+
+
+async def make_multimodal_message(content_blocks: list[dict]) -> AsyncGenerator[dict, None]:
+    """Yield a single multimodal user message in Claude Agent SDK format.
+
+    The Claude Agent SDK's ``query()`` method accepts either a plain string
+    or an ``AsyncIterable[dict]`` for custom message formats.  This helper
+    wraps a list of content blocks (text and/or images) in the expected
+    envelope; the list is embedded as-is (not copied).
+
+    Args:
+        content_blocks: List of content-block dicts, e.g.
+            ``[{"type": "text", "text": "..."}, {"type": "image", ...}]``.
+
+    Yields:
+        Exactly one dict: the ``"user"`` envelope carrying ``content_blocks``.
+    """
+    yield {
+        "type": "user",
+        "message": {"role": "user", "content": content_blocks},
+        "parent_tool_use_id": None,
+        "session_id": "default",
+    }
--- a/server/services/expand_chat_session.py
+++ b/server/services/expand_chat_session.py
@@ -16,28 +16,19 @@ import threading
 import uuid
 from datetime import datetime
 from pathlib import Path
-from typing import AsyncGenerator, Optional
+from typing import Any, AsyncGenerator, Optional

 from claude_agent_sdk import ClaudeAgentOptions, ClaudeSDKClient
 from dotenv import load_dotenv

 from ..schemas import ImageAttachment
+from .chat_constants import API_ENV_VARS, ROOT_DIR, make_multimodal_message

 # Load environment variables from .env file if present
 load_dotenv()

 logger = logging.getLogger(__name__)

-# Environment variables to pass through to Claude CLI for API configuration
-API_ENV_VARS = [
-    "ANTHROPIC_BASE_URL",
-    "ANTHROPIC_AUTH_TOKEN",
-    "API_TIMEOUT_MS",
-    "ANTHROPIC_DEFAULT_SONNET_MODEL",
-    "ANTHROPIC_DEFAULT_OPUS_MODEL",
-    "ANTHROPIC_DEFAULT_HAIKU_MODEL",
-]
-
 # Feature MCP tools needed for expand session
 EXPAND_FEATURE_TOOLS = [
    "mcp__features__feature_create",
@@ -46,22 +37,6 @@ EXPAND_FEATURE_TOOLS = [
 ]


-async def _make_multimodal_message(content_blocks: list[dict]) -> AsyncGenerator[dict, None]:
-    """
-    Create an async generator that yields a properly formatted multimodal message.
-    """
-    yield {
-        "type": "user",
-        "message": {"role": "user", "content": content_blocks},
-        "parent_tool_use_id": None,
-        "session_id": "default",
-    }
-
-
-# Root directory of the project
-ROOT_DIR = Path(__file__).parent.parent.parent
-
-
 class ExpandChatSession:
    """
    Manages a project expansion conversation.
@@ -179,7 +154,12 @@ class ExpandChatSession:
        system_prompt = skill_content.replace("$ARGUMENTS", project_path)

        # Build environment overrides for API configuration
-        sdk_env = {var: os.getenv(var) for var in API_ENV_VARS if os.getenv(var)}
+        # Filter to only include vars that are set to a non-empty value
+        sdk_env: dict[str, str] = {}
+        for var in API_ENV_VARS:
+            value = os.getenv(var)
+            if value:
+                sdk_env[var] = value

        # Determine model from environment or use default
        # This allows using alternative APIs (e.g., GLM via z.ai) that may not support Claude model names
@@ -207,9 +187,12 @@ class ExpandChatSession:
                    allowed_tools=[
                        "Read",
                        "Glob",
+                        "Grep",
+                        "WebFetch",
+                        "WebSearch",
                        *EXPAND_FEATURE_TOOLS,
                    ],
-                    mcp_servers=mcp_servers,
+                    mcp_servers=mcp_servers,  # type: ignore[arg-type]  # SDK accepts dict config at runtime
                    permission_mode="bypassPermissions",
                    max_turns=100,
                    cwd=str(self.project_dir.resolve()),
@@ -303,7 +286,7 @@ class ExpandChatSession:

        # Build the message content
        if attachments and len(attachments) > 0:
-            content_blocks = []
+            content_blocks: list[dict[str, Any]] = []
            if message:
                content_blocks.append({"type": "text", "text": message})
            for att in attachments:
@@ -315,7 +298,7 @@ class ExpandChatSession:
                        "data": att.base64Data,
                    }
                })
-            await self.client.query(_make_multimodal_message(content_blocks))
+            await self.client.query(make_multimodal_message(content_blocks))
            logger.info(f"Sent multimodal message with {len(attachments)} image(s)")
        else:
            await self.client.query(message)
--- a/server/services/process_manager.py
+++ b/server/services/process_manager.py
@@ -15,7 +15,7 @@ import sys
 import threading
 from datetime import datetime
 from pathlib import Path
-from typing import Awaitable, Callable, Literal, Set
+from typing import Any, Awaitable, Callable, Literal, Set

 import psutil

@@ -353,7 +353,7 @@ class AgentProcessManager:
            # stdin=DEVNULL prevents blocking if Claude CLI or child process tries to read stdin
            # CREATE_NO_WINDOW on Windows prevents console window pop-ups
            # PYTHONUNBUFFERED ensures output isn't delayed
-            popen_kwargs = {
+            popen_kwargs: dict[str, Any] = {
                "stdin": subprocess.DEVNULL,
                "stdout": subprocess.PIPE,
                "stderr": subprocess.STDOUT,
--- a/server/services/spec_chat_session.py
+++ b/server/services/spec_chat_session.py
@@ -13,49 +13,19 @@ import shutil
 import threading
 from datetime import datetime
 from pathlib import Path
-from typing import AsyncGenerator, Optional
+from typing import Any, AsyncGenerator, Optional

 from claude_agent_sdk import ClaudeAgentOptions, ClaudeSDKClient
 from dotenv import load_dotenv

 from ..schemas import ImageAttachment
+from .chat_constants import API_ENV_VARS, ROOT_DIR, make_multimodal_message

 # Load environment variables from .env file if present
 load_dotenv()

 logger = logging.getLogger(__name__)

-# Environment variables to pass through to Claude CLI for API configuration
-API_ENV_VARS = [
-    "ANTHROPIC_BASE_URL",
-    "ANTHROPIC_AUTH_TOKEN",
-    "API_TIMEOUT_MS",
-    "ANTHROPIC_DEFAULT_SONNET_MODEL",
-    "ANTHROPIC_DEFAULT_OPUS_MODEL",
-    "ANTHROPIC_DEFAULT_HAIKU_MODEL",
-]
-
-
-async def _make_multimodal_message(content_blocks: list[dict]) -> AsyncGenerator[dict, None]:
-    """
-    Create an async generator that yields a properly formatted multimodal message.
-
-    The Claude Agent SDK's query() method accepts either:
-    - A string (simple text)
-    - An AsyncIterable[dict] (for custom message formats)
-
-    This function wraps content blocks in the expected message format.
-    """
-    yield {
-        "type": "user",
-        "message": {"role": "user", "content": content_blocks},
-        "parent_tool_use_id": None,
-        "session_id": "default",
-    }
-
-# Root directory of the project
-ROOT_DIR = Path(__file__).parent.parent.parent
-

 class SpecChatSession:
    """
@@ -170,7 +140,12 @@ class SpecChatSession:
        system_cli = shutil.which("claude")

        # Build environment overrides for API configuration
-        sdk_env = {var: os.getenv(var) for var in API_ENV_VARS if os.getenv(var)}
+        # Filter to only include vars that are set to a non-empty value
+        sdk_env: dict[str, str] = {}
+        for var in API_ENV_VARS:
+            value = os.getenv(var)
+            if value:
+                sdk_env[var] = value

        # Determine model from environment or use default
        # This allows using alternative APIs (e.g., GLM via z.ai) that may not support Claude model names
@@ -292,7 +267,7 @@ class SpecChatSession:
        # Build the message content
        if attachments and len(attachments) > 0:
            # Multimodal message: build content blocks array
-            content_blocks = []
+            content_blocks: list[dict[str, Any]] = []

            # Add text block if there's text
            if message:
@@ -311,7 +286,7 @@ class SpecChatSession:

            # Send multimodal content to Claude using async generator format
            # The SDK's query() accepts AsyncIterable[dict] for custom message formats
-            await self.client.query(_make_multimodal_message(content_blocks))
+            await self.client.query(make_multimodal_message(content_blocks))
            logger.info(f"Sent multimodal message with {len(attachments)} image(s)")
        else:
            # Text-only message: use string format
@@ -320,7 +295,7 @@ class SpecChatSession:
        current_text = ""

        # Track pending writes for BOTH required files
-        pending_writes = {
+        pending_writes: dict[str, dict[str, Any] | None] = {
            "app_spec": None,      # {"tool_id": ..., "path": ...}
            "initializer": None,   # {"tool_id": ..., "path": ...}
        }
@@ -395,7 +370,8 @@ class SpecChatSession:
                            logger.warning(f"Tool error: {content}")
                            # Clear any pending writes that failed
                            for key in pending_writes:
-                                if pending_writes[key] and tool_use_id == pending_writes[key].get("tool_id"):
+                                pending_write = pending_writes[key]
+                                if pending_write is not None and tool_use_id == pending_write.get("tool_id"):
                                    logger.error(f"{key} write failed: {content}")
                                    pending_writes[key] = None
                        else:
--- a/server/services/terminal_manager.py
+++ b/server/services/terminal_manager.py
@@ -371,7 +371,7 @@ class TerminalSession:
            # Reap zombie if not already reaped
            if self._child_pid is not None:
                try:
-                    os.waitpid(self._child_pid, os.WNOHANG)
+                    os.waitpid(self._child_pid, os.WNOHANG)  # type: ignore[attr-defined]  # Unix-only method, guarded by runtime platform selection
                except ChildProcessError:
                    pass
                except Exception:
@@ -736,7 +736,7 @@ async def cleanup_all_terminals() -> None:
    Called on server shutdown to ensure all PTY processes are terminated.
    """
    with _sessions_lock:
-        all_sessions = []
+        all_sessions: list[TerminalSession] = []
        for project_sessions in _sessions.values():
            all_sessions.extend(project_sessions.values())