refactor: optimize token usage, deduplicate code, fix bugs across agents

Token reduction (~40% per session, ~2.3M fewer tokens per 200-feature project): - Agent-type-specific tool lists: coding 9, testing 5, init 5 (was 19 for all) - Right-sized max_turns: coding 300, testing 100 (was 1000 for all) - Trimmed coding prompt template (~150 lines removed) - Streamlined testing prompt with batch support - YOLO mode now strips browser testing instructions from prompt - Added Grep, WebFetch, WebSearch to expand project session Performance improvements: - Rate limit retries start at ~15s with jitter (was fixed 60s) - Post-spawn delay reduced to 0.5s (was 2s) - Orchestrator consolidated to 1 DB query per loop (was 5-7) - Testing agents batch 3 features per session (was 1) - Smart context compaction preserves critical state, discards noise Bug fixes: - Removed ghost feature_release_testing MCP tool (wasted tokens every test session) - Forward all 9 Vertex AI env vars to chat sessions (was missing 3) - Fix DetachedInstanceError risk in test batch ORM access - Prevent duplicate testing of same features in parallel mode Code deduplication: - _get_project_path(): 9 copies -> 1 shared utility (project_helpers.py) - validate_project_name(): 9 copies -> 2 variants in 1 file (validation.py) - ROOT_DIR: 10 copies -> 1 definition (chat_constants.py) - API_ENV_VARS: 4 copies -> 1 source of truth (env_constants.py) Security hardening: - Unified sensitive directory blocklist (14 dirs, was two divergent lists) - Cached get_blocked_paths() for O(1) directory listing checks - Terminal security warning when ALLOW_REMOTE=1 exposes WebSocket - 20 new security tests for EXTRA_READ_PATHS blocking - Extracted _validate_command_list() and _validate_pkill_processes() helpers Type safety: - 87 mypy errors -> 0 across 58 source files - Installed types-PyYAML for proper yaml stub types - Fixed SQLAlchemy Column[T] coercions across all routers Dead code removed: - 13 files deleted (~2,679 lines): unused UI components, debug logs, outdated docs - 7 unused npm packages removed (Radix UI components with 0 imports) - AgentAvatar.tsx reduced from 615 -> 119 lines (SVGs extracted to mascotData.tsx) New CLI options: - --testing-batch-size (1-5) for parallel mode test batching - --testing-feature-ids for direct multi-feature testing Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-02 07:23:35 +00:00 · 2026-02-01 13:16:24 +02:00
parent dc5bcc4ae9
commit 94e0b05cb1
57 changed files with 1974 additions and 4300 deletions
--- a/security.py
+++ b/security.py
@@ -97,6 +97,31 @@ BLOCKED_COMMANDS = {
    "ufw",
 }

+# Sensitive directories (relative to home) that should never be exposed.
+# Used by both the EXTRA_READ_PATHS validator (client.py) and the filesystem
+# browser API (server/routers/filesystem.py) to block credential/key directories.
+# This is the single source of truth -- import from here in both places.
+#
+# SENSITIVE_DIRECTORIES is the union of the previous filesystem browser blocklist
+# (filesystem.py) and the previous EXTRA_READ_PATHS blocklist (client.py).
+# Some entries are new to each consumer -- this is intentional for defense-in-depth.
+SENSITIVE_DIRECTORIES = {
+    ".ssh",
+    ".aws",
+    ".azure",
+    ".kube",
+    ".gnupg",
+    ".gpg",
+    ".password-store",
+    ".docker",
+    ".config/gcloud",
+    ".config/gh",
+    ".npmrc",
+    ".pypirc",
+    ".netrc",
+    ".terraform",
+}
+
 # Commands that trigger emphatic warnings but CAN be approved (Phase 3)
 # For now, these are blocked like BLOCKED_COMMANDS until Phase 3 implements approval
 DANGEROUS_COMMANDS = {
@@ -413,24 +438,6 @@ def validate_init_script(command_string: str) -> tuple[bool, str]:
    return False, f"Only ./init.sh is allowed, got: {script}"


-def get_command_for_validation(cmd: str, segments: list[str]) -> str:
-    """
-    Find the specific command segment that contains the given command.
-
-    Args:
-        cmd: The command name to find
-        segments: List of command segments
-
-    Returns:
-        The segment containing the command, or empty string if not found
-    """
-    for segment in segments:
-        segment_commands = extract_commands(segment)
-        if cmd in segment_commands:
-            return segment
-    return ""
-
-
 def matches_pattern(command: str, pattern: str) -> bool:
    """
    Check if a command matches a pattern.
@@ -472,6 +479,75 @@ def matches_pattern(command: str, pattern: str) -> bool:
    return False


+def _validate_command_list(commands: list, config_path: Path, field_name: str) -> bool:
+    """
+    Validate a list of command entries from a YAML config.
+
+    Each entry must be a dict with a non-empty string 'name' field.
+    Used by both load_org_config() and load_project_commands() to avoid
+    duplicating the same validation logic.
+
+    Args:
+        commands: List of command entries to validate
+        config_path: Path to the config file (for log messages)
+        field_name: Name of the YAML field being validated (e.g., 'allowed_commands', 'commands')
+
+    Returns:
+        True if all entries are valid, False otherwise
+    """
+    if not isinstance(commands, list):
+        logger.warning(f"Config at {config_path}: '{field_name}' must be a list")
+        return False
+    for i, cmd in enumerate(commands):
+        if not isinstance(cmd, dict):
+            logger.warning(f"Config at {config_path}: {field_name}[{i}] must be a dict")
+            return False
+        if "name" not in cmd:
+            logger.warning(f"Config at {config_path}: {field_name}[{i}] missing 'name'")
+            return False
+        if not isinstance(cmd["name"], str) or cmd["name"].strip() == "":
+            logger.warning(f"Config at {config_path}: {field_name}[{i}] has invalid 'name'")
+            return False
+    return True
+
+
+def _validate_pkill_processes(config: dict, config_path: Path) -> Optional[list[str]]:
+    """
+    Validate and normalize pkill_processes from a YAML config.
+
+    Each entry must be a non-empty string matching VALID_PROCESS_NAME_PATTERN
+    (alphanumeric, dots, underscores, hyphens only -- no regex metacharacters).
+    Used by both load_org_config() and load_project_commands().
+
+    Args:
+        config: Parsed YAML config dict that may contain 'pkill_processes'
+        config_path: Path to the config file (for log messages)
+
+    Returns:
+        Normalized list of process names, or None if validation fails.
+        Returns an empty list if 'pkill_processes' is not present.
+    """
+    if "pkill_processes" not in config:
+        return []
+
+    processes = config["pkill_processes"]
+    if not isinstance(processes, list):
+        logger.warning(f"Config at {config_path}: 'pkill_processes' must be a list")
+        return None
+
+    normalized = []
+    for i, proc in enumerate(processes):
+        if not isinstance(proc, str):
+            logger.warning(f"Config at {config_path}: pkill_processes[{i}] must be a string")
+            return None
+        proc = proc.strip()
+        if not proc or not VALID_PROCESS_NAME_PATTERN.fullmatch(proc):
+            logger.warning(f"Config at {config_path}: pkill_processes[{i}] has invalid value '{proc}'")
+            return None
+        normalized.append(proc)
+    return normalized
+
+
 def get_org_config_path() -> Path:
    """
    Get the organization-level config file path.
@@ -513,21 +589,8 @@ def load_org_config() -> Optional[dict]:

        # Validate allowed_commands if present
        if "allowed_commands" in config:
-            allowed = config["allowed_commands"]
-            if not isinstance(allowed, list):
-                logger.warning(f"Org config at {config_path}: 'allowed_commands' must be a list")
+            if not _validate_command_list(config["allowed_commands"], config_path, "allowed_commands"):
                return None
-            for i, cmd in enumerate(allowed):
-                if not isinstance(cmd, dict):
-                    logger.warning(f"Org config at {config_path}: allowed_commands[{i}] must be a dict")
-                    return None
-                if "name" not in cmd:
-                    logger.warning(f"Org config at {config_path}: allowed_commands[{i}] missing 'name'")
-                    return None
-                # Validate that name is a non-empty string
-                if not isinstance(cmd["name"], str) or cmd["name"].strip() == "":
-                    logger.warning(f"Org config at {config_path}: allowed_commands[{i}] has invalid 'name'")
-                    return None

        # Validate blocked_commands if present
        if "blocked_commands" in config:
@@ -541,23 +604,10 @@ def load_org_config() -> Optional[dict]:
                    return None

        # Validate pkill_processes if present
-        if "pkill_processes" in config:
-            processes = config["pkill_processes"]
-            if not isinstance(processes, list):
-                logger.warning(f"Org config at {config_path}: 'pkill_processes' must be a list")
-                return None
-            # Normalize and validate each process name against safe pattern
-            normalized = []
-            for i, proc in enumerate(processes):
-                if not isinstance(proc, str):
-                    logger.warning(f"Org config at {config_path}: pkill_processes[{i}] must be a string")
-                    return None
-                proc = proc.strip()
-                # Block empty strings and regex metacharacters
-                if not proc or not VALID_PROCESS_NAME_PATTERN.fullmatch(proc):
-                    logger.warning(f"Org config at {config_path}: pkill_processes[{i}] has invalid value '{proc}'")
-                    return None
-                normalized.append(proc)
+        normalized = _validate_pkill_processes(config, config_path)
+        if normalized is None:
+            return None
+        if normalized:
            config["pkill_processes"] = normalized

        return config
@@ -603,46 +653,21 @@ def load_project_commands(project_dir: Path) -> Optional[dict]:
            return None

        commands = config.get("commands", [])
-        if not isinstance(commands, list):
-            logger.warning(f"Project config at {config_path}: 'commands' must be a list")
-            return None

        # Enforce 100 command limit
-        if len(commands) > 100:
+        if isinstance(commands, list) and len(commands) > 100:
            logger.warning(f"Project config at {config_path} exceeds 100 command limit ({len(commands)} commands)")
            return None

-        # Validate each command entry
-        for i, cmd in enumerate(commands):
-            if not isinstance(cmd, dict):
-                logger.warning(f"Project config at {config_path}: commands[{i}] must be a dict")
-                return None
-            if "name" not in cmd:
-                logger.warning(f"Project config at {config_path}: commands[{i}] missing 'name'")
-                return None
-            # Validate name is a non-empty string
-            if not isinstance(cmd["name"], str) or cmd["name"].strip() == "":
-                logger.warning(f"Project config at {config_path}: commands[{i}] has invalid 'name'")
-                return None
+        # Validate each command entry using shared helper
+        if not _validate_command_list(commands, config_path, "commands"):
+            return None

        # Validate pkill_processes if present
-        if "pkill_processes" in config:
-            processes = config["pkill_processes"]
-            if not isinstance(processes, list):
-                logger.warning(f"Project config at {config_path}: 'pkill_processes' must be a list")
-                return None
-            # Normalize and validate each process name against safe pattern
-            normalized = []
-            for i, proc in enumerate(processes):
-                if not isinstance(proc, str):
-                    logger.warning(f"Project config at {config_path}: pkill_processes[{i}] must be a string")
-                    return None
-                proc = proc.strip()
-                # Block empty strings and regex metacharacters
-                if not proc or not VALID_PROCESS_NAME_PATTERN.fullmatch(proc):
-                    logger.warning(f"Project config at {config_path}: pkill_processes[{i}] has invalid value '{proc}'")
-                    return None
-                normalized.append(proc)
+        normalized = _validate_pkill_processes(config, config_path)
+        if normalized is None:
+            return None
+        if normalized:
            config["pkill_processes"] = normalized

        return config
@@ -659,8 +684,12 @@ def validate_project_command(cmd_config: dict) -> tuple[bool, str]:
    """
    Validate a single command entry from project config.

+    Checks that the command has a valid name and is not in any blocklist.
+    Called during hierarchy resolution to gate each project command before
+    it is added to the effective allowed set.
+
    Args:
-        cmd_config: Dict with command configuration (name, description, args)
+        cmd_config: Dict with command configuration (name, description)

    Returns:
        Tuple of (is_valid, error_message)
@@ -690,15 +719,6 @@ def validate_project_command(cmd_config: dict) -> tuple[bool, str]:
    if "description" in cmd_config and not isinstance(cmd_config["description"], str):
        return False, "Description must be a string"

-    # Args validation (Phase 1 - just check structure)
-    if "args" in cmd_config:
-        args = cmd_config["args"]
-        if not isinstance(args, list):
-            return False, "Args must be a list"
-        for arg in args:
-            if not isinstance(arg, str):
-                return False, "Each arg must be a string"
-
    return True, ""


@@ -899,8 +919,13 @@ async def bash_security_hook(input_data, tool_use_id=None, context=None):

        # Additional validation for sensitive commands
        if cmd in COMMANDS_NEEDING_EXTRA_VALIDATION:
-            # Find the specific segment containing this command
-            cmd_segment = get_command_for_validation(cmd, segments)
+            # Find the specific segment containing this command by searching
+            # each segment's extracted commands for a match
+            cmd_segment = ""
+            for segment in segments:
+                if cmd in extract_commands(segment):
+                    cmd_segment = segment
+                    break
            if not cmd_segment:
                cmd_segment = command  # Fallback to full command