feat: add dedicated testing agents and enhanced parallel orchestration

Introduce a new testing agent architecture that runs regression tests independently from coding agents, improving quality assurance in parallel mode. Key changes: Testing Agent System: - Add testing_prompt.template.md for dedicated testing agent role - Add feature_mark_failing MCP tool for regression detection - Add --agent-type flag to select initializer/coding/testing mode - Remove regression testing from coding prompt (now handled by testing agents) Parallel Orchestrator Enhancements: - Add testing agent spawning with configurable ratio (--testing-agent-ratio) - Add comprehensive debug logging system (DebugLog class) - Improve database session management to prevent stale reads - Add engine.dispose() calls to refresh connections after subprocess commits - Fix f-string linting issues (remove unnecessary f-prefixes) UI Improvements: - Add testing agent mascot (Chip) to AgentAvatar - Enhance AgentCard to display testing agent status - Add testing agent ratio slider in SettingsModal - Update WebSocket handling for testing agent updates - Improve ActivityFeed to show testing agent activity API & Server Updates: - Add testing_agent_ratio to settings schema and endpoints - Update process manager to support testing agent type - Enhance WebSocket messages for agent_update events Template Changes: - Delete coding_prompt_yolo.template.md (consolidated into main prompt) - Update initializer_prompt.template.md with improved structure - Streamline coding_prompt.template.md workflow Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-03-17 10:53:09 +00:00 · 2026-01-18 13:49:50 +02:00
parent 5f786078fa
commit 13128361b0
27 changed files with 1885 additions and 536 deletions
--- a/server/services/process_manager.py
+++ b/server/services/process_manager.py
@@ -8,6 +8,7 @@ Provides start/stop/pause/resume functionality with cross-platform support.

 import asyncio
 import logging
+import os
 import re
 import subprocess
 import sys
@@ -82,6 +83,8 @@ class AgentProcessManager:
        self.model: str | None = None  # Model being used
        self.parallel_mode: bool = False  # Parallel execution mode
        self.max_concurrency: int | None = None  # Max concurrent agents
+        self.testing_agent_ratio: int = 1  # Testing agents per coding agent
+        self.count_testing_in_concurrency: bool = False  # Count testing toward limit

        # Support multiple callbacks (for multiple WebSocket clients)
        self._output_callbacks: Set[Callable[[str], Awaitable[None]]] = set()
@@ -292,15 +295,19 @@ class AgentProcessManager:
        model: str | None = None,
        parallel_mode: bool = False,
        max_concurrency: int | None = None,
+        testing_agent_ratio: int = 1,
+        count_testing_in_concurrency: bool = False,
    ) -> tuple[bool, str]:
        """
        Start the agent as a subprocess.

        Args:
-            yolo_mode: If True, run in YOLO mode (no browser testing)
+            yolo_mode: If True, run in YOLO mode (skip testing agents)
            model: Model to use (e.g., claude-opus-4-5-20251101)
-            parallel_mode: If True, run multiple features in parallel
-            max_concurrency: Max concurrent agents (default 3 if parallel enabled)
+            parallel_mode: DEPRECATED - ignored, always uses unified orchestrator
+            max_concurrency: Max concurrent coding agents (1-5, default 1)
+            testing_agent_ratio: Testing agents per coding agent (0-3, default 1)
+            count_testing_in_concurrency: If True, testing agents count toward limit

        Returns:
            Tuple of (success, message)
@@ -314,12 +321,15 @@ class AgentProcessManager:
        # Store for status queries
        self.yolo_mode = yolo_mode
        self.model = model
-        self.parallel_mode = parallel_mode
-        self.max_concurrency = max_concurrency
+        self.parallel_mode = True  # Always True now (unified orchestrator)
+        self.max_concurrency = max_concurrency or 1
+        self.testing_agent_ratio = testing_agent_ratio
+        self.count_testing_in_concurrency = count_testing_in_concurrency

-        # Build command - pass absolute path to project directory
+        # Build command - unified orchestrator with --concurrency
        cmd = [
            sys.executable,
+            "-u",  # Force unbuffered stdout/stderr for real-time output
            str(self.root_dir / "autonomous_agent_demo.py"),
            "--project-dir",
            str(self.project_dir.resolve()),
@@ -333,19 +343,24 @@ class AgentProcessManager:
        if yolo_mode:
            cmd.append("--yolo")

-        # Add --parallel flag if parallel mode is enabled
-        if parallel_mode:
-            cmd.append("--parallel")
-            cmd.append(str(max_concurrency or 3))  # Default to 3 concurrent agents
+        # Add --concurrency flag (unified orchestrator always uses this)
+        cmd.extend(["--concurrency", str(max_concurrency or 1)])
+
+        # Add testing agent configuration
+        cmd.extend(["--testing-ratio", str(testing_agent_ratio)])
+        if count_testing_in_concurrency:
+            cmd.append("--count-testing")

        try:
            # Start subprocess with piped stdout/stderr
            # Use project_dir as cwd so Claude SDK sandbox allows access to project files
+            # IMPORTANT: Set PYTHONUNBUFFERED to ensure output isn't delayed
            self.process = subprocess.Popen(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                cwd=str(self.project_dir),
+                env={**os.environ, "PYTHONUNBUFFERED": "1"},
            )

            # Atomic lock creation - if it fails, another process beat us
@@ -412,6 +427,8 @@ class AgentProcessManager:
            self.model = None  # Reset model
            self.parallel_mode = False  # Reset parallel mode
            self.max_concurrency = None  # Reset concurrency
+            self.testing_agent_ratio = 1  # Reset testing ratio
+            self.count_testing_in_concurrency = False  # Reset count testing

            return True, "Agent stopped"
        except Exception as e:
@@ -496,6 +513,8 @@ class AgentProcessManager:
            "model": self.model,
            "parallel_mode": self.parallel_mode,
            "max_concurrency": self.max_concurrency,
+            "testing_agent_ratio": self.testing_agent_ratio,
+            "count_testing_in_concurrency": self.count_testing_in_concurrency,
        }