Merge branch 'leonvanzyl:master' into master

2026-03-17 19:03:09 +00:00 · 2026-01-19 22:03:29 +01:00
parent 245cc5b7ad fbe4c399ac
commit 5937205bf8
28 changed files with 1994 additions and 541 deletions
--- a/server/routers/agent.py
+++ b/server/routers/agent.py
@@ -26,8 +26,12 @@ def _get_project_path(project_name: str) -> Path:
    return get_project_path(project_name)


-def _get_settings_defaults() -> tuple[bool, str]:
-    """Get YOLO mode and model defaults from global settings."""
+def _get_settings_defaults() -> tuple[bool, str, int, bool]:
+    """Get defaults from global settings.
+
+    Returns:
+        Tuple of (yolo_mode, model, testing_agent_ratio, count_testing_in_concurrency)
+    """
    import sys
    root = Path(__file__).parent.parent.parent
    if str(root) not in sys.path:
@@ -38,7 +42,16 @@ def _get_settings_defaults() -> tuple[bool, str]:
    settings = get_all_settings()
    yolo_mode = (settings.get("yolo_mode") or "false").lower() == "true"
    model = settings.get("model", DEFAULT_MODEL)
-    return yolo_mode, model
+
+    # Parse testing agent settings with defaults
+    try:
+        testing_agent_ratio = int(settings.get("testing_agent_ratio", "1"))
+    except (ValueError, TypeError):
+        testing_agent_ratio = 1
+
+    count_testing = (settings.get("count_testing_in_concurrency") or "false").lower() == "true"
+
+    return yolo_mode, model, testing_agent_ratio, count_testing


 router = APIRouter(prefix="/api/projects/{project_name}/agent", tags=["agent"])
@@ -87,6 +100,8 @@ async def get_agent_status(project_name: str):
        model=manager.model,
        parallel_mode=manager.parallel_mode,
        max_concurrency=manager.max_concurrency,
+        testing_agent_ratio=manager.testing_agent_ratio,
+        count_testing_in_concurrency=manager.count_testing_in_concurrency,
    )


@@ -99,17 +114,20 @@ async def start_agent(
    manager = get_project_manager(project_name)

    # Get defaults from global settings if not provided in request
-    default_yolo, default_model = _get_settings_defaults()
+    default_yolo, default_model, default_testing_ratio, default_count_testing = _get_settings_defaults()
+
    yolo_mode = request.yolo_mode if request.yolo_mode is not None else default_yolo
    model = request.model if request.model else default_model
-    parallel_mode = request.parallel_mode or False
-    max_concurrency = request.max_concurrency
+    max_concurrency = request.max_concurrency or 1
+    testing_agent_ratio = request.testing_agent_ratio if request.testing_agent_ratio is not None else default_testing_ratio
+    count_testing = request.count_testing_in_concurrency if request.count_testing_in_concurrency is not None else default_count_testing

    success, message = await manager.start(
        yolo_mode=yolo_mode,
        model=model,
-        parallel_mode=parallel_mode,
        max_concurrency=max_concurrency,
+        testing_agent_ratio=testing_agent_ratio,
+        count_testing_in_concurrency=count_testing,
    )

    return AgentActionResponse(
--- a/server/routers/settings.py
+++ b/server/routers/settings.py
@@ -52,6 +52,23 @@ async def get_available_models():
    )


+def _parse_int(value: str | None, default: int) -> int:
+    """Convert a stored setting string to int, returning `default` when unusable."""
+    try:
+        parsed = int(value)
+    except (ValueError, TypeError):
+        # TypeError covers value=None (missing setting); ValueError covers bad text.
+        parsed = default
+    return parsed
+
+
+def _parse_bool(value: str | None, default: bool = False) -> bool:
+    """Return True only for a case-insensitive "true"; use `default` when value is None."""
+    if value is not None:
+        return value.lower() == "true"
+    return default
+
+
@router.get("", response_model=SettingsResponse)
 async def get_settings():
    """Get current global settings."""
@@ -61,6 +78,8 @@ async def get_settings():
        yolo_mode=_parse_yolo_mode(all_settings.get("yolo_mode")),
        model=all_settings.get("model", DEFAULT_MODEL),
        glm_mode=_is_glm_mode(),
+        testing_agent_ratio=_parse_int(all_settings.get("testing_agent_ratio"), 1),
+        count_testing_in_concurrency=_parse_bool(all_settings.get("count_testing_in_concurrency")),
    )


@@ -73,10 +92,18 @@ async def update_settings(update: SettingsUpdate):
    if update.model is not None:
        set_setting("model", update.model)

+    if update.testing_agent_ratio is not None:
+        set_setting("testing_agent_ratio", str(update.testing_agent_ratio))
+
+    if update.count_testing_in_concurrency is not None:
+        set_setting("count_testing_in_concurrency", "true" if update.count_testing_in_concurrency else "false")
+
    # Return updated settings
    all_settings = get_all_settings()
    return SettingsResponse(
        yolo_mode=_parse_yolo_mode(all_settings.get("yolo_mode")),
        model=all_settings.get("model", DEFAULT_MODEL),
        glm_mode=_is_glm_mode(),
+        testing_agent_ratio=_parse_int(all_settings.get("testing_agent_ratio"), 1),
+        count_testing_in_concurrency=_parse_bool(all_settings.get("count_testing_in_concurrency")),
    )
--- a/server/schemas.py
+++ b/server/schemas.py
@@ -169,8 +169,10 @@ class AgentStartRequest(BaseModel):
    """Request schema for starting the agent."""
    yolo_mode: bool | None = None  # None means use global settings
    model: str | None = None  # None means use global settings
-    parallel_mode: bool | None = None  # Enable parallel execution
-    max_concurrency: int | None = None  # Max concurrent agents (1-5)
+    parallel_mode: bool | None = None  # DEPRECATED: Use max_concurrency instead
+    max_concurrency: int | None = None  # Max concurrent coding agents (1-5)
+    testing_agent_ratio: int | None = None  # Testing agents per coding agent (0-3)
+    count_testing_in_concurrency: bool | None = None  # Count testing toward limit

    @field_validator('model')
    @classmethod
@@ -188,6 +190,14 @@ class AgentStartRequest(BaseModel):
            raise ValueError("max_concurrency must be between 1 and 5")
        return v

+    @field_validator('testing_agent_ratio')
+    @classmethod
+    def validate_testing_ratio(cls, v: int | None) -> int | None:
+        """Reject a testing_agent_ratio outside 0..3; None passes through unchanged."""
+        if v is not None and not 0 <= v <= 3:
+            raise ValueError("testing_agent_ratio must be between 0 and 3")
+        return v
+

 class AgentStatus(BaseModel):
    """Current agent status."""
@@ -196,8 +206,10 @@ class AgentStatus(BaseModel):
    started_at: datetime | None = None
    yolo_mode: bool = False
    model: str | None = None  # Model being used by running agent
-    parallel_mode: bool = False
+    parallel_mode: bool = False  # DEPRECATED: Always True now (unified orchestrator)
    max_concurrency: int | None = None
+    testing_agent_ratio: int = 1  # Testing agents per coding agent
+    count_testing_in_concurrency: bool = False  # Count testing toward limit


 class AgentActionResponse(BaseModel):
@@ -257,6 +269,9 @@ class WSAgentStatusMessage(BaseModel):
 # Agent state for multi-agent tracking
 AgentState = Literal["idle", "thinking", "working", "testing", "success", "error", "struggling"]

+# Agent type (coding vs testing)
+AgentType = Literal["coding", "testing"]
+
 # Agent mascot names assigned by index
 AGENT_MASCOTS = ["Spark", "Fizz", "Octo", "Hoot", "Buzz"]

@@ -266,6 +281,7 @@ class WSAgentUpdateMessage(BaseModel):
    type: Literal["agent_update"] = "agent_update"
    agentIndex: int
    agentName: str  # One of AGENT_MASCOTS
+    agentType: AgentType = "coding"  # "coding" or "testing"
    featureId: int
    featureName: str
    state: AgentState
@@ -368,6 +384,8 @@ class SettingsResponse(BaseModel):
    yolo_mode: bool = False
    model: str = DEFAULT_MODEL
    glm_mode: bool = False  # True if GLM API is configured via .env
+    testing_agent_ratio: int = 1  # Testing agents per coding agent (0-3)
+    count_testing_in_concurrency: bool = False  # Count testing toward concurrency


 class ModelsResponse(BaseModel):
@@ -380,6 +398,8 @@ class SettingsUpdate(BaseModel):
    """Request schema for updating global settings."""
    yolo_mode: bool | None = None
    model: str | None = None
+    testing_agent_ratio: int | None = None  # 0-3
+    count_testing_in_concurrency: bool | None = None

    @field_validator('model')
    @classmethod
@@ -388,6 +408,13 @@ class SettingsUpdate(BaseModel):
            raise ValueError(f"Invalid model. Must be one of: {VALID_MODELS}")
        return v

+    @field_validator('testing_agent_ratio')
+    @classmethod
+    def validate_testing_ratio(cls, v: int | None) -> int | None:  # None passes through unchanged
+        if v is not None and not 0 <= v <= 3:
+            raise ValueError("testing_agent_ratio must be between 0 and 3")
+        return v
+

 # ============================================================================
 # Dev Server Schemas
--- a/server/services/process_manager.py
+++ b/server/services/process_manager.py
@@ -8,6 +8,7 @@ Provides start/stop/pause/resume functionality with cross-platform support.

 import asyncio
 import logging
+import os
 import re
 import subprocess
 import sys
@@ -82,6 +83,8 @@ class AgentProcessManager:
        self.model: str | None = None  # Model being used
        self.parallel_mode: bool = False  # Parallel execution mode
        self.max_concurrency: int | None = None  # Max concurrent agents
+        self.testing_agent_ratio: int = 1  # Testing agents per coding agent
+        self.count_testing_in_concurrency: bool = False  # Count testing toward limit

        # Support multiple callbacks (for multiple WebSocket clients)
        self._output_callbacks: Set[Callable[[str], Awaitable[None]]] = set()
@@ -292,15 +295,19 @@ class AgentProcessManager:
        model: str | None = None,
        parallel_mode: bool = False,
        max_concurrency: int | None = None,
+        testing_agent_ratio: int = 1,
+        count_testing_in_concurrency: bool = False,
    ) -> tuple[bool, str]:
        """
        Start the agent as a subprocess.

        Args:
-            yolo_mode: If True, run in YOLO mode (no browser testing)
+            yolo_mode: If True, run in YOLO mode (skip testing agents)
            model: Model to use (e.g., claude-opus-4-5-20251101)
-            parallel_mode: If True, run multiple features in parallel
-            max_concurrency: Max concurrent agents (default 3 if parallel enabled)
+            parallel_mode: DEPRECATED - ignored, always uses unified orchestrator
+            max_concurrency: Max concurrent coding agents (1-5, default 1)
+            testing_agent_ratio: Testing agents per coding agent (0-3, default 1)
+            count_testing_in_concurrency: If True, testing agents count toward limit

        Returns:
            Tuple of (success, message)
@@ -314,12 +321,15 @@ class AgentProcessManager:
        # Store for status queries
        self.yolo_mode = yolo_mode
        self.model = model
-        self.parallel_mode = parallel_mode
-        self.max_concurrency = max_concurrency
+        self.parallel_mode = True  # Always True now (unified orchestrator)
+        self.max_concurrency = max_concurrency or 1
+        self.testing_agent_ratio = testing_agent_ratio
+        self.count_testing_in_concurrency = count_testing_in_concurrency

-        # Build command - pass absolute path to project directory
+        # Build command - unified orchestrator with --concurrency
        cmd = [
            sys.executable,
+            "-u",  # Force unbuffered stdout/stderr for real-time output
            str(self.root_dir / "autonomous_agent_demo.py"),
            "--project-dir",
            str(self.project_dir.resolve()),
@@ -333,19 +343,24 @@ class AgentProcessManager:
        if yolo_mode:
            cmd.append("--yolo")

-        # Add --parallel flag if parallel mode is enabled
-        if parallel_mode:
-            cmd.append("--parallel")
-            cmd.append(str(max_concurrency or 3))  # Default to 3 concurrent agents
+        # Add --concurrency flag (unified orchestrator always uses this)
+        cmd.extend(["--concurrency", str(max_concurrency or 1)])
+
+        # Add testing agent configuration
+        cmd.extend(["--testing-ratio", str(testing_agent_ratio)])
+        if count_testing_in_concurrency:
+            cmd.append("--count-testing")

        try:
            # Start subprocess with piped stdout/stderr
            # Use project_dir as cwd so Claude SDK sandbox allows access to project files
+            # IMPORTANT: Set PYTHONUNBUFFERED to ensure output isn't delayed
            self.process = subprocess.Popen(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                cwd=str(self.project_dir),
+                env={**os.environ, "PYTHONUNBUFFERED": "1"},
            )

            # Atomic lock creation - if it fails, another process beat us
@@ -412,6 +427,8 @@ class AgentProcessManager:
            self.model = None  # Reset model
            self.parallel_mode = False  # Reset parallel mode
            self.max_concurrency = None  # Reset concurrency
+            self.testing_agent_ratio = 1  # Reset testing ratio
+            self.count_testing_in_concurrency = False  # Reset count testing

            return True, "Agent stopped"
        except Exception as e:
@@ -496,6 +513,8 @@ class AgentProcessManager:
            "model": self.model,
            "parallel_mode": self.parallel_mode,
            "max_concurrency": self.max_concurrency,
+            "testing_agent_ratio": self.testing_agent_ratio,
+            "count_testing_in_concurrency": self.count_testing_in_concurrency,
        }


--- a/server/websocket.py
+++ b/server/websocket.py
@@ -24,9 +24,12 @@ _count_passing_tests = None

 logger = logging.getLogger(__name__)

-# Pattern to extract feature ID from parallel orchestrator output
+# Pattern to extract feature ID from parallel orchestrator output (coding agents)
 FEATURE_ID_PATTERN = re.compile(r'\[Feature #(\d+)\]\s*(.*)')

+# Pattern to extract testing agent output
+TESTING_AGENT_PATTERN = re.compile(r'\[Testing\]\s*(.*)')
+
 # Patterns for detecting agent activity and thoughts
 THOUGHT_PATTERNS = [
    # Claude's tool usage patterns (actual format: [Tool: name])
@@ -49,8 +52,12 @@ THOUGHT_PATTERNS = [
 class AgentTracker:
    """Tracks active agents and their states for multi-agent mode."""

+    # Use a special key for the testing agent since it doesn't have a fixed feature ID
+    TESTING_AGENT_KEY = -1
+
    def __init__(self):
-        # feature_id -> {name, state, last_thought, agent_index}
+        # feature_id -> {name, state, last_thought, agent_index, agent_type}
+        # For testing agents, use TESTING_AGENT_KEY as the key
        self.active_agents: dict[int, dict] = {}
        self._next_agent_index = 0
        self._lock = asyncio.Lock()
@@ -61,16 +68,24 @@ class AgentTracker:

        Returns None if no update should be emitted.
        """
-        # Check for feature-specific output
+        # Check for testing agent output first
+        testing_match = TESTING_AGENT_PATTERN.match(line)
+        if testing_match:
+            content = testing_match.group(1)
+            return await self._process_testing_agent_line(content)
+
+        # Check for feature-specific output (coding agents)
        match = FEATURE_ID_PATTERN.match(line)
        if not match:
            # Also check for orchestrator status messages
-            if line.startswith("Started agent for feature #"):
+            if line.startswith("Started coding agent for feature #"):
                try:
                    feature_id = int(re.search(r'#(\d+)', line).group(1))
-                    return await self._handle_agent_start(feature_id, line)
+                    return await self._handle_agent_start(feature_id, line, agent_type="coding")
                except (AttributeError, ValueError):
                    pass
+            elif line.startswith("Started testing agent"):
+                return await self._handle_testing_agent_start(line)
            elif line.startswith("Feature #") and ("completed" in line or "failed" in line):
                try:
                    feature_id = int(re.search(r'#(\d+)', line).group(1))
@@ -78,6 +93,10 @@ class AgentTracker:
                    return await self._handle_agent_complete(feature_id, is_success)
                except (AttributeError, ValueError):
                    pass
+            elif line.startswith("Testing agent") and ("completed" in line or "failed" in line):
+                # Format: "Testing agent (PID xxx) completed" or "Testing agent (PID xxx) failed"
+                is_success = "completed" in line
+                return await self._handle_testing_agent_complete(is_success)
            return None

        feature_id = int(match.group(1))
@@ -91,6 +110,7 @@ class AgentTracker:
                self.active_agents[feature_id] = {
                    'name': AGENT_MASCOTS[agent_index % len(AGENT_MASCOTS)],
                    'agent_index': agent_index,
+                    'agent_type': 'coding',
                    'state': 'thinking',
                    'feature_name': f'Feature #{feature_id}',
                    'last_thought': None,
@@ -119,6 +139,7 @@ class AgentTracker:
                    'type': 'agent_update',
                    'agentIndex': agent['agent_index'],
                    'agentName': agent['name'],
+                    'agentType': agent['agent_type'],
                    'featureId': feature_id,
                    'featureName': agent['feature_name'],
                    'state': state,
@@ -128,6 +149,108 @@ class AgentTracker:

        return None

+    async def _process_testing_agent_line(self, content: str) -> dict | None:
+        """Process one testing-agent output line (its "[Testing]" prefix already removed).
+
+        Returns an agent_update dict when the state changed or a new thought was
+        detected; returns None otherwise.
+        """
+        async with self._lock:
+            # Register the testing agent on first output if it is not tracked yet
+            if self.TESTING_AGENT_KEY not in self.active_agents:
+                agent_index = self._next_agent_index
+                self._next_agent_index += 1
+                self.active_agents[self.TESTING_AGENT_KEY] = {
+                    'name': AGENT_MASCOTS[agent_index % len(AGENT_MASCOTS)],
+                    'agent_index': agent_index,
+                    'agent_type': 'testing',
+                    'state': 'testing',
+                    'feature_name': 'Regression Testing',
+                    'last_thought': None,
+                }
+            agent = self.active_agents[self.TESTING_AGENT_KEY]
+
+            # Default to 'testing'; the first matching THOUGHT_PATTERNS entry overrides it
+            state = 'testing'
+            thought = None
+            for pattern, detected_state in THOUGHT_PATTERNS:
+                m = pattern.search(content)
+                if m:
+                    state = detected_state
+                    thought = m.group(1) if m.lastindex else content[:100]
+                    break
+
+            # A line without a thought is not a "new thought": do not re-emit for it
+            if state != agent['state'] or (thought and thought != agent['last_thought']):
+                agent['state'] = state
+                if thought:
+                    agent['last_thought'] = thought
+                return {
+                    'type': 'agent_update',
+                    'agentIndex': agent['agent_index'],
+                    'agentName': agent['name'],
+                    'agentType': 'testing',
+                    'featureId': 0,  # Testing agents work on random features
+                    'featureName': agent['feature_name'],
+                    'state': state,
+                    'thought': thought,
+                    'timestamp': datetime.now().isoformat(),
+                }
+
+        return None
+
+    async def _handle_testing_agent_start(self, line: str) -> dict | None:
+        """Register the testing agent and return its initial update; `line` is not read."""
+        async with self._lock:
+            idx = self._next_agent_index
+            self._next_agent_index = idx + 1
+            mascot = AGENT_MASCOTS[idx % len(AGENT_MASCOTS)]
+            # Overwrites whatever entry is already stored under TESTING_AGENT_KEY
+            self.active_agents[self.TESTING_AGENT_KEY] = {
+                'name': mascot,
+                'agent_index': idx,
+                'agent_type': 'testing',
+                'state': 'testing',
+                'feature_name': 'Regression Testing',
+                'last_thought': 'Starting regression tests...',
+            }
+            return {
+                'type': 'agent_update',
+                'agentIndex': idx,
+                'agentName': mascot,
+                'agentType': 'testing',
+                'featureId': 0,
+                'featureName': 'Regression Testing',
+                'state': 'testing',
+                'thought': 'Starting regression tests...',
+                'timestamp': datetime.now().isoformat(),
+            }
+
+    async def _handle_testing_agent_complete(self, is_success: bool) -> dict | None:
+        """Build the final testing-agent update and untrack it; None if it is not tracked."""
+        async with self._lock:
+            key = self.TESTING_AGENT_KEY
+            if key not in self.active_agents:
+                return None
+            agent = self.active_agents[key]
+            if is_success:
+                state, thought = 'success', 'Tests passed!'
+            else:
+                state, thought = 'error', 'Found regressions'
+            update = {
+                'type': 'agent_update',
+                'agentIndex': agent['agent_index'],
+                'agentName': agent['name'],
+                'agentType': 'testing',
+                'featureId': 0,
+                'featureName': agent['feature_name'],
+                'state': state,
+                'thought': thought,
+                'timestamp': datetime.now().isoformat(),
+            }
+            # Entry is dropped only after the update has been built from it
+            del self.active_agents[key]
+            return update
+
    def get_agent_info(self, feature_id: int) -> tuple[int | None, str | None]:
        """Get agent index and name for a feature ID.

@@ -139,7 +262,7 @@ class AgentTracker:
            return agent['agent_index'], agent['name']
        return None, None

-    async def _handle_agent_start(self, feature_id: int, line: str) -> dict | None:
+    async def _handle_agent_start(self, feature_id: int, line: str, agent_type: str = "coding") -> dict | None:
        """Handle agent start message from orchestrator."""
        async with self._lock:
            agent_index = self._next_agent_index
@@ -154,6 +277,7 @@ class AgentTracker:
            self.active_agents[feature_id] = {
                'name': AGENT_MASCOTS[agent_index % len(AGENT_MASCOTS)],
                'agent_index': agent_index,
+                'agent_type': agent_type,
                'state': 'thinking',
                'feature_name': feature_name,
                'last_thought': 'Starting work...',
@@ -163,6 +287,7 @@ class AgentTracker:
                'type': 'agent_update',
                'agentIndex': agent_index,
                'agentName': AGENT_MASCOTS[agent_index % len(AGENT_MASCOTS)],
+                'agentType': agent_type,
                'featureId': feature_id,
                'featureName': feature_name,
                'state': 'thinking',
@@ -178,11 +303,13 @@ class AgentTracker:

            agent = self.active_agents[feature_id]
            state = 'success' if is_success else 'error'
+            agent_type = agent.get('agent_type', 'coding')

            result = {
                'type': 'agent_update',
                'agentIndex': agent['agent_index'],
                'agentName': agent['name'],
+                'agentType': agent_type,
                'featureId': feature_id,
                'featureName': agent['feature_name'],
                'state': state,