feat: decouple regression testing agents from coding agents

Major refactoring of the parallel orchestrator to run regression testing
agents independently from coding agents. This improves system reliability
and provides better control over testing behavior.

Key changes:

Database & MCP Layer:
- Add testing_in_progress and last_tested_at columns to Feature model
- Add feature_claim_for_testing() for atomic test claim with retry
- Add feature_release_testing() to release claims after testing
- Refactor claim functions to iterative loops (no recursion)
- Add OperationalError retry handling for transient DB errors
- Reduce MAX_CLAIM_RETRIES from 10 to 5

Orchestrator:
- Decouple testing agent lifecycle from coding agents
- Add _maintain_testing_agents() for continuous testing maintenance
- Fix TOCTOU race in _spawn_testing_agent() - hold lock during spawn
- Add _cleanup_stale_testing_locks() with 30-min timeout
- Fix log ordering - start_session() before stale flag cleanup
- Add stale testing_in_progress cleanup on startup

Dead Code Removal:
- Remove count_testing_in_concurrency from entire stack (12+ files)
- Remove ineffective with_for_update() from features router

API & UI:
- Pass testing_agent_ratio via CLI to orchestrator
- Update testing prompt template to use new claim/release tools
- Rename UI label to "Regression Agents" with clearer description
- Add process_utils.py for cross-platform process tree management

Testing agents now:
- Run continuously as long as passing features exist
- Can re-test features multiple times to catch regressions
- Are controlled by fixed count (0-3) via testing_agent_ratio setting
- Have atomic claiming to prevent concurrent testing of same feature

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-03-17 02:43:09 +00:00 · 2026-01-22 15:22:48 +02:00
parent 29c6b252a9
commit 357083dbae
20 changed files with 841 additions and 382 deletions
--- a/server/websocket.py
+++ b/server/websocket.py
@@ -199,13 +199,23 @@ class AgentTracker:
        return None

    async def _handle_testing_agent_start(self, line: str) -> dict | None:
-        """Handle testing agent start message from orchestrator."""
+        """Handle testing agent start message from orchestrator.
+
+        Reuses existing testing agent entry if present to avoid ghost agents in UI.
+        """
        async with self._lock:
-            agent_index = self._next_agent_index
-            self._next_agent_index += 1
+            # Reuse existing testing agent entry if present
+            existing = self.active_agents.get(self.TESTING_AGENT_KEY)
+            if existing:
+                agent_index = existing['agent_index']
+                agent_name = existing['name']
+            else:
+                agent_index = self._next_agent_index
+                self._next_agent_index += 1
+                agent_name = AGENT_MASCOTS[agent_index % len(AGENT_MASCOTS)]

            self.active_agents[self.TESTING_AGENT_KEY] = {
-                'name': AGENT_MASCOTS[agent_index % len(AGENT_MASCOTS)],
+                'name': agent_name,
                'agent_index': agent_index,
                'agent_type': 'testing',
                'state': 'testing',
@@ -216,7 +226,7 @@ class AgentTracker:
            return {
                'type': 'agent_update',
                'agentIndex': agent_index,
-                'agentName': AGENT_MASCOTS[agent_index % len(AGENT_MASCOTS)],
+                'agentName': agent_name,
                'agentType': 'testing',
                'featureId': 0,
                'featureName': 'Regression Testing',
@@ -251,16 +261,31 @@ class AgentTracker:

            return result

-    def get_agent_info(self, feature_id: int) -> tuple[int | None, str | None]:
+    async def get_agent_info(self, feature_id: int) -> tuple[int | None, str | None]:
        """Get agent index and name for a feature ID.

+        Coroutine-safe: acquires the async lock before reading state.
+
        Returns:
            Tuple of (agentIndex, agentName) or (None, None) if not tracked.
        """
-        agent = self.active_agents.get(feature_id)
-        if agent:
-            return agent['agent_index'], agent['name']
-        return None, None
+        async with self._lock:
+            agent = self.active_agents.get(feature_id)
+            if agent:
+                return agent['agent_index'], agent['name']
+            return None, None
+
+    async def reset(self):
+        """Reset tracker state when orchestrator stops or crashes.
+
+        Clears all active agents and resets the index counter to prevent
+        ghost agents accumulating across start/stop cycles.
+
+        Must be called with await since it acquires the async lock.
+        """
+        async with self._lock:
+            self.active_agents.clear()
+            self._next_agent_index = 0

    async def _handle_agent_start(self, feature_id: int, line: str, agent_type: str = "coding") -> dict | None:
        """Handle agent start message from orchestrator."""
@@ -482,7 +507,7 @@ async def project_websocket(websocket: WebSocket, project_name: str):
            match = FEATURE_ID_PATTERN.match(line)
            if match:
                feature_id = int(match.group(1))
-                agent_index, _ = agent_tracker.get_agent_info(feature_id)
+                agent_index, _ = await agent_tracker.get_agent_info(feature_id)

            # Send the raw log line with optional feature/agent attribution
            log_msg = {
@@ -512,6 +537,9 @@ async def project_websocket(websocket: WebSocket, project_name: str):
                "type": "agent_status",
                "status": status,
            })
+            # Reset tracker when agent stops OR crashes to prevent ghost agents on restart
+            if status in ("stopped", "crashed"):
+                await agent_tracker.reset()
        except Exception:
            pass  # Connection may be closed