refactor: orchestrator pre-selects features for all agents

Replace agent-initiated feature selection with orchestrator pre-selection for both coding and testing agents. This ensures Mission Control displays correct feature numbers for testing agents (previously showed "Feature #0"). Key changes: MCP Server (mcp_server/feature_mcp.py): - Add feature_get_by_id tool for agents to fetch assigned feature details - Remove obsolete tools: feature_get_next, feature_claim_next, feature_claim_for_testing, feature_get_for_regression - Remove helper functions and unused imports (text, OperationalError, func) Orchestrator (parallel_orchestrator.py): - Change running_testing_agents from list to dict[int, Popen] - Add claim_feature_for_testing() with random selection - Add release_testing_claim() method - Pass --testing-feature-id to spawned testing agents - Use unified [Feature #X] output format for both agent types Agent Entry Points: - autonomous_agent_demo.py: Add --testing-feature-id CLI argument - agent.py: Pass testing_feature_id to get_testing_prompt() Prompt Templates: - coding_prompt.template.md: Update to use feature_get_by_id - testing_prompt.template.md: Update workflow for pre-assigned features - prompts.py: Update pre-claimed headers for both agent types WebSocket (server/websocket.py): - Simplify tracking with unified [Feature #X] pattern - Remove testing-specific parsing code Assistant (server/services/assistant_chat_session.py): - Update help text with current available tools Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-03-17 02:43:09 +00:00 · 2026-01-22 16:24:48 +02:00
parent 357083dbae
commit b00eef5eca
10 changed files with 277 additions and 564 deletions
--- a/parallel_orchestrator.py
+++ b/parallel_orchestrator.py
@@ -173,8 +173,8 @@ class ParallelOrchestrator:
        self._lock = threading.Lock()
        # Coding agents: feature_id -> process
        self.running_coding_agents: dict[int, subprocess.Popen] = {}
-        # Testing agents: list of processes (not tied to specific features)
-        self.running_testing_agents: list[subprocess.Popen] = []
+        # Testing agents: feature_id -> process (feature being tested)
+        self.running_testing_agents: dict[int, subprocess.Popen] = {}
        # Legacy alias for backward compatibility
        self.running_agents = self.running_coding_agents
        self.abort_events: dict[int, threading.Event] = {}
@@ -193,6 +193,75 @@ class ParallelOrchestrator:
        """Get a new database session."""
        return self._session_maker()

+    def claim_feature_for_testing(self) -> int | None:
+        """Claim a random passing feature for regression testing.
+
+        Returns the feature ID if successful, None if no features available.
+        Sets testing_in_progress=True on the claimed feature.
+        """
+        session = self.get_session()
+        try:
+            from sqlalchemy.sql.expression import func
+
+            # Find a passing feature that's not being worked on
+            # Exclude features already being tested by this orchestrator
+            with self._lock:
+                testing_feature_ids = set(self.running_testing_agents.keys())
+
+            candidate = (
+                session.query(Feature)
+                .filter(Feature.passes == True)
+                .filter(Feature.in_progress == False)
+                .filter(Feature.testing_in_progress == False)
+                .filter(~Feature.id.in_(testing_feature_ids) if testing_feature_ids else True)
+                .order_by(func.random())
+                .first()
+            )
+
+            if not candidate:
+                return None
+
+            # Atomic claim using UPDATE with WHERE clause
+            result = session.execute(
+                text("""
+                    UPDATE features
+                    SET testing_in_progress = 1
+                    WHERE id = :feature_id
+                      AND passes = 1
+                      AND in_progress = 0
+                      AND testing_in_progress = 0
+                """),
+                {"feature_id": candidate.id}
+            )
+            session.commit()
+
+            if result.rowcount == 0:
+                # Another process claimed it
+                return None
+
+            return candidate.id
+        except Exception as e:
+            session.rollback()
+            debug_log.log("TESTING", f"Failed to claim feature for testing: {e}")
+            return None
+        finally:
+            session.close()
+
+    def release_testing_claim(self, feature_id: int):
+        """Release a testing claim on a feature (called when testing agent exits)."""
+        session = self.get_session()
+        try:
+            session.execute(
+                text("UPDATE features SET testing_in_progress = 0 WHERE id = :feature_id"),
+                {"feature_id": feature_id}
+            )
+            session.commit()
+        except Exception as e:
+            session.rollback()
+            debug_log.log("TESTING", f"Failed to release testing claim for feature {feature_id}: {e}")
+        finally:
+            session.close()
+
    def get_resumable_features(self) -> list[dict]:
        """Get features that were left in_progress from a previous session.

@@ -563,13 +632,11 @@ class ParallelOrchestrator:
    def _spawn_testing_agent(self) -> tuple[bool, str]:
        """Spawn a testing agent subprocess for regression testing.

-        CRITICAL: Lock is held during the entire spawn operation to prevent
-        TOCTOU race conditions where multiple threads could pass limit checks
-        and spawn excess agents.
+        Claims a feature BEFORE spawning the agent (same pattern as coding agents).
+        This ensures we know which feature is being tested for UI display.
        """
-        # Hold lock for entire operation to prevent TOCTOU race
+        # Check limits first (under lock)
        with self._lock:
-            # Check limits
            current_testing_count = len(self.running_testing_agents)
            if current_testing_count >= self.max_concurrency:
                debug_log.log("TESTING", f"Skipped spawn - at max testing agents ({current_testing_count}/{self.max_concurrency})")
@@ -579,7 +646,21 @@ class ParallelOrchestrator:
                debug_log.log("TESTING", f"Skipped spawn - at max total agents ({total_agents}/{MAX_TOTAL_AGENTS})")
                return False, f"At max total agents ({total_agents})"

-            debug_log.log("TESTING", "Attempting to spawn testing agent subprocess")
+        # Claim a feature for testing (outside lock to avoid holding during DB ops)
+        feature_id = self.claim_feature_for_testing()
+        if feature_id is None:
+            debug_log.log("TESTING", "No features available for testing")
+            return False, "No features available for testing"
+
+        debug_log.log("TESTING", f"Claimed feature #{feature_id} for testing")
+
+        # Now spawn with the claimed feature ID
+        with self._lock:
+            # Re-check limits in case another thread spawned while we were claiming
+            current_testing_count = len(self.running_testing_agents)
+            if current_testing_count >= self.max_concurrency:
+                self.release_testing_claim(feature_id)
+                return False, f"At max testing agents ({current_testing_count})"

            cmd = [
                sys.executable,
@@ -588,10 +669,10 @@ class ParallelOrchestrator:
                "--project-dir", str(self.project_dir),
                "--max-iterations", "1",
                "--agent-type", "testing",
+                "--testing-feature-id", str(feature_id),
            ]
            if self.model:
                cmd.extend(["--model", self.model])
-            # Testing agents don't need --yolo flag (they use testing prompt regardless)

            try:
                proc = subprocess.Popen(
@@ -604,25 +685,26 @@ class ParallelOrchestrator:
                )
            except Exception as e:
                debug_log.log("TESTING", f"FAILED to spawn testing agent: {e}")
+                self.release_testing_claim(feature_id)
                return False, f"Failed to start testing agent: {e}"

-            # Register process immediately while still holding lock
-            self.running_testing_agents.append(proc)
+            # Register process with feature ID (same pattern as coding agents)
+            self.running_testing_agents[feature_id] = proc
            testing_count = len(self.running_testing_agents)

-        # Start output reader thread (feature_id=None for testing agents)
-        # This can be outside lock since process is already registered
+        # Start output reader thread with feature ID (same as coding agents)
        threading.Thread(
            target=self._read_output,
-            args=(None, proc, threading.Event(), "testing"),
+            args=(feature_id, proc, threading.Event(), "testing"),
            daemon=True
        ).start()

-        print(f"Started testing agent (PID {proc.pid})", flush=True)
-        debug_log.log("TESTING", "Successfully spawned testing agent",
+        print(f"Started testing agent for feature #{feature_id} (PID {proc.pid})", flush=True)
+        debug_log.log("TESTING", f"Successfully spawned testing agent for feature #{feature_id}",
            pid=proc.pid,
+            feature_id=feature_id,
            total_testing_agents=testing_count)
-        return True, "Started testing agent"
+        return True, f"Started testing agent for feature #{feature_id}"

    async def _run_initializer(self) -> bool:
        """Run initializer agent as blocking subprocess.
@@ -706,10 +788,8 @@ class ParallelOrchestrator:
                if self.on_output:
                    self.on_output(feature_id or 0, line)
                else:
-                    if agent_type == "testing":
-                        print(f"[Testing] {line}", flush=True)
-                    else:
-                        print(f"[Feature #{feature_id}] {line}", flush=True)
+                    # Both coding and testing agents now use [Feature #X] format
+                    print(f"[Feature #{feature_id}] {line}", flush=True)
            proc.wait()
        finally:
            self._on_agent_complete(feature_id, proc.returncode, agent_type, proc)
@@ -730,17 +810,27 @@ class ParallelOrchestrator:
          is safe.

        For testing agents:
-        - Just remove from the running list.
+        - Remove from running dict and release testing claim on feature.
        """
        if agent_type == "testing":
            with self._lock:
-                if proc in self.running_testing_agents:
-                    self.running_testing_agents.remove(proc)
+                # Remove from dict by finding the feature_id for this proc
+                found_feature_id = None
+                for fid, p in list(self.running_testing_agents.items()):
+                    if p is proc:
+                        found_feature_id = fid
+                        del self.running_testing_agents[fid]
+                        break
+
+            # Release testing claim on the feature
+            if found_feature_id is not None:
+                self.release_testing_claim(found_feature_id)

            status = "completed" if return_code == 0 else "failed"
-            print(f"Testing agent (PID {proc.pid}) {status}", flush=True)
-            debug_log.log("COMPLETE", "Testing agent finished",
+            print(f"Feature #{feature_id} testing {status}", flush=True)
+            debug_log.log("COMPLETE", f"Testing agent for feature #{feature_id} finished",
                pid=proc.pid,
+                feature_id=feature_id,
                status=status)
            return

@@ -846,11 +936,12 @@ class ParallelOrchestrator:

        # Stop testing agents
        with self._lock:
-            testing_procs = list(self.running_testing_agents)
+            testing_items = list(self.running_testing_agents.items())

-        for proc in testing_procs:
+        for feature_id, proc in testing_items:
            result = kill_process_tree(proc, timeout=5.0)
-            debug_log.log("STOP", f"Killed testing agent PID {proc.pid} process tree",
+            self.release_testing_claim(feature_id)
+            debug_log.log("STOP", f"Killed testing agent for feature #{feature_id} (PID {proc.pid})",
                status=result.status, children_found=result.children_found,
                children_terminated=result.children_terminated, children_killed=result.children_killed)