refactor: remove testing agent claim mechanism to allow concurrent testing

Remove the testing_in_progress claim/release mechanism from the testing
agent architecture. Multiple testing agents can now test the same feature
concurrently, simplifying the system and eliminating potential stale lock
issues.

Changes:
- parallel_orchestrator.py:
  - Remove claim_feature_for_testing() and release_testing_claim() methods
  - Remove _cleanup_stale_testing_locks() periodic cleanup
  - Replace with simple _get_random_passing_feature() selection
  - Remove startup stale lock cleanup code
  - Remove STALE_TESTING_LOCK_MINUTES constant
  - Remove unused imports (timedelta, text)

- api/database.py:
  - Remove testing_in_progress and last_tested_at columns from Feature model
  - Update to_dict() to exclude these fields
  - Convert _migrate_add_testing_columns() to a no-op for backwards
    compatibility (existing columns are left in place and ignored)

- mcp_server/feature_mcp.py:
  - Remove feature_release_testing tool entirely
  - Remove unused datetime imports (datetime, timezone)

- prompts.py:
  - Update testing prompt to remove feature_release_testing instruction
  - Testing agents now just verify, call feature_mark_failing if a
    regression is found, and exit (no cleanup needed)

- server/websocket.py:
  - Update AgentTracker to use composite keys (feature_id, agent_type)
  - Prevents ghost agent creation from ambiguous [Feature #X] messages
  - Proper separation of coding vs testing agent tracking

Benefits:
- Eliminates artificial bottleneck from claim coordination
- No stale locks to clean up after crashes
- Simpler crash recovery (no testing state to restore)
- Reduced database writes (no claim/release transactions)
- Matches intended design: random, concurrent regression testing

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Auto
2026-01-23 15:30:31 +02:00
parent 874359fcf6
commit 486979c3d9
5 changed files with 83 additions and 247 deletions

View File

@@ -57,10 +57,6 @@ class Feature(Base):
# Dependencies: list of feature IDs that must be completed before this feature # Dependencies: list of feature IDs that must be completed before this feature
# NULL/empty = no dependencies (backwards compatible) # NULL/empty = no dependencies (backwards compatible)
dependencies = Column(JSON, nullable=True, default=None) dependencies = Column(JSON, nullable=True, default=None)
# Regression testing: prevent concurrent testing of the same feature
testing_in_progress = Column(Boolean, nullable=False, default=False, index=True)
# Last time this feature was tested (for session-based regression tracking)
last_tested_at = Column(DateTime, nullable=True, default=None)
def to_dict(self) -> dict: def to_dict(self) -> dict:
"""Convert feature to dictionary for JSON serialization.""" """Convert feature to dictionary for JSON serialization."""
@@ -76,9 +72,6 @@ class Feature(Base):
"in_progress": self.in_progress if self.in_progress is not None else False, "in_progress": self.in_progress if self.in_progress is not None else False,
# Dependencies: NULL/empty treated as empty list for backwards compat # Dependencies: NULL/empty treated as empty list for backwards compat
"dependencies": self.dependencies if self.dependencies else [], "dependencies": self.dependencies if self.dependencies else [],
# Regression testing fields
"testing_in_progress": self.testing_in_progress if self.testing_in_progress is not None else False,
"last_tested_at": self.last_tested_at.isoformat() if self.last_tested_at else None,
} }
def get_dependencies_safe(self) -> list[int]: def get_dependencies_safe(self) -> list[int]:
@@ -240,23 +233,18 @@ def _migrate_add_dependencies_column(engine) -> None:
def _migrate_add_testing_columns(engine) -> None: def _migrate_add_testing_columns(engine) -> None:
"""Add testing_in_progress and last_tested_at columns for regression testing. """Legacy migration - no longer adds testing columns.
These columns support atomic claiming of features for regression testing The testing_in_progress and last_tested_at columns were removed from the
and tracking when features were last tested in a session. Feature model as part of simplifying the testing agent architecture.
Multiple testing agents can now test the same feature concurrently
without coordination.
This function is kept for backwards compatibility but does nothing.
Existing databases with these columns will continue to work - the columns
are simply ignored.
""" """
with engine.connect() as conn: pass
# Check existing columns
result = conn.execute(text("PRAGMA table_info(features)"))
columns = [row[1] for row in result.fetchall()]
if "testing_in_progress" not in columns:
conn.execute(text("ALTER TABLE features ADD COLUMN testing_in_progress BOOLEAN DEFAULT 0"))
conn.commit()
if "last_tested_at" not in columns:
conn.execute(text("ALTER TABLE features ADD COLUMN last_tested_at DATETIME DEFAULT NULL"))
conn.commit()
def _is_network_path(path: Path) -> bool: def _is_network_path(path: Path) -> bool:

View File

@@ -15,7 +15,6 @@ Tools:
- feature_mark_in_progress: Mark a feature as in-progress - feature_mark_in_progress: Mark a feature as in-progress
- feature_claim_and_get: Atomically claim and get feature details - feature_claim_and_get: Atomically claim and get feature details
- feature_clear_in_progress: Clear in-progress status - feature_clear_in_progress: Clear in-progress status
- feature_release_testing: Release testing lock on a feature
- feature_create_bulk: Create multiple features at once - feature_create_bulk: Create multiple features at once
- feature_create: Create a single feature - feature_create: Create a single feature
- feature_add_dependency: Add a dependency between features - feature_add_dependency: Add a dependency between features
@@ -33,7 +32,6 @@ import os
import sys import sys
import threading import threading
from contextlib import asynccontextmanager from contextlib import asynccontextmanager
from datetime import datetime, timezone
from pathlib import Path from pathlib import Path
from typing import Annotated from typing import Annotated
@@ -228,57 +226,6 @@ def feature_get_summary(
session.close() session.close()
@mcp.tool()
def feature_release_testing(
feature_id: Annotated[int, Field(description="The ID of the feature to release", ge=1)],
tested_ok: Annotated[bool, Field(description="True if the feature passed testing, False if regression found")] = True
) -> str:
"""Release a feature after regression testing completes.
Clears the testing_in_progress flag and updates last_tested_at timestamp.
This should be called after testing is complete, whether the feature
passed or failed. If tested_ok=False, the feature was marked as failing
by a previous call to feature_mark_failing.
Args:
feature_id: The ID of the feature that was being tested
tested_ok: True if testing passed, False if a regression was found
Returns:
JSON with release confirmation or error message.
"""
session = get_session()
try:
feature = session.query(Feature).filter(Feature.id == feature_id).first()
if feature is None:
return json.dumps({"error": f"Feature with ID {feature_id} not found"})
if not feature.testing_in_progress:
return json.dumps({
"warning": f"Feature {feature_id} was not being tested",
"feature": feature.to_dict()
})
# Clear testing flag and update timestamp
feature.testing_in_progress = False
feature.last_tested_at = datetime.now(timezone.utc)
session.commit()
session.refresh(feature)
status = "passed" if tested_ok else "failed (regression detected)"
return json.dumps({
"message": f"Feature #{feature_id} testing {status}",
"feature": feature.to_dict()
})
except Exception as e:
session.rollback()
return json.dumps({"error": f"Failed to release testing claim: {str(e)}"})
finally:
session.close()
@mcp.tool() @mcp.tool()
def feature_mark_passing( def feature_mark_passing(
feature_id: Annotated[int, Field(description="The ID of the feature to mark as passing", ge=1)] feature_id: Annotated[int, Field(description="The ID of the feature to mark as passing", ge=1)]

View File

@@ -23,12 +23,10 @@ import os
import subprocess import subprocess
import sys import sys
import threading import threading
from datetime import datetime, timedelta, timezone from datetime import datetime, timezone
from pathlib import Path from pathlib import Path
from typing import Callable, Literal from typing import Callable, Literal
from sqlalchemy import text
from api.database import Feature, create_database from api.database import Feature, create_database
from api.dependency_resolver import are_dependencies_satisfied, compute_scheduling_scores from api.dependency_resolver import are_dependencies_satisfied, compute_scheduling_scores
from progress import has_features from progress import has_features
@@ -126,7 +124,6 @@ DEFAULT_CONCURRENCY = 3
POLL_INTERVAL = 5 # seconds between checking for ready features POLL_INTERVAL = 5 # seconds between checking for ready features
MAX_FEATURE_RETRIES = 3 # Maximum times to retry a failed feature MAX_FEATURE_RETRIES = 3 # Maximum times to retry a failed feature
INITIALIZER_TIMEOUT = 1800 # 30 minutes timeout for initializer INITIALIZER_TIMEOUT = 1800 # 30 minutes timeout for initializer
STALE_TESTING_LOCK_MINUTES = 30 # Auto-release testing locks older than this
class ParallelOrchestrator: class ParallelOrchestrator:
@@ -199,72 +196,28 @@ class ParallelOrchestrator:
"""Get a new database session.""" """Get a new database session."""
return self._session_maker() return self._session_maker()
def claim_feature_for_testing(self) -> int | None: def _get_random_passing_feature(self) -> int | None:
"""Claim a random passing feature for regression testing. """Get a random passing feature for regression testing (no claim needed).
Returns the feature ID if successful, None if no features available. Testing agents can test the same feature concurrently - it doesn't matter.
Sets testing_in_progress=True on the claimed feature. This simplifies the architecture by removing unnecessary coordination.
Returns the feature ID if available, None if no passing features exist.
""" """
session = self.get_session()
try:
from sqlalchemy.sql.expression import func from sqlalchemy.sql.expression import func
# Find a passing feature that's not being worked on session = self.get_session()
# Exclude features already being tested by this orchestrator try:
with self._lock: # Find a passing feature that's not currently being coded
testing_feature_ids = set(self.running_testing_agents.keys()) # Multiple testing agents can test the same feature - that's fine
feature = (
candidate = (
session.query(Feature) session.query(Feature)
.filter(Feature.passes == True) .filter(Feature.passes == True)
.filter(Feature.in_progress == False) .filter(Feature.in_progress == False) # Don't test while coding
.filter(Feature.testing_in_progress == False)
.filter(~Feature.id.in_(testing_feature_ids) if testing_feature_ids else True)
.order_by(func.random()) .order_by(func.random())
.first() .first()
) )
return feature.id if feature else None
if not candidate:
return None
# Atomic claim using UPDATE with WHERE clause
result = session.execute(
text("""
UPDATE features
SET testing_in_progress = 1
WHERE id = :feature_id
AND passes = 1
AND in_progress = 0
AND testing_in_progress = 0
"""),
{"feature_id": candidate.id}
)
session.commit()
if result.rowcount == 0:
# Another process claimed it
return None
return candidate.id
except Exception as e:
session.rollback()
debug_log.log("TESTING", f"Failed to claim feature for testing: {e}")
return None
finally:
session.close()
def release_testing_claim(self, feature_id: int):
"""Release a testing claim on a feature (called when testing agent exits)."""
session = self.get_session()
try:
session.execute(
text("UPDATE features SET testing_in_progress = 0 WHERE id = :feature_id"),
{"feature_id": feature_id}
)
session.commit()
except Exception as e:
session.rollback()
debug_log.log("TESTING", f"Failed to release testing claim for feature {feature_id}: {e}")
finally: finally:
session.close() session.close()
@@ -424,55 +377,6 @@ class ParallelOrchestrator:
finally: finally:
session.close() session.close()
def _cleanup_stale_testing_locks(self) -> None:
"""Release stale testing locks from crashed testing agents.
A feature is considered stale if:
- testing_in_progress=True AND
- last_tested_at is NOT NULL AND older than STALE_TESTING_LOCK_MINUTES
Note: We do NOT release features with last_tested_at=NULL because that would
incorrectly release features that are legitimately in the middle of their
first test. The last_tested_at is only set when testing completes.
This handles the case where a testing agent crashes mid-test, leaving
the feature locked until orchestrator restart. By checking periodically,
we can release these locks without requiring a restart.
"""
session = self.get_session()
try:
# Use timezone-aware UTC, then strip timezone for SQLite compatibility
# (SQLite stores datetimes as naive strings, but we want consistency with
# datetime.now(timezone.utc) used elsewhere in the codebase)
cutoff_time = (datetime.now(timezone.utc) - timedelta(minutes=STALE_TESTING_LOCK_MINUTES)).replace(tzinfo=None)
# Find stale locks: testing_in_progress=True AND last_tested_at < cutoff
# Excludes NULL last_tested_at to avoid false positives on first-time tests
stale_features = (
session.query(Feature)
.filter(Feature.testing_in_progress == True)
.filter(Feature.last_tested_at.isnot(None))
.filter(Feature.last_tested_at < cutoff_time)
.all()
)
if stale_features:
stale_ids = [f.id for f in stale_features]
# Use ORM update instead of raw SQL for SQLite IN clause compatibility
session.query(Feature).filter(Feature.id.in_(stale_ids)).update(
{"testing_in_progress": False},
synchronize_session=False
)
session.commit()
print(f"[CLEANUP] Released {len(stale_ids)} stale testing locks: {stale_ids}", flush=True)
debug_log.log("CLEANUP", "Released stale testing locks", feature_ids=stale_ids)
except Exception as e:
session.rollback()
print(f"[CLEANUP] Error cleaning stale locks: {e}", flush=True)
debug_log.log("CLEANUP", f"Error cleaning stale locks: {e}")
finally:
session.close()
def _maintain_testing_agents(self) -> None: def _maintain_testing_agents(self) -> None:
"""Maintain the desired count of testing agents independently. """Maintain the desired count of testing agents independently.
@@ -480,8 +384,8 @@ class ParallelOrchestrator:
the configured testing_agent_ratio. Testing agents run independently from the configured testing_agent_ratio. Testing agents run independently from
coding agents and continuously re-test passing features to catch regressions. coding agents and continuously re-test passing features to catch regressions.
Also periodically releases stale testing locks (features stuck in Multiple testing agents can test the same feature concurrently - this is
testing_in_progress=True for more than STALE_TESTING_LOCK_MINUTES). intentional and simplifies the architecture by removing claim coordination.
Stops spawning when: Stops spawning when:
- YOLO mode is enabled - YOLO mode is enabled
@@ -492,11 +396,6 @@ class ParallelOrchestrator:
if self.yolo_mode or self.testing_agent_ratio == 0: if self.yolo_mode or self.testing_agent_ratio == 0:
return return
# Periodically clean up stale testing locks (features stuck mid-test due to crash)
# A feature is considered stale if testing_in_progress=True and last_tested_at
# is either NULL or older than STALE_TESTING_LOCK_MINUTES
self._cleanup_stale_testing_locks()
# No testing until there are passing features # No testing until there are passing features
passing_count = self.get_passing_count() passing_count = self.get_passing_count()
if passing_count == 0: if passing_count == 0:
@@ -641,8 +540,9 @@ class ParallelOrchestrator:
def _spawn_testing_agent(self) -> tuple[bool, str]: def _spawn_testing_agent(self) -> tuple[bool, str]:
"""Spawn a testing agent subprocess for regression testing. """Spawn a testing agent subprocess for regression testing.
Claims a feature BEFORE spawning the agent (same pattern as coding agents). Picks a random passing feature to test. Multiple testing agents can test
This ensures we know which feature is being tested for UI display. the same feature concurrently - this is intentional and simplifies the
architecture by removing claim coordination.
""" """
# Check limits first (under lock) # Check limits first (under lock)
with self._lock: with self._lock:
@@ -655,20 +555,19 @@ class ParallelOrchestrator:
debug_log.log("TESTING", f"Skipped spawn - at max total agents ({total_agents}/{MAX_TOTAL_AGENTS})") debug_log.log("TESTING", f"Skipped spawn - at max total agents ({total_agents}/{MAX_TOTAL_AGENTS})")
return False, f"At max total agents ({total_agents})" return False, f"At max total agents ({total_agents})"
# Claim a feature for testing (outside lock to avoid holding during DB ops) # Pick a random passing feature (no claim needed - concurrent testing is fine)
feature_id = self.claim_feature_for_testing() feature_id = self._get_random_passing_feature()
if feature_id is None: if feature_id is None:
debug_log.log("TESTING", "No features available for testing") debug_log.log("TESTING", "No features available for testing")
return False, "No features available for testing" return False, "No features available for testing"
debug_log.log("TESTING", f"Claimed feature #{feature_id} for testing") debug_log.log("TESTING", f"Selected feature #{feature_id} for testing")
# Now spawn with the claimed feature ID # Spawn the testing agent
with self._lock: with self._lock:
# Re-check limits in case another thread spawned while we were claiming # Re-check limits in case another thread spawned while we were selecting
current_testing_count = len(self.running_testing_agents) current_testing_count = len(self.running_testing_agents)
if current_testing_count >= self.max_concurrency: if current_testing_count >= self.max_concurrency:
self.release_testing_claim(feature_id)
return False, f"At max testing agents ({current_testing_count})" return False, f"At max testing agents ({current_testing_count})"
cmd = [ cmd = [
@@ -694,7 +593,6 @@ class ParallelOrchestrator:
) )
except Exception as e: except Exception as e:
debug_log.log("TESTING", f"FAILED to spawn testing agent: {e}") debug_log.log("TESTING", f"FAILED to spawn testing agent: {e}")
self.release_testing_claim(feature_id)
return False, f"Failed to start testing agent: {e}" return False, f"Failed to start testing agent: {e}"
# Register process with feature ID (same pattern as coding agents) # Register process with feature ID (same pattern as coding agents)
@@ -865,22 +763,16 @@ class ParallelOrchestrator:
is safe. is safe.
For testing agents: For testing agents:
- Remove from running dict and release testing claim on feature. - Remove from running dict (no claim to release - concurrent testing is allowed).
""" """
if agent_type == "testing": if agent_type == "testing":
with self._lock: with self._lock:
# Remove from dict by finding the feature_id for this proc # Remove from dict by finding the feature_id for this proc
found_feature_id = None
for fid, p in list(self.running_testing_agents.items()): for fid, p in list(self.running_testing_agents.items()):
if p is proc: if p is proc:
found_feature_id = fid
del self.running_testing_agents[fid] del self.running_testing_agents[fid]
break break
# Release testing claim on the feature
if found_feature_id is not None:
self.release_testing_claim(found_feature_id)
status = "completed" if return_code == 0 else "failed" status = "completed" if return_code == 0 else "failed"
print(f"Feature #{feature_id} testing {status}", flush=True) print(f"Feature #{feature_id} testing {status}", flush=True)
debug_log.log("COMPLETE", f"Testing agent for feature #{feature_id} finished", debug_log.log("COMPLETE", f"Testing agent for feature #{feature_id} finished",
@@ -974,13 +866,12 @@ class ParallelOrchestrator:
for fid in feature_ids: for fid in feature_ids:
self.stop_feature(fid) self.stop_feature(fid)
# Stop testing agents # Stop testing agents (no claim to release - concurrent testing is allowed)
with self._lock: with self._lock:
testing_items = list(self.running_testing_agents.items()) testing_items = list(self.running_testing_agents.items())
for feature_id, proc in testing_items: for feature_id, proc in testing_items:
result = kill_process_tree(proc, timeout=5.0) result = kill_process_tree(proc, timeout=5.0)
self.release_testing_claim(feature_id)
debug_log.log("STOP", f"Killed testing agent for feature #{feature_id} (PID {proc.pid})", debug_log.log("STOP", f"Killed testing agent for feature #{feature_id} (PID {proc.pid})",
status=result.status, children_found=result.children_found, status=result.status, children_found=result.children_found,
children_terminated=result.children_terminated, children_killed=result.children_killed) children_terminated=result.children_terminated, children_killed=result.children_killed)
@@ -1002,19 +893,6 @@ class ParallelOrchestrator:
# Must happen before any debug_log.log() calls # Must happen before any debug_log.log() calls
debug_log.start_session() debug_log.start_session()
# Clear stale testing_in_progress flags from crashed testing agents
# This ensures features aren't permanently locked if a previous session crashed
session = self.get_session()
try:
stale_count = session.query(Feature).filter(Feature.testing_in_progress == True).count()
if stale_count > 0:
session.execute(text("UPDATE features SET testing_in_progress = 0 WHERE testing_in_progress = 1"))
session.commit()
print(f"[STARTUP] Cleared {stale_count} stale testing_in_progress flags", flush=True)
debug_log.log("STARTUP", f"Cleared {stale_count} stale testing_in_progress flags")
finally:
session.close()
# Log startup to debug file # Log startup to debug file
debug_log.section("ORCHESTRATOR STARTUP") debug_log.section("ORCHESTRATOR STARTUP")
debug_log.log("STARTUP", "Orchestrator run_loop starting", debug_log.log("STARTUP", "Orchestrator run_loop starting",

View File

@@ -93,12 +93,11 @@ def get_testing_prompt(project_dir: Path | None = None, testing_feature_id: int
**You are assigned to regression test Feature #{testing_feature_id}.** **You are assigned to regression test Feature #{testing_feature_id}.**
The orchestrator has already claimed this feature for you.
### Your workflow: ### Your workflow:
1. Call `feature_get_by_id` with ID {testing_feature_id} to get the feature details 1. Call `feature_get_by_id` with ID {testing_feature_id} to get the feature details
2. Verify the feature through the UI using browser automation 2. Verify the feature through the UI using browser automation
3. When done, call `feature_release_testing` with feature_id={testing_feature_id} 3. If regression found, call `feature_mark_failing` with feature_id={testing_feature_id}
4. Exit when done (no cleanup needed)
--- ---

View File

@@ -73,13 +73,14 @@ ORCHESTRATOR_PATTERNS = {
class AgentTracker: class AgentTracker:
"""Tracks active agents and their states for multi-agent mode. """Tracks active agents and their states for multi-agent mode.
Both coding and testing agents are now tracked by their feature ID. Both coding and testing agents are tracked using a composite key of
The agent_type field distinguishes between them. (feature_id, agent_type) to allow simultaneous tracking of both agent
types for the same feature.
""" """
def __init__(self): def __init__(self):
# feature_id -> {name, state, last_thought, agent_index, agent_type} # (feature_id, agent_type) -> {name, state, last_thought, agent_index, agent_type}
self.active_agents: dict[int, dict] = {} self.active_agents: dict[tuple[int, str], dict] = {}
self._next_agent_index = 0 self._next_agent_index = 0
self._lock = asyncio.Lock() self._lock = asyncio.Lock()
@@ -111,14 +112,14 @@ class AgentTracker:
if testing_complete_match: if testing_complete_match:
feature_id = int(testing_complete_match.group(1)) feature_id = int(testing_complete_match.group(1))
is_success = testing_complete_match.group(2) == "completed" is_success = testing_complete_match.group(2) == "completed"
return await self._handle_agent_complete(feature_id, is_success) return await self._handle_agent_complete(feature_id, is_success, agent_type="testing")
# Coding agent complete: "Feature #X completed/failed" (without "testing" keyword) # Coding agent complete: "Feature #X completed/failed" (without "testing" keyword)
if line.startswith("Feature #") and ("completed" in line or "failed" in line) and "testing" not in line: if line.startswith("Feature #") and ("completed" in line or "failed" in line) and "testing" not in line:
try: try:
feature_id = int(re.search(r'#(\d+)', line).group(1)) feature_id = int(re.search(r'#(\d+)', line).group(1))
is_success = "completed" in line is_success = "completed" in line
return await self._handle_agent_complete(feature_id, is_success) return await self._handle_agent_complete(feature_id, is_success, agent_type="coding")
except (AttributeError, ValueError): except (AttributeError, ValueError):
pass pass
@@ -132,11 +133,21 @@ class AgentTracker:
content = match.group(2) content = match.group(2)
async with self._lock: async with self._lock:
# Ensure agent is tracked # Check if either coding or testing agent exists for this feature
if feature_id not in self.active_agents: # This prevents creating ghost agents when a testing agent outputs [Feature #X] lines
coding_key = (feature_id, 'coding')
testing_key = (feature_id, 'testing')
if coding_key in self.active_agents:
key = coding_key
elif testing_key in self.active_agents:
key = testing_key
else:
# Neither exists, create a new coding agent entry (implicit tracking)
key = coding_key
agent_index = self._next_agent_index agent_index = self._next_agent_index
self._next_agent_index += 1 self._next_agent_index += 1
self.active_agents[feature_id] = { self.active_agents[key] = {
'name': AGENT_MASCOTS[agent_index % len(AGENT_MASCOTS)], 'name': AGENT_MASCOTS[agent_index % len(AGENT_MASCOTS)],
'agent_index': agent_index, 'agent_index': agent_index,
'agent_type': 'coding', 'agent_type': 'coding',
@@ -145,7 +156,7 @@ class AgentTracker:
'last_thought': None, 'last_thought': None,
} }
agent = self.active_agents[feature_id] agent = self.active_agents[key]
# Detect state and thought from content # Detect state and thought from content
state = 'working' state = 'working'
@@ -178,16 +189,21 @@ class AgentTracker:
return None return None
async def get_agent_info(self, feature_id: int) -> tuple[int | None, str | None]: async def get_agent_info(self, feature_id: int, agent_type: str = "coding") -> tuple[int | None, str | None]:
"""Get agent index and name for a feature ID. """Get agent index and name for a feature ID and agent type.
Thread-safe method that acquires the lock before reading state. Thread-safe method that acquires the lock before reading state.
Args:
feature_id: The feature ID to look up.
agent_type: The agent type ("coding" or "testing"). Defaults to "coding".
Returns: Returns:
Tuple of (agentIndex, agentName) or (None, None) if not tracked. Tuple of (agentIndex, agentName) or (None, None) if not tracked.
""" """
async with self._lock: async with self._lock:
agent = self.active_agents.get(feature_id) key = (feature_id, agent_type)
agent = self.active_agents.get(key)
if agent: if agent:
return agent['agent_index'], agent['name'] return agent['agent_index'], agent['name']
return None, None return None, None
@@ -207,6 +223,7 @@ class AgentTracker:
async def _handle_agent_start(self, feature_id: int, line: str, agent_type: str = "coding") -> dict | None: async def _handle_agent_start(self, feature_id: int, line: str, agent_type: str = "coding") -> dict | None:
"""Handle agent start message from orchestrator.""" """Handle agent start message from orchestrator."""
async with self._lock: async with self._lock:
key = (feature_id, agent_type) # Composite key for separate tracking
agent_index = self._next_agent_index agent_index = self._next_agent_index
self._next_agent_index += 1 self._next_agent_index += 1
@@ -216,7 +233,7 @@ class AgentTracker:
if name_match: if name_match:
feature_name = name_match.group(1) feature_name = name_match.group(1)
self.active_agents[feature_id] = { self.active_agents[key] = {
'name': AGENT_MASCOTS[agent_index % len(AGENT_MASCOTS)], 'name': AGENT_MASCOTS[agent_index % len(AGENT_MASCOTS)],
'agent_index': agent_index, 'agent_index': agent_index,
'agent_type': agent_type, 'agent_type': agent_type,
@@ -237,26 +254,33 @@ class AgentTracker:
'timestamp': datetime.now().isoformat(), 'timestamp': datetime.now().isoformat(),
} }
async def _handle_agent_complete(self, feature_id: int, is_success: bool) -> dict | None: async def _handle_agent_complete(self, feature_id: int, is_success: bool, agent_type: str = "coding") -> dict | None:
"""Handle agent completion - ALWAYS emits a message, even if agent wasn't tracked.""" """Handle agent completion - ALWAYS emits a message, even if agent wasn't tracked.
Args:
feature_id: The feature ID.
is_success: Whether the agent completed successfully.
agent_type: The agent type ("coding" or "testing"). Defaults to "coding".
"""
async with self._lock: async with self._lock:
key = (feature_id, agent_type) # Composite key for correct agent lookup
state = 'success' if is_success else 'error' state = 'success' if is_success else 'error'
if feature_id in self.active_agents: if key in self.active_agents:
# Normal case: agent was tracked # Normal case: agent was tracked
agent = self.active_agents[feature_id] agent = self.active_agents[key]
result = { result = {
'type': 'agent_update', 'type': 'agent_update',
'agentIndex': agent['agent_index'], 'agentIndex': agent['agent_index'],
'agentName': agent['name'], 'agentName': agent['name'],
'agentType': agent.get('agent_type', 'coding'), 'agentType': agent.get('agent_type', agent_type),
'featureId': feature_id, 'featureId': feature_id,
'featureName': agent['feature_name'], 'featureName': agent['feature_name'],
'state': state, 'state': state,
'thought': 'Completed successfully!' if is_success else 'Failed to complete', 'thought': 'Completed successfully!' if is_success else 'Failed to complete',
'timestamp': datetime.now().isoformat(), 'timestamp': datetime.now().isoformat(),
} }
del self.active_agents[feature_id] del self.active_agents[key]
return result return result
else: else:
# Synthetic completion for untracked agent # Synthetic completion for untracked agent
@@ -265,7 +289,7 @@ class AgentTracker:
'type': 'agent_update', 'type': 'agent_update',
'agentIndex': -1, # Sentinel for untracked 'agentIndex': -1, # Sentinel for untracked
'agentName': 'Unknown', 'agentName': 'Unknown',
'agentType': 'coding', 'agentType': agent_type,
'featureId': feature_id, 'featureId': feature_id,
'featureName': f'Feature #{feature_id}', 'featureName': f'Feature #{feature_id}',
'state': state, 'state': state,