mirror of
https://github.com/leonvanzyl/autocoder.git
synced 2026-01-30 06:12:06 +00:00
refactor: remove testing agent claim mechanism for concurrent testing
Remove the testing_in_progress claim/release mechanism from the testing agent architecture. Multiple testing agents can now test the same feature concurrently, simplifying the system and eliminating potential stale lock issues. Changes: - parallel_orchestrator.py: - Remove claim_feature_for_testing() and release_testing_claim() methods - Remove _cleanup_stale_testing_locks() periodic cleanup - Replace with simple _get_random_passing_feature() selection - Remove startup stale lock cleanup code - Remove STALE_TESTING_LOCK_MINUTES constant - Remove unused imports (timedelta, text) - api/database.py: - Remove testing_in_progress and last_tested_at columns from Feature model - Update to_dict() to exclude these fields - Convert _migrate_add_testing_columns() to no-op for backwards compat - mcp_server/feature_mcp.py: - Remove feature_release_testing tool entirely - Remove unused datetime import - prompts.py: - Update testing prompt to remove feature_release_testing instruction - Testing agents now just verify and exit (no cleanup needed) - server/websocket.py: - Update AgentTracker to use composite keys (feature_id, agent_type) - Prevents ghost agent creation from ambiguous [Feature #X] messages - Proper separation of coding vs testing agent tracking Benefits: - Eliminates artificial bottleneck from claim coordination - No stale locks to clean up after crashes - Simpler crash recovery (no testing state to restore) - Reduced database writes (no claim/release transactions) - Matches intended design: random, concurrent regression testing Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -57,10 +57,6 @@ class Feature(Base):
|
|||||||
# Dependencies: list of feature IDs that must be completed before this feature
|
# Dependencies: list of feature IDs that must be completed before this feature
|
||||||
# NULL/empty = no dependencies (backwards compatible)
|
# NULL/empty = no dependencies (backwards compatible)
|
||||||
dependencies = Column(JSON, nullable=True, default=None)
|
dependencies = Column(JSON, nullable=True, default=None)
|
||||||
# Regression testing: prevent concurrent testing of the same feature
|
|
||||||
testing_in_progress = Column(Boolean, nullable=False, default=False, index=True)
|
|
||||||
# Last time this feature was tested (for session-based regression tracking)
|
|
||||||
last_tested_at = Column(DateTime, nullable=True, default=None)
|
|
||||||
|
|
||||||
def to_dict(self) -> dict:
|
def to_dict(self) -> dict:
|
||||||
"""Convert feature to dictionary for JSON serialization."""
|
"""Convert feature to dictionary for JSON serialization."""
|
||||||
@@ -76,9 +72,6 @@ class Feature(Base):
|
|||||||
"in_progress": self.in_progress if self.in_progress is not None else False,
|
"in_progress": self.in_progress if self.in_progress is not None else False,
|
||||||
# Dependencies: NULL/empty treated as empty list for backwards compat
|
# Dependencies: NULL/empty treated as empty list for backwards compat
|
||||||
"dependencies": self.dependencies if self.dependencies else [],
|
"dependencies": self.dependencies if self.dependencies else [],
|
||||||
# Regression testing fields
|
|
||||||
"testing_in_progress": self.testing_in_progress if self.testing_in_progress is not None else False,
|
|
||||||
"last_tested_at": self.last_tested_at.isoformat() if self.last_tested_at else None,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
def get_dependencies_safe(self) -> list[int]:
|
def get_dependencies_safe(self) -> list[int]:
|
||||||
@@ -240,23 +233,18 @@ def _migrate_add_dependencies_column(engine) -> None:
|
|||||||
|
|
||||||
|
|
||||||
def _migrate_add_testing_columns(engine) -> None:
|
def _migrate_add_testing_columns(engine) -> None:
|
||||||
"""Add testing_in_progress and last_tested_at columns for regression testing.
|
"""Legacy migration - no longer adds testing columns.
|
||||||
|
|
||||||
These columns support atomic claiming of features for regression testing
|
The testing_in_progress and last_tested_at columns were removed from the
|
||||||
and tracking when features were last tested in a session.
|
Feature model as part of simplifying the testing agent architecture.
|
||||||
|
Multiple testing agents can now test the same feature concurrently
|
||||||
|
without coordination.
|
||||||
|
|
||||||
|
This function is kept for backwards compatibility but does nothing.
|
||||||
|
Existing databases with these columns will continue to work - the columns
|
||||||
|
are simply ignored.
|
||||||
"""
|
"""
|
||||||
with engine.connect() as conn:
|
pass
|
||||||
# Check existing columns
|
|
||||||
result = conn.execute(text("PRAGMA table_info(features)"))
|
|
||||||
columns = [row[1] for row in result.fetchall()]
|
|
||||||
|
|
||||||
if "testing_in_progress" not in columns:
|
|
||||||
conn.execute(text("ALTER TABLE features ADD COLUMN testing_in_progress BOOLEAN DEFAULT 0"))
|
|
||||||
conn.commit()
|
|
||||||
|
|
||||||
if "last_tested_at" not in columns:
|
|
||||||
conn.execute(text("ALTER TABLE features ADD COLUMN last_tested_at DATETIME DEFAULT NULL"))
|
|
||||||
conn.commit()
|
|
||||||
|
|
||||||
|
|
||||||
def _is_network_path(path: Path) -> bool:
|
def _is_network_path(path: Path) -> bool:
|
||||||
|
|||||||
@@ -15,7 +15,6 @@ Tools:
|
|||||||
- feature_mark_in_progress: Mark a feature as in-progress
|
- feature_mark_in_progress: Mark a feature as in-progress
|
||||||
- feature_claim_and_get: Atomically claim and get feature details
|
- feature_claim_and_get: Atomically claim and get feature details
|
||||||
- feature_clear_in_progress: Clear in-progress status
|
- feature_clear_in_progress: Clear in-progress status
|
||||||
- feature_release_testing: Release testing lock on a feature
|
|
||||||
- feature_create_bulk: Create multiple features at once
|
- feature_create_bulk: Create multiple features at once
|
||||||
- feature_create: Create a single feature
|
- feature_create: Create a single feature
|
||||||
- feature_add_dependency: Add a dependency between features
|
- feature_add_dependency: Add a dependency between features
|
||||||
@@ -33,7 +32,6 @@ import os
|
|||||||
import sys
|
import sys
|
||||||
import threading
|
import threading
|
||||||
from contextlib import asynccontextmanager
|
from contextlib import asynccontextmanager
|
||||||
from datetime import datetime, timezone
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Annotated
|
from typing import Annotated
|
||||||
|
|
||||||
@@ -228,57 +226,6 @@ def feature_get_summary(
|
|||||||
session.close()
|
session.close()
|
||||||
|
|
||||||
|
|
||||||
@mcp.tool()
|
|
||||||
def feature_release_testing(
|
|
||||||
feature_id: Annotated[int, Field(description="The ID of the feature to release", ge=1)],
|
|
||||||
tested_ok: Annotated[bool, Field(description="True if the feature passed testing, False if regression found")] = True
|
|
||||||
) -> str:
|
|
||||||
"""Release a feature after regression testing completes.
|
|
||||||
|
|
||||||
Clears the testing_in_progress flag and updates last_tested_at timestamp.
|
|
||||||
|
|
||||||
This should be called after testing is complete, whether the feature
|
|
||||||
passed or failed. If tested_ok=False, the feature was marked as failing
|
|
||||||
by a previous call to feature_mark_failing.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
feature_id: The ID of the feature that was being tested
|
|
||||||
tested_ok: True if testing passed, False if a regression was found
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
JSON with release confirmation or error message.
|
|
||||||
"""
|
|
||||||
session = get_session()
|
|
||||||
try:
|
|
||||||
feature = session.query(Feature).filter(Feature.id == feature_id).first()
|
|
||||||
|
|
||||||
if feature is None:
|
|
||||||
return json.dumps({"error": f"Feature with ID {feature_id} not found"})
|
|
||||||
|
|
||||||
if not feature.testing_in_progress:
|
|
||||||
return json.dumps({
|
|
||||||
"warning": f"Feature {feature_id} was not being tested",
|
|
||||||
"feature": feature.to_dict()
|
|
||||||
})
|
|
||||||
|
|
||||||
# Clear testing flag and update timestamp
|
|
||||||
feature.testing_in_progress = False
|
|
||||||
feature.last_tested_at = datetime.now(timezone.utc)
|
|
||||||
session.commit()
|
|
||||||
session.refresh(feature)
|
|
||||||
|
|
||||||
status = "passed" if tested_ok else "failed (regression detected)"
|
|
||||||
return json.dumps({
|
|
||||||
"message": f"Feature #{feature_id} testing {status}",
|
|
||||||
"feature": feature.to_dict()
|
|
||||||
})
|
|
||||||
except Exception as e:
|
|
||||||
session.rollback()
|
|
||||||
return json.dumps({"error": f"Failed to release testing claim: {str(e)}"})
|
|
||||||
finally:
|
|
||||||
session.close()
|
|
||||||
|
|
||||||
|
|
||||||
@mcp.tool()
|
@mcp.tool()
|
||||||
def feature_mark_passing(
|
def feature_mark_passing(
|
||||||
feature_id: Annotated[int, Field(description="The ID of the feature to mark as passing", ge=1)]
|
feature_id: Annotated[int, Field(description="The ID of the feature to mark as passing", ge=1)]
|
||||||
|
|||||||
@@ -23,12 +23,10 @@ import os
|
|||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
import threading
|
import threading
|
||||||
from datetime import datetime, timedelta, timezone
|
from datetime import datetime, timezone
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Callable, Literal
|
from typing import Callable, Literal
|
||||||
|
|
||||||
from sqlalchemy import text
|
|
||||||
|
|
||||||
from api.database import Feature, create_database
|
from api.database import Feature, create_database
|
||||||
from api.dependency_resolver import are_dependencies_satisfied, compute_scheduling_scores
|
from api.dependency_resolver import are_dependencies_satisfied, compute_scheduling_scores
|
||||||
from progress import has_features
|
from progress import has_features
|
||||||
@@ -126,7 +124,6 @@ DEFAULT_CONCURRENCY = 3
|
|||||||
POLL_INTERVAL = 5 # seconds between checking for ready features
|
POLL_INTERVAL = 5 # seconds between checking for ready features
|
||||||
MAX_FEATURE_RETRIES = 3 # Maximum times to retry a failed feature
|
MAX_FEATURE_RETRIES = 3 # Maximum times to retry a failed feature
|
||||||
INITIALIZER_TIMEOUT = 1800 # 30 minutes timeout for initializer
|
INITIALIZER_TIMEOUT = 1800 # 30 minutes timeout for initializer
|
||||||
STALE_TESTING_LOCK_MINUTES = 30 # Auto-release testing locks older than this
|
|
||||||
|
|
||||||
|
|
||||||
class ParallelOrchestrator:
|
class ParallelOrchestrator:
|
||||||
@@ -199,72 +196,28 @@ class ParallelOrchestrator:
|
|||||||
"""Get a new database session."""
|
"""Get a new database session."""
|
||||||
return self._session_maker()
|
return self._session_maker()
|
||||||
|
|
||||||
def claim_feature_for_testing(self) -> int | None:
|
def _get_random_passing_feature(self) -> int | None:
|
||||||
"""Claim a random passing feature for regression testing.
|
"""Get a random passing feature for regression testing (no claim needed).
|
||||||
|
|
||||||
Returns the feature ID if successful, None if no features available.
|
Testing agents can test the same feature concurrently - it doesn't matter.
|
||||||
Sets testing_in_progress=True on the claimed feature.
|
This simplifies the architecture by removing unnecessary coordination.
|
||||||
|
|
||||||
|
Returns the feature ID if available, None if no passing features exist.
|
||||||
"""
|
"""
|
||||||
session = self.get_session()
|
|
||||||
try:
|
|
||||||
from sqlalchemy.sql.expression import func
|
from sqlalchemy.sql.expression import func
|
||||||
|
|
||||||
# Find a passing feature that's not being worked on
|
session = self.get_session()
|
||||||
# Exclude features already being tested by this orchestrator
|
try:
|
||||||
with self._lock:
|
# Find a passing feature that's not currently being coded
|
||||||
testing_feature_ids = set(self.running_testing_agents.keys())
|
# Multiple testing agents can test the same feature - that's fine
|
||||||
|
feature = (
|
||||||
candidate = (
|
|
||||||
session.query(Feature)
|
session.query(Feature)
|
||||||
.filter(Feature.passes == True)
|
.filter(Feature.passes == True)
|
||||||
.filter(Feature.in_progress == False)
|
.filter(Feature.in_progress == False) # Don't test while coding
|
||||||
.filter(Feature.testing_in_progress == False)
|
|
||||||
.filter(~Feature.id.in_(testing_feature_ids) if testing_feature_ids else True)
|
|
||||||
.order_by(func.random())
|
.order_by(func.random())
|
||||||
.first()
|
.first()
|
||||||
)
|
)
|
||||||
|
return feature.id if feature else None
|
||||||
if not candidate:
|
|
||||||
return None
|
|
||||||
|
|
||||||
# Atomic claim using UPDATE with WHERE clause
|
|
||||||
result = session.execute(
|
|
||||||
text("""
|
|
||||||
UPDATE features
|
|
||||||
SET testing_in_progress = 1
|
|
||||||
WHERE id = :feature_id
|
|
||||||
AND passes = 1
|
|
||||||
AND in_progress = 0
|
|
||||||
AND testing_in_progress = 0
|
|
||||||
"""),
|
|
||||||
{"feature_id": candidate.id}
|
|
||||||
)
|
|
||||||
session.commit()
|
|
||||||
|
|
||||||
if result.rowcount == 0:
|
|
||||||
# Another process claimed it
|
|
||||||
return None
|
|
||||||
|
|
||||||
return candidate.id
|
|
||||||
except Exception as e:
|
|
||||||
session.rollback()
|
|
||||||
debug_log.log("TESTING", f"Failed to claim feature for testing: {e}")
|
|
||||||
return None
|
|
||||||
finally:
|
|
||||||
session.close()
|
|
||||||
|
|
||||||
def release_testing_claim(self, feature_id: int):
|
|
||||||
"""Release a testing claim on a feature (called when testing agent exits)."""
|
|
||||||
session = self.get_session()
|
|
||||||
try:
|
|
||||||
session.execute(
|
|
||||||
text("UPDATE features SET testing_in_progress = 0 WHERE id = :feature_id"),
|
|
||||||
{"feature_id": feature_id}
|
|
||||||
)
|
|
||||||
session.commit()
|
|
||||||
except Exception as e:
|
|
||||||
session.rollback()
|
|
||||||
debug_log.log("TESTING", f"Failed to release testing claim for feature {feature_id}: {e}")
|
|
||||||
finally:
|
finally:
|
||||||
session.close()
|
session.close()
|
||||||
|
|
||||||
@@ -424,55 +377,6 @@ class ParallelOrchestrator:
|
|||||||
finally:
|
finally:
|
||||||
session.close()
|
session.close()
|
||||||
|
|
||||||
def _cleanup_stale_testing_locks(self) -> None:
|
|
||||||
"""Release stale testing locks from crashed testing agents.
|
|
||||||
|
|
||||||
A feature is considered stale if:
|
|
||||||
- testing_in_progress=True AND
|
|
||||||
- last_tested_at is NOT NULL AND older than STALE_TESTING_LOCK_MINUTES
|
|
||||||
|
|
||||||
Note: We do NOT release features with last_tested_at=NULL because that would
|
|
||||||
incorrectly release features that are legitimately in the middle of their
|
|
||||||
first test. The last_tested_at is only set when testing completes.
|
|
||||||
|
|
||||||
This handles the case where a testing agent crashes mid-test, leaving
|
|
||||||
the feature locked until orchestrator restart. By checking periodically,
|
|
||||||
we can release these locks without requiring a restart.
|
|
||||||
"""
|
|
||||||
session = self.get_session()
|
|
||||||
try:
|
|
||||||
# Use timezone-aware UTC, then strip timezone for SQLite compatibility
|
|
||||||
# (SQLite stores datetimes as naive strings, but we want consistency with
|
|
||||||
# datetime.now(timezone.utc) used elsewhere in the codebase)
|
|
||||||
cutoff_time = (datetime.now(timezone.utc) - timedelta(minutes=STALE_TESTING_LOCK_MINUTES)).replace(tzinfo=None)
|
|
||||||
|
|
||||||
# Find stale locks: testing_in_progress=True AND last_tested_at < cutoff
|
|
||||||
# Excludes NULL last_tested_at to avoid false positives on first-time tests
|
|
||||||
stale_features = (
|
|
||||||
session.query(Feature)
|
|
||||||
.filter(Feature.testing_in_progress == True)
|
|
||||||
.filter(Feature.last_tested_at.isnot(None))
|
|
||||||
.filter(Feature.last_tested_at < cutoff_time)
|
|
||||||
.all()
|
|
||||||
)
|
|
||||||
|
|
||||||
if stale_features:
|
|
||||||
stale_ids = [f.id for f in stale_features]
|
|
||||||
# Use ORM update instead of raw SQL for SQLite IN clause compatibility
|
|
||||||
session.query(Feature).filter(Feature.id.in_(stale_ids)).update(
|
|
||||||
{"testing_in_progress": False},
|
|
||||||
synchronize_session=False
|
|
||||||
)
|
|
||||||
session.commit()
|
|
||||||
print(f"[CLEANUP] Released {len(stale_ids)} stale testing locks: {stale_ids}", flush=True)
|
|
||||||
debug_log.log("CLEANUP", "Released stale testing locks", feature_ids=stale_ids)
|
|
||||||
except Exception as e:
|
|
||||||
session.rollback()
|
|
||||||
print(f"[CLEANUP] Error cleaning stale locks: {e}", flush=True)
|
|
||||||
debug_log.log("CLEANUP", f"Error cleaning stale locks: {e}")
|
|
||||||
finally:
|
|
||||||
session.close()
|
|
||||||
|
|
||||||
def _maintain_testing_agents(self) -> None:
|
def _maintain_testing_agents(self) -> None:
|
||||||
"""Maintain the desired count of testing agents independently.
|
"""Maintain the desired count of testing agents independently.
|
||||||
|
|
||||||
@@ -480,8 +384,8 @@ class ParallelOrchestrator:
|
|||||||
the configured testing_agent_ratio. Testing agents run independently from
|
the configured testing_agent_ratio. Testing agents run independently from
|
||||||
coding agents and continuously re-test passing features to catch regressions.
|
coding agents and continuously re-test passing features to catch regressions.
|
||||||
|
|
||||||
Also periodically releases stale testing locks (features stuck in
|
Multiple testing agents can test the same feature concurrently - this is
|
||||||
testing_in_progress=True for more than STALE_TESTING_LOCK_MINUTES).
|
intentional and simplifies the architecture by removing claim coordination.
|
||||||
|
|
||||||
Stops spawning when:
|
Stops spawning when:
|
||||||
- YOLO mode is enabled
|
- YOLO mode is enabled
|
||||||
@@ -492,11 +396,6 @@ class ParallelOrchestrator:
|
|||||||
if self.yolo_mode or self.testing_agent_ratio == 0:
|
if self.yolo_mode or self.testing_agent_ratio == 0:
|
||||||
return
|
return
|
||||||
|
|
||||||
# Periodically clean up stale testing locks (features stuck mid-test due to crash)
|
|
||||||
# A feature is considered stale if testing_in_progress=True and last_tested_at
|
|
||||||
# is either NULL or older than STALE_TESTING_LOCK_MINUTES
|
|
||||||
self._cleanup_stale_testing_locks()
|
|
||||||
|
|
||||||
# No testing until there are passing features
|
# No testing until there are passing features
|
||||||
passing_count = self.get_passing_count()
|
passing_count = self.get_passing_count()
|
||||||
if passing_count == 0:
|
if passing_count == 0:
|
||||||
@@ -641,8 +540,9 @@ class ParallelOrchestrator:
|
|||||||
def _spawn_testing_agent(self) -> tuple[bool, str]:
|
def _spawn_testing_agent(self) -> tuple[bool, str]:
|
||||||
"""Spawn a testing agent subprocess for regression testing.
|
"""Spawn a testing agent subprocess for regression testing.
|
||||||
|
|
||||||
Claims a feature BEFORE spawning the agent (same pattern as coding agents).
|
Picks a random passing feature to test. Multiple testing agents can test
|
||||||
This ensures we know which feature is being tested for UI display.
|
the same feature concurrently - this is intentional and simplifies the
|
||||||
|
architecture by removing claim coordination.
|
||||||
"""
|
"""
|
||||||
# Check limits first (under lock)
|
# Check limits first (under lock)
|
||||||
with self._lock:
|
with self._lock:
|
||||||
@@ -655,20 +555,19 @@ class ParallelOrchestrator:
|
|||||||
debug_log.log("TESTING", f"Skipped spawn - at max total agents ({total_agents}/{MAX_TOTAL_AGENTS})")
|
debug_log.log("TESTING", f"Skipped spawn - at max total agents ({total_agents}/{MAX_TOTAL_AGENTS})")
|
||||||
return False, f"At max total agents ({total_agents})"
|
return False, f"At max total agents ({total_agents})"
|
||||||
|
|
||||||
# Claim a feature for testing (outside lock to avoid holding during DB ops)
|
# Pick a random passing feature (no claim needed - concurrent testing is fine)
|
||||||
feature_id = self.claim_feature_for_testing()
|
feature_id = self._get_random_passing_feature()
|
||||||
if feature_id is None:
|
if feature_id is None:
|
||||||
debug_log.log("TESTING", "No features available for testing")
|
debug_log.log("TESTING", "No features available for testing")
|
||||||
return False, "No features available for testing"
|
return False, "No features available for testing"
|
||||||
|
|
||||||
debug_log.log("TESTING", f"Claimed feature #{feature_id} for testing")
|
debug_log.log("TESTING", f"Selected feature #{feature_id} for testing")
|
||||||
|
|
||||||
# Now spawn with the claimed feature ID
|
# Spawn the testing agent
|
||||||
with self._lock:
|
with self._lock:
|
||||||
# Re-check limits in case another thread spawned while we were claiming
|
# Re-check limits in case another thread spawned while we were selecting
|
||||||
current_testing_count = len(self.running_testing_agents)
|
current_testing_count = len(self.running_testing_agents)
|
||||||
if current_testing_count >= self.max_concurrency:
|
if current_testing_count >= self.max_concurrency:
|
||||||
self.release_testing_claim(feature_id)
|
|
||||||
return False, f"At max testing agents ({current_testing_count})"
|
return False, f"At max testing agents ({current_testing_count})"
|
||||||
|
|
||||||
cmd = [
|
cmd = [
|
||||||
@@ -694,7 +593,6 @@ class ParallelOrchestrator:
|
|||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
debug_log.log("TESTING", f"FAILED to spawn testing agent: {e}")
|
debug_log.log("TESTING", f"FAILED to spawn testing agent: {e}")
|
||||||
self.release_testing_claim(feature_id)
|
|
||||||
return False, f"Failed to start testing agent: {e}"
|
return False, f"Failed to start testing agent: {e}"
|
||||||
|
|
||||||
# Register process with feature ID (same pattern as coding agents)
|
# Register process with feature ID (same pattern as coding agents)
|
||||||
@@ -865,22 +763,16 @@ class ParallelOrchestrator:
|
|||||||
is safe.
|
is safe.
|
||||||
|
|
||||||
For testing agents:
|
For testing agents:
|
||||||
- Remove from running dict and release testing claim on feature.
|
- Remove from running dict (no claim to release - concurrent testing is allowed).
|
||||||
"""
|
"""
|
||||||
if agent_type == "testing":
|
if agent_type == "testing":
|
||||||
with self._lock:
|
with self._lock:
|
||||||
# Remove from dict by finding the feature_id for this proc
|
# Remove from dict by finding the feature_id for this proc
|
||||||
found_feature_id = None
|
|
||||||
for fid, p in list(self.running_testing_agents.items()):
|
for fid, p in list(self.running_testing_agents.items()):
|
||||||
if p is proc:
|
if p is proc:
|
||||||
found_feature_id = fid
|
|
||||||
del self.running_testing_agents[fid]
|
del self.running_testing_agents[fid]
|
||||||
break
|
break
|
||||||
|
|
||||||
# Release testing claim on the feature
|
|
||||||
if found_feature_id is not None:
|
|
||||||
self.release_testing_claim(found_feature_id)
|
|
||||||
|
|
||||||
status = "completed" if return_code == 0 else "failed"
|
status = "completed" if return_code == 0 else "failed"
|
||||||
print(f"Feature #{feature_id} testing {status}", flush=True)
|
print(f"Feature #{feature_id} testing {status}", flush=True)
|
||||||
debug_log.log("COMPLETE", f"Testing agent for feature #{feature_id} finished",
|
debug_log.log("COMPLETE", f"Testing agent for feature #{feature_id} finished",
|
||||||
@@ -974,13 +866,12 @@ class ParallelOrchestrator:
|
|||||||
for fid in feature_ids:
|
for fid in feature_ids:
|
||||||
self.stop_feature(fid)
|
self.stop_feature(fid)
|
||||||
|
|
||||||
# Stop testing agents
|
# Stop testing agents (no claim to release - concurrent testing is allowed)
|
||||||
with self._lock:
|
with self._lock:
|
||||||
testing_items = list(self.running_testing_agents.items())
|
testing_items = list(self.running_testing_agents.items())
|
||||||
|
|
||||||
for feature_id, proc in testing_items:
|
for feature_id, proc in testing_items:
|
||||||
result = kill_process_tree(proc, timeout=5.0)
|
result = kill_process_tree(proc, timeout=5.0)
|
||||||
self.release_testing_claim(feature_id)
|
|
||||||
debug_log.log("STOP", f"Killed testing agent for feature #{feature_id} (PID {proc.pid})",
|
debug_log.log("STOP", f"Killed testing agent for feature #{feature_id} (PID {proc.pid})",
|
||||||
status=result.status, children_found=result.children_found,
|
status=result.status, children_found=result.children_found,
|
||||||
children_terminated=result.children_terminated, children_killed=result.children_killed)
|
children_terminated=result.children_terminated, children_killed=result.children_killed)
|
||||||
@@ -1002,19 +893,6 @@ class ParallelOrchestrator:
|
|||||||
# Must happen before any debug_log.log() calls
|
# Must happen before any debug_log.log() calls
|
||||||
debug_log.start_session()
|
debug_log.start_session()
|
||||||
|
|
||||||
# Clear stale testing_in_progress flags from crashed testing agents
|
|
||||||
# This ensures features aren't permanently locked if a previous session crashed
|
|
||||||
session = self.get_session()
|
|
||||||
try:
|
|
||||||
stale_count = session.query(Feature).filter(Feature.testing_in_progress == True).count()
|
|
||||||
if stale_count > 0:
|
|
||||||
session.execute(text("UPDATE features SET testing_in_progress = 0 WHERE testing_in_progress = 1"))
|
|
||||||
session.commit()
|
|
||||||
print(f"[STARTUP] Cleared {stale_count} stale testing_in_progress flags", flush=True)
|
|
||||||
debug_log.log("STARTUP", f"Cleared {stale_count} stale testing_in_progress flags")
|
|
||||||
finally:
|
|
||||||
session.close()
|
|
||||||
|
|
||||||
# Log startup to debug file
|
# Log startup to debug file
|
||||||
debug_log.section("ORCHESTRATOR STARTUP")
|
debug_log.section("ORCHESTRATOR STARTUP")
|
||||||
debug_log.log("STARTUP", "Orchestrator run_loop starting",
|
debug_log.log("STARTUP", "Orchestrator run_loop starting",
|
||||||
|
|||||||
@@ -93,12 +93,11 @@ def get_testing_prompt(project_dir: Path | None = None, testing_feature_id: int
|
|||||||
|
|
||||||
**You are assigned to regression test Feature #{testing_feature_id}.**
|
**You are assigned to regression test Feature #{testing_feature_id}.**
|
||||||
|
|
||||||
The orchestrator has already claimed this feature for you.
|
|
||||||
|
|
||||||
### Your workflow:
|
### Your workflow:
|
||||||
1. Call `feature_get_by_id` with ID {testing_feature_id} to get the feature details
|
1. Call `feature_get_by_id` with ID {testing_feature_id} to get the feature details
|
||||||
2. Verify the feature through the UI using browser automation
|
2. Verify the feature through the UI using browser automation
|
||||||
3. When done, call `feature_release_testing` with feature_id={testing_feature_id}
|
3. If regression found, call `feature_mark_failing` with feature_id={testing_feature_id}
|
||||||
|
4. Exit when done (no cleanup needed)
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|||||||
@@ -73,13 +73,14 @@ ORCHESTRATOR_PATTERNS = {
|
|||||||
class AgentTracker:
|
class AgentTracker:
|
||||||
"""Tracks active agents and their states for multi-agent mode.
|
"""Tracks active agents and their states for multi-agent mode.
|
||||||
|
|
||||||
Both coding and testing agents are now tracked by their feature ID.
|
Both coding and testing agents are tracked using a composite key of
|
||||||
The agent_type field distinguishes between them.
|
(feature_id, agent_type) to allow simultaneous tracking of both agent
|
||||||
|
types for the same feature.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
# feature_id -> {name, state, last_thought, agent_index, agent_type}
|
# (feature_id, agent_type) -> {name, state, last_thought, agent_index, agent_type}
|
||||||
self.active_agents: dict[int, dict] = {}
|
self.active_agents: dict[tuple[int, str], dict] = {}
|
||||||
self._next_agent_index = 0
|
self._next_agent_index = 0
|
||||||
self._lock = asyncio.Lock()
|
self._lock = asyncio.Lock()
|
||||||
|
|
||||||
@@ -111,14 +112,14 @@ class AgentTracker:
|
|||||||
if testing_complete_match:
|
if testing_complete_match:
|
||||||
feature_id = int(testing_complete_match.group(1))
|
feature_id = int(testing_complete_match.group(1))
|
||||||
is_success = testing_complete_match.group(2) == "completed"
|
is_success = testing_complete_match.group(2) == "completed"
|
||||||
return await self._handle_agent_complete(feature_id, is_success)
|
return await self._handle_agent_complete(feature_id, is_success, agent_type="testing")
|
||||||
|
|
||||||
# Coding agent complete: "Feature #X completed/failed" (without "testing" keyword)
|
# Coding agent complete: "Feature #X completed/failed" (without "testing" keyword)
|
||||||
if line.startswith("Feature #") and ("completed" in line or "failed" in line) and "testing" not in line:
|
if line.startswith("Feature #") and ("completed" in line or "failed" in line) and "testing" not in line:
|
||||||
try:
|
try:
|
||||||
feature_id = int(re.search(r'#(\d+)', line).group(1))
|
feature_id = int(re.search(r'#(\d+)', line).group(1))
|
||||||
is_success = "completed" in line
|
is_success = "completed" in line
|
||||||
return await self._handle_agent_complete(feature_id, is_success)
|
return await self._handle_agent_complete(feature_id, is_success, agent_type="coding")
|
||||||
except (AttributeError, ValueError):
|
except (AttributeError, ValueError):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@@ -132,11 +133,21 @@ class AgentTracker:
|
|||||||
content = match.group(2)
|
content = match.group(2)
|
||||||
|
|
||||||
async with self._lock:
|
async with self._lock:
|
||||||
# Ensure agent is tracked
|
# Check if either coding or testing agent exists for this feature
|
||||||
if feature_id not in self.active_agents:
|
# This prevents creating ghost agents when a testing agent outputs [Feature #X] lines
|
||||||
|
coding_key = (feature_id, 'coding')
|
||||||
|
testing_key = (feature_id, 'testing')
|
||||||
|
|
||||||
|
if coding_key in self.active_agents:
|
||||||
|
key = coding_key
|
||||||
|
elif testing_key in self.active_agents:
|
||||||
|
key = testing_key
|
||||||
|
else:
|
||||||
|
# Neither exists, create a new coding agent entry (implicit tracking)
|
||||||
|
key = coding_key
|
||||||
agent_index = self._next_agent_index
|
agent_index = self._next_agent_index
|
||||||
self._next_agent_index += 1
|
self._next_agent_index += 1
|
||||||
self.active_agents[feature_id] = {
|
self.active_agents[key] = {
|
||||||
'name': AGENT_MASCOTS[agent_index % len(AGENT_MASCOTS)],
|
'name': AGENT_MASCOTS[agent_index % len(AGENT_MASCOTS)],
|
||||||
'agent_index': agent_index,
|
'agent_index': agent_index,
|
||||||
'agent_type': 'coding',
|
'agent_type': 'coding',
|
||||||
@@ -145,7 +156,7 @@ class AgentTracker:
|
|||||||
'last_thought': None,
|
'last_thought': None,
|
||||||
}
|
}
|
||||||
|
|
||||||
agent = self.active_agents[feature_id]
|
agent = self.active_agents[key]
|
||||||
|
|
||||||
# Detect state and thought from content
|
# Detect state and thought from content
|
||||||
state = 'working'
|
state = 'working'
|
||||||
@@ -178,16 +189,21 @@ class AgentTracker:
|
|||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
async def get_agent_info(self, feature_id: int) -> tuple[int | None, str | None]:
|
async def get_agent_info(self, feature_id: int, agent_type: str = "coding") -> tuple[int | None, str | None]:
|
||||||
"""Get agent index and name for a feature ID.
|
"""Get agent index and name for a feature ID and agent type.
|
||||||
|
|
||||||
Thread-safe method that acquires the lock before reading state.
|
Thread-safe method that acquires the lock before reading state.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
feature_id: The feature ID to look up.
|
||||||
|
agent_type: The agent type ("coding" or "testing"). Defaults to "coding".
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Tuple of (agentIndex, agentName) or (None, None) if not tracked.
|
Tuple of (agentIndex, agentName) or (None, None) if not tracked.
|
||||||
"""
|
"""
|
||||||
async with self._lock:
|
async with self._lock:
|
||||||
agent = self.active_agents.get(feature_id)
|
key = (feature_id, agent_type)
|
||||||
|
agent = self.active_agents.get(key)
|
||||||
if agent:
|
if agent:
|
||||||
return agent['agent_index'], agent['name']
|
return agent['agent_index'], agent['name']
|
||||||
return None, None
|
return None, None
|
||||||
@@ -207,6 +223,7 @@ class AgentTracker:
|
|||||||
async def _handle_agent_start(self, feature_id: int, line: str, agent_type: str = "coding") -> dict | None:
|
async def _handle_agent_start(self, feature_id: int, line: str, agent_type: str = "coding") -> dict | None:
|
||||||
"""Handle agent start message from orchestrator."""
|
"""Handle agent start message from orchestrator."""
|
||||||
async with self._lock:
|
async with self._lock:
|
||||||
|
key = (feature_id, agent_type) # Composite key for separate tracking
|
||||||
agent_index = self._next_agent_index
|
agent_index = self._next_agent_index
|
||||||
self._next_agent_index += 1
|
self._next_agent_index += 1
|
||||||
|
|
||||||
@@ -216,7 +233,7 @@ class AgentTracker:
|
|||||||
if name_match:
|
if name_match:
|
||||||
feature_name = name_match.group(1)
|
feature_name = name_match.group(1)
|
||||||
|
|
||||||
self.active_agents[feature_id] = {
|
self.active_agents[key] = {
|
||||||
'name': AGENT_MASCOTS[agent_index % len(AGENT_MASCOTS)],
|
'name': AGENT_MASCOTS[agent_index % len(AGENT_MASCOTS)],
|
||||||
'agent_index': agent_index,
|
'agent_index': agent_index,
|
||||||
'agent_type': agent_type,
|
'agent_type': agent_type,
|
||||||
@@ -237,26 +254,33 @@ class AgentTracker:
|
|||||||
'timestamp': datetime.now().isoformat(),
|
'timestamp': datetime.now().isoformat(),
|
||||||
}
|
}
|
||||||
|
|
||||||
async def _handle_agent_complete(self, feature_id: int, is_success: bool) -> dict | None:
|
async def _handle_agent_complete(self, feature_id: int, is_success: bool, agent_type: str = "coding") -> dict | None:
|
||||||
"""Handle agent completion - ALWAYS emits a message, even if agent wasn't tracked."""
|
"""Handle agent completion - ALWAYS emits a message, even if agent wasn't tracked.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
feature_id: The feature ID.
|
||||||
|
is_success: Whether the agent completed successfully.
|
||||||
|
agent_type: The agent type ("coding" or "testing"). Defaults to "coding".
|
||||||
|
"""
|
||||||
async with self._lock:
|
async with self._lock:
|
||||||
|
key = (feature_id, agent_type) # Composite key for correct agent lookup
|
||||||
state = 'success' if is_success else 'error'
|
state = 'success' if is_success else 'error'
|
||||||
|
|
||||||
if feature_id in self.active_agents:
|
if key in self.active_agents:
|
||||||
# Normal case: agent was tracked
|
# Normal case: agent was tracked
|
||||||
agent = self.active_agents[feature_id]
|
agent = self.active_agents[key]
|
||||||
result = {
|
result = {
|
||||||
'type': 'agent_update',
|
'type': 'agent_update',
|
||||||
'agentIndex': agent['agent_index'],
|
'agentIndex': agent['agent_index'],
|
||||||
'agentName': agent['name'],
|
'agentName': agent['name'],
|
||||||
'agentType': agent.get('agent_type', 'coding'),
|
'agentType': agent.get('agent_type', agent_type),
|
||||||
'featureId': feature_id,
|
'featureId': feature_id,
|
||||||
'featureName': agent['feature_name'],
|
'featureName': agent['feature_name'],
|
||||||
'state': state,
|
'state': state,
|
||||||
'thought': 'Completed successfully!' if is_success else 'Failed to complete',
|
'thought': 'Completed successfully!' if is_success else 'Failed to complete',
|
||||||
'timestamp': datetime.now().isoformat(),
|
'timestamp': datetime.now().isoformat(),
|
||||||
}
|
}
|
||||||
del self.active_agents[feature_id]
|
del self.active_agents[key]
|
||||||
return result
|
return result
|
||||||
else:
|
else:
|
||||||
# Synthetic completion for untracked agent
|
# Synthetic completion for untracked agent
|
||||||
@@ -265,7 +289,7 @@ class AgentTracker:
|
|||||||
'type': 'agent_update',
|
'type': 'agent_update',
|
||||||
'agentIndex': -1, # Sentinel for untracked
|
'agentIndex': -1, # Sentinel for untracked
|
||||||
'agentName': 'Unknown',
|
'agentName': 'Unknown',
|
||||||
'agentType': 'coding',
|
'agentType': agent_type,
|
||||||
'featureId': feature_id,
|
'featureId': feature_id,
|
||||||
'featureName': f'Feature #{feature_id}',
|
'featureName': f'Feature #{feature_id}',
|
||||||
'state': state,
|
'state': state,
|
||||||
|
|||||||
Reference in New Issue
Block a user