Files
autocoder/server/routers/agent.py
Auto 357083dbae feat: decouple regression testing agents from coding agents
Major refactoring of the parallel orchestrator to run regression testing
agents independently from coding agents. This improves system reliability
and provides better control over testing behavior.

Key changes:

Database & MCP Layer:
- Add testing_in_progress and last_tested_at columns to Feature model
- Add feature_claim_for_testing() for atomic test claim with retry
- Add feature_release_testing() to release claims after testing
- Refactor claim functions to iterative loops (no recursion)
- Add OperationalError retry handling for transient DB errors
- Reduce MAX_CLAIM_RETRIES from 10 to 5

Orchestrator:
- Decouple testing agent lifecycle from coding agents
- Add _maintain_testing_agents() for continuous testing maintenance
- Fix TOCTOU race in _spawn_testing_agent() - hold lock during spawn
- Add _cleanup_stale_testing_locks() with 30-min timeout
- Fix log ordering - start_session() before stale flag cleanup
- Add stale testing_in_progress cleanup on startup

Dead Code Removal:
- Remove count_testing_in_concurrency from entire stack (12+ files)
- Remove ineffective with_for_update() from features router

API & UI:
- Pass testing_agent_ratio via CLI to orchestrator
- Update testing prompt template to use new claim/release tools
- Rename UI label to "Regression Agents" with clearer description
- Add process_utils.py for cross-platform process tree management

Testing agents now:
- Run continuously as long as passing features exist
- Can re-test features multiple times to catch regressions
- Are controlled by fixed count (0-3) via testing_agent_ratio setting
- Have atomic claiming to prevent concurrent testing of same feature

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-22 15:22:48 +02:00

189 lines
5.7 KiB
Python

"""
Agent Router
============
API endpoints for agent control (start/stop/pause/resume).
Uses project registry for path lookups.
"""
import re
from pathlib import Path
from fastapi import APIRouter, HTTPException
from ..schemas import AgentActionResponse, AgentStartRequest, AgentStatus
from ..services.process_manager import get_manager
def _get_project_path(project_name: str) -> Path:
    """Look up a project's directory via the ``registry`` module.

    ``registry`` is imported lazily. Before the import, the directory three
    levels above this file is placed at the front of ``sys.path`` (unless it
    is already listed) so that the import can resolve.

    Args:
        project_name: Name of the project to look up.

    Returns:
        Whatever ``registry.get_project_path`` returns for the name. Callers
        in this module treat a falsy result as "project not registered".
    """
    import sys

    search_root = str(Path(__file__).parent.parent.parent)
    if search_root not in sys.path:
        sys.path.insert(0, search_root)

    from registry import get_project_path as lookup_path

    return lookup_path(project_name)
def _get_settings_defaults() -> tuple[bool, str, int]:
    """Read the agent start-up defaults from the global settings.

    ``registry`` is imported lazily after making sure the directory three
    levels above this file is on ``sys.path``.

    Returns:
        Tuple of (yolo_mode, model, testing_agent_ratio)
    """
    import sys

    search_root = str(Path(__file__).parent.parent.parent)
    if search_root not in sys.path:
        sys.path.insert(0, search_root)

    from registry import DEFAULT_MODEL, get_all_settings

    stored = get_all_settings()

    # A missing or empty "yolo_mode" entry is treated as "false".
    raw_yolo = stored.get("yolo_mode") or "false"
    yolo_enabled = raw_yolo.lower() == "true"

    model_name = stored.get("model", DEFAULT_MODEL)

    # Fall back to 1 when the stored ratio cannot be converted to an int.
    try:
        ratio = int(stored.get("testing_agent_ratio", "1"))
    except (ValueError, TypeError):
        ratio = 1

    return yolo_enabled, model_name, ratio
# Router for the agent control endpoints; every route declared in this module
# is served under /api/projects/{project_name}/agent.
router = APIRouter(prefix="/api/projects/{project_name}/agent", tags=["agent"])
# Root directory for process manager: the directory three levels above this
# file. It is passed to get_manager() by get_project_manager().
ROOT_DIR = Path(__file__).parent.parent.parent
def validate_project_name(name: str) -> str:
    """Validate a project name to prevent path traversal.

    A valid name consists of 1 to 50 characters, each an ASCII letter, a
    digit, an underscore or a hyphen.

    Args:
        name: Project name to check.

    Returns:
        The name, unchanged, when it is valid.

    Raises:
        HTTPException: With status 400 when the name is not valid.
    """
    # fullmatch() is used instead of match() with a trailing "$": "$" also
    # matches just before a final newline, so "name\n" would slip through.
    if not re.fullmatch(r'[a-zA-Z0-9_-]{1,50}', name):
        raise HTTPException(
            status_code=400,
            detail="Invalid project name"
        )
    return name
def get_project_manager(project_name: str):
    """Return the process manager for a project.

    The name is validated first, then resolved to a directory through the
    registry lookup.

    Args:
        project_name: Project name from the request path.

    Returns:
        The result of ``get_manager`` for the validated name, the project
        directory and ``ROOT_DIR``.

    Raises:
        HTTPException: 400 from name validation; 404 when the registry lookup
            yields nothing or the resolved directory does not exist.
    """
    safe_name = validate_project_name(project_name)
    location = _get_project_path(safe_name)

    if not location:
        not_registered = f"Project '{safe_name}' not found in registry"
        raise HTTPException(status_code=404, detail=not_registered)

    if not location.exists():
        not_on_disk = f"Project directory not found: {location}"
        raise HTTPException(status_code=404, detail=not_on_disk)

    return get_manager(safe_name, location, ROOT_DIR)
@router.get("/status", response_model=AgentStatus)
async def get_agent_status(project_name: str):
    """Report the current agent status for a project.

    The manager's healthcheck is awaited before its attributes are read, so
    that a crashed process is detected first.
    """
    manager = get_project_manager(project_name)

    # Run healthcheck to detect crashed processes
    await manager.healthcheck()

    # Attributes copied from the manager into the response, in this order.
    reported_fields = (
        "status",
        "pid",
        "started_at",
        "yolo_mode",
        "model",
        "parallel_mode",
        "max_concurrency",
        "testing_agent_ratio",
    )
    snapshot = {field: getattr(manager, field) for field in reported_fields}
    return AgentStatus(**snapshot)
@router.post("/start", response_model=AgentActionResponse)
async def start_agent(
    project_name: str,
    request: AgentStartRequest = AgentStartRequest(),
):
    """Start the agent for a project.

    Options missing from the request are filled in from the global settings
    defaults; ``max_concurrency`` falls back to 1.
    """
    manager = get_project_manager(project_name)

    # Get defaults from global settings if not provided in request
    fallback_yolo, fallback_model, fallback_ratio = _get_settings_defaults()

    if request.yolo_mode is None:
        yolo_mode = fallback_yolo
    else:
        yolo_mode = request.yolo_mode

    if request.testing_agent_ratio is None:
        testing_agent_ratio = fallback_ratio
    else:
        testing_agent_ratio = request.testing_agent_ratio

    launch_options = {
        "yolo_mode": yolo_mode,
        "model": request.model or fallback_model,
        "max_concurrency": request.max_concurrency or 1,
        "testing_agent_ratio": testing_agent_ratio,
    }
    started, detail = await manager.start(**launch_options)

    # Notify scheduler of manual start (to prevent auto-stop during scheduled window)
    if started:
        from ..services.scheduler_service import get_scheduler

        location = _get_project_path(project_name)
        if location:
            get_scheduler().notify_manual_start(project_name, location)

    return AgentActionResponse(
        success=started,
        status=manager.status,
        message=detail,
    )
@router.post("/stop", response_model=AgentActionResponse)
async def stop_agent(project_name: str):
    """Stop the agent for a project.

    When the stop succeeds, the scheduler is told about it.
    """
    manager = get_project_manager(project_name)
    stopped, detail = await manager.stop()

    # Notify scheduler of manual stop (to prevent auto-start during scheduled window)
    if stopped:
        from ..services.scheduler_service import get_scheduler

        location = _get_project_path(project_name)
        if location:
            get_scheduler().notify_manual_stop(project_name, location)

    return AgentActionResponse(
        success=stopped,
        status=manager.status,
        message=detail,
    )
@router.post("/pause", response_model=AgentActionResponse)
async def pause_agent(project_name: str):
    """Pause the agent for a project.

    Returns the outcome of the manager's ``pause()`` call together with the
    manager's status read afterwards.
    """
    manager = get_project_manager(project_name)
    paused, detail = await manager.pause()

    response = AgentActionResponse(
        success=paused,
        status=manager.status,
        message=detail,
    )
    return response
@router.post("/resume", response_model=AgentActionResponse)
async def resume_agent(project_name: str):
    """Resume a paused agent.

    Returns the outcome of the manager's ``resume()`` call together with the
    manager's status read afterwards.
    """
    manager = get_project_manager(project_name)
    resumed, detail = await manager.resume()

    response = AgentActionResponse(
        success=resumed,
        status=manager.status,
        message=detail,
    )
    return response