Files
autocoder/autonomous_agent_demo.py
Auto 357083dbae feat: decouple regression testing agents from coding agents
Major refactoring of the parallel orchestrator to run regression testing
agents independently from coding agents. This improves system reliability
and provides better control over testing behavior.

Key changes:

Database & MCP Layer:
- Add testing_in_progress and last_tested_at columns to Feature model
- Add feature_claim_for_testing() for atomic test claim with retry
- Add feature_release_testing() to release claims after testing
- Refactor claim functions to iterative loops (no recursion)
- Add OperationalError retry handling for transient DB errors
- Reduce MAX_CLAIM_RETRIES from 10 to 5

Orchestrator:
- Decouple testing agent lifecycle from coding agents
- Add _maintain_testing_agents() for continuous testing maintenance
- Fix TOCTOU race in _spawn_testing_agent() - hold lock during spawn
- Add _cleanup_stale_testing_locks() with 30-min timeout
- Fix log ordering - start_session() before stale flag cleanup
- Add stale testing_in_progress cleanup on startup

Dead Code Removal:
- Remove count_testing_in_concurrency from entire stack (12+ files)
- Remove ineffective with_for_update() from features router

API & UI:
- Pass testing_agent_ratio via CLI to orchestrator
- Update testing prompt template to use new claim/release tools
- Rename UI label to "Regression Agents" with clearer description
- Add process_utils.py for cross-platform process tree management

Testing agents now:
- Run continuously as long as passing features exist
- Can re-test features multiple times to catch regressions
- Are controlled by a fixed count (0-3) via the testing_agent_ratio setting
- Have atomic claiming to prevent concurrent testing of same feature

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-22 15:22:48 +02:00

230 lines
7.6 KiB
Python

#!/usr/bin/env python3
"""
Autonomous Coding Agent Demo
============================
A minimal harness demonstrating long-running autonomous coding with Claude.
This script implements a unified orchestrator pattern that handles:
- Initialization (creating features from app_spec)
- Coding agents (implementing features)
- Testing agents (regression testing)
Example Usage:
# Using absolute path directly
python autonomous_agent_demo.py --project-dir C:/Projects/my-app
# Using registered project name (looked up from registry)
python autonomous_agent_demo.py --project-dir my-app
# Limit iterations for testing (when running as subprocess)
python autonomous_agent_demo.py --project-dir my-app --max-iterations 5
# YOLO mode: rapid prototyping without testing agents
python autonomous_agent_demo.py --project-dir my-app --yolo
# Parallel execution with 3 concurrent coding agents
python autonomous_agent_demo.py --project-dir my-app --concurrency 3
# Single agent mode (orchestrator with concurrency=1, the default)
python autonomous_agent_demo.py --project-dir my-app
# Run as specific agent type (used by orchestrator to spawn subprocesses)
python autonomous_agent_demo.py --project-dir my-app --agent-type initializer
python autonomous_agent_demo.py --project-dir my-app --agent-type coding --feature-id 42
python autonomous_agent_demo.py --project-dir my-app --agent-type testing
"""
import argparse
import asyncio
from pathlib import Path
from dotenv import load_dotenv
# Load environment variables from .env file (if it exists)
# IMPORTANT: Must be called BEFORE importing other modules that read env vars at load time
load_dotenv()
from agent import run_autonomous_agent
from registry import DEFAULT_MODEL, get_project_path
def parse_args(argv: "list[str] | None" = None) -> argparse.Namespace:
    """Parse command line arguments.

    Args:
        argv: Argument strings to parse, excluding the program name. When
            ``None`` (the default) argparse falls back to ``sys.argv[1:]``,
            so existing ``parse_args()`` callers behave exactly as before.
            Passing an explicit list allows programmatic use.

    Returns:
        Namespace with the attributes ``project_dir``, ``max_iterations``,
        ``model``, ``yolo``, ``concurrency``, ``parallel``, ``feature_id``,
        ``agent_type`` and ``testing_ratio``.

    Note:
        argparse itself exits the process (``SystemExit``) on invalid
        arguments or when ``--help`` is requested.
    """
    parser = argparse.ArgumentParser(
        description="Autonomous Coding Agent Demo - Unified orchestrator pattern",
        # Raw formatter keeps the hand-formatted epilog below as written.
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
# Use absolute path directly (single agent, default)
python autonomous_agent_demo.py --project-dir C:/Projects/my-app
# Use registered project name (looked up from registry)
python autonomous_agent_demo.py --project-dir my-app
# Parallel execution with 3 concurrent agents
python autonomous_agent_demo.py --project-dir my-app --concurrency 3
# YOLO mode: rapid prototyping without testing agents
python autonomous_agent_demo.py --project-dir my-app --yolo
# Configure testing agent ratio (2 testing agents per coding agent)
python autonomous_agent_demo.py --project-dir my-app --testing-ratio 2
# Disable testing agents (similar to YOLO but with verification)
python autonomous_agent_demo.py --project-dir my-app --testing-ratio 0
Authentication:
Uses Claude CLI authentication (run 'claude login' if not logged in)
Authentication is handled by start.bat/start.sh before this runs
""",
    )

    parser.add_argument(
        "--project-dir",
        type=str,
        required=True,
        help="Project directory path (absolute) or registered project name",
    )

    parser.add_argument(
        "--max-iterations",
        type=int,
        default=None,
        help="Maximum number of agent iterations (default: unlimited, typically 1 for subprocesses)",
    )

    parser.add_argument(
        "--model",
        type=str,
        default=DEFAULT_MODEL,
        help=f"Claude model to use (default: {DEFAULT_MODEL})",
    )

    parser.add_argument(
        "--yolo",
        action="store_true",
        default=False,
        help="Enable YOLO mode: skip testing agents for rapid prototyping",
    )

    # Unified orchestrator mode (replaces --parallel)
    parser.add_argument(
        "--concurrency", "-c",
        type=int,
        default=1,
        help="Number of concurrent coding agents (default: 1, max: 5)",
    )

    # Backward compatibility: --parallel is deprecated alias for --concurrency.
    # nargs="?" with const=3 means a bare "--parallel" yields 3, while an
    # absent flag stays None so the caller can tell whether it was used.
    parser.add_argument(
        "--parallel", "-p",
        type=int,
        nargs="?",
        const=3,
        default=None,
        metavar="N",
        help="DEPRECATED: Use --concurrency instead. Alias for --concurrency.",
    )

    parser.add_argument(
        "--feature-id",
        type=int,
        default=None,
        help="Work on a specific feature ID only (used by orchestrator for coding agents)",
    )

    # Agent type for subprocess mode
    parser.add_argument(
        "--agent-type",
        choices=["initializer", "coding", "testing"],
        default=None,
        help="Agent type (used by orchestrator to spawn specialized subprocesses)",
    )

    # Testing agent configuration
    parser.add_argument(
        "--testing-ratio",
        type=int,
        default=1,
        help="Testing agents per coding agent (0-3, default: 1). Set to 0 to disable testing agents.",
    )

    return parser.parse_args(argv)
def main() -> None:
    """Main entry point.

    Parses the command line, resolves the project directory, and then runs
    one of two modes:

    * Subprocess mode (``--agent-type`` given): runs a single agent of the
      requested type via ``run_autonomous_agent``. ``--max-iterations``
      falls back to 1 when it is unset (or 0).
    * Entry point mode (no ``--agent-type``): runs the parallel
      orchestrator with concurrency clamped to 1-5 and the testing ratio
      clamped to 0-3.

    Returns early, after printing an error, when the project directory
    cannot be resolved.

    Raises:
        Exception: Any non-KeyboardInterrupt error from the agent or the
            orchestrator is reported and re-raised. KeyboardInterrupt is
            reported and swallowed.
    """
    print("[ENTRY] autonomous_agent_demo.py starting...", flush=True)
    args = parse_args()

    # Note: Authentication is handled by start.bat/start.sh before this script runs.
    # The Claude SDK auto-detects credentials from ~/.claude/.credentials.json

    # Handle deprecated --parallel flag: when present it overrides --concurrency.
    if args.parallel is not None:
        print("WARNING: --parallel is deprecated. Use --concurrency instead.", flush=True)
        args.concurrency = args.parallel

    # Resolve project directory:
    # 1. If absolute path, use as-is
    # 2. Otherwise, look up from registry by name
    # NOTE(review): both error paths below return normally, so the process
    # presumably exits with status 0 on these failures - confirm whether
    # callers that spawn this script need a non-zero exit code.
    project_dir_input = args.project_dir
    project_dir = Path(project_dir_input)

    if project_dir.is_absolute():
        # Absolute path provided - use directly
        if not project_dir.exists():
            print(f"Error: Project directory does not exist: {project_dir}")
            return
    else:
        # Treat as a project name - look up from registry
        registered_path = get_project_path(project_dir_input)
        if registered_path:
            project_dir = registered_path
        else:
            print(f"Error: Project '{project_dir_input}' not found in registry")
            print("Use an absolute path or register the project first.")
            return

    try:
        if args.agent_type:
            # Subprocess mode - spawned by orchestrator for a specific role
            asyncio.run(
                run_autonomous_agent(
                    project_dir=project_dir,
                    model=args.model,
                    max_iterations=args.max_iterations or 1,
                    yolo_mode=args.yolo,
                    feature_id=args.feature_id,
                    agent_type=args.agent_type,
                )
            )
        else:
            # Entry point mode - always use unified orchestrator.
            # Imported here so subprocess mode never loads the orchestrator.
            from parallel_orchestrator import run_parallel_orchestrator

            # Clamp concurrency to valid range (1-5)
            concurrency = max(1, min(args.concurrency, 5))
            if concurrency != args.concurrency:
                print(f"Clamping concurrency to valid range: {concurrency}", flush=True)

            # Clamp testing ratio to the range documented in --testing-ratio
            # help (0-3), mirroring the concurrency handling above so that
            # out-of-range values are never forwarded to the orchestrator.
            testing_ratio = max(0, min(args.testing_ratio, 3))
            if testing_ratio != args.testing_ratio:
                print(f"Clamping testing ratio to valid range: {testing_ratio}", flush=True)

            asyncio.run(
                run_parallel_orchestrator(
                    project_dir=project_dir,
                    max_concurrency=concurrency,
                    model=args.model,
                    yolo_mode=args.yolo,
                    testing_agent_ratio=testing_ratio,
                )
            )
    except KeyboardInterrupt:
        print("\n\nInterrupted by user")
        print("To resume, run the same command again")
    except Exception as e:
        # Report the failure, then re-raise so the traceback and a non-zero
        # exit status are preserved.
        print(f"\nFatal error: {e}")
        raise


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()