feat: add multi-feature batching for coding agents

Enable the orchestrator to assign 1-3 features per coding agent subprocess,
selected via dependency chain extension + same-category fill. This reduces
cold-start overhead and leverages shared context across related features.

Orchestrator (parallel_orchestrator.py):
- Add batch tracking: _batch_features and _feature_to_primary data structures
- Add build_feature_batches() with dependency chain + category fill algorithm (sketched after this list)
- Add start_feature_batch() and _spawn_coding_agent_batch() methods
- Update _on_agent_complete() for batch cleanup across all features
- Update stop_feature() with _feature_to_primary lookup
- Update get_ready_features() to exclude all batch feature IDs
- Update main loop to build batches then spawn per available slot
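
As a quick illustration of the batching algorithm, here is a minimal standalone sketch of the two-phase selection on hypothetical toy data (scheduling scores omitted; the real build_feature_batches() picks the highest-scoring candidate rather than the first):

def build_batches(ready: list[dict], all_features: list[dict], batch_size: int = 3) -> list[list[dict]]:
    children: dict[int, list[int]] = {f["id"]: [] for f in all_features}
    by_id = {f["id"]: f for f in all_features}
    for f in all_features:
        for dep in f.get("dependencies") or []:
            children.setdefault(dep, []).append(f["id"])
    passing = {f["id"] for f in all_features if f.get("passes")}
    used: set[int] = set()
    batches: list[list[dict]] = []
    for feature in ready:
        if feature["id"] in used:
            continue
        batch = [feature]
        used.add(feature["id"])
        simulated = passing | {feature["id"]}  # pretend batch members will pass
        # Phase 1: chain extension - dependents whose deps are covered by the batch
        while len(batch) < batch_size:
            candidates = [
                by_id[cid]
                for bf in batch
                for cid in children.get(bf["id"], [])
                if cid not in used
                and all(d in simulated for d in by_id[cid].get("dependencies") or [])
            ]
            if not candidates:
                break
            pick = candidates[0]  # production code: highest scheduling score
            batch.append(pick)
            used.add(pick["id"])
            simulated.add(pick["id"])
        # Phase 2: same-category fill from remaining ready features
        for rf in ready:
            if len(batch) >= batch_size:
                break
            if rf["id"] not in used and rf.get("category") == feature.get("category"):
                batch.append(rf)
                used.add(rf["id"])
        batches.append(batch)
    return batches

features = [
    {"id": 1, "name": "login form", "category": "auth", "dependencies": []},
    {"id": 2, "name": "session cookie", "category": "auth", "dependencies": [1]},
    {"id": 3, "name": "logout", "category": "auth", "dependencies": [2]},
    {"id": 4, "name": "dark mode", "category": "ui", "dependencies": []},
]
ready = [f for f in features if not f["dependencies"]]
print([[f["id"] for f in b] for b in build_batches(ready, features)])
# -> [[1, 2, 3], [4]]

The auth chain collapses into a single batch because each dependent becomes eligible once its parent is simulated as passing; the unrelated UI feature gets its own batch.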

CLI and agent layer:
- Add --feature-ids (comma-separated) and --batch-size CLI args (example invocation after this list)
- Add feature_ids parameter to run_autonomous_agent() with batch prompt selection
- Add get_batch_feature_prompt() with sequential workflow instructions
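
Concretely, the orchestrator spawns each batch as one subprocess along these lines (feature IDs and project path are illustrative; the flags match _spawn_coding_agent_batch in the diff below):

python -u autonomous_agent_demo.py --project-dir /path/to/project --max-iterations 1 --agent-type coding --feature-ids 12,17,21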

WebSocket layer (server/websocket.py):
- Add BATCH_CODING_AGENT_START_PATTERN and BATCH_FEATURES_COMPLETE_PATTERN
- Add _handle_batch_agent_start() and _handle_batch_agent_complete() methods
- Add featureIds field to all agent_update messages (illustrative payload after this list)
- Track current_feature_id updates as agent moves through batch
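
A batched agent_update would then carry something like the following (illustrative shape only; of these fields, featureIds and the advancing current feature are the ones confirmed by this change):

agent_update = {
    "type": "agent_update",      # message type handled in server/websocket.py
    "featureIds": [12, 17, 21],  # every feature assigned to this agent
    "currentFeatureId": 17,      # assumed wire name for the current_feature_id tracking above
    "state": "running",          # existing state field ('running'/'success'/'error')
}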

Frontend (React UI):
- Add featureIds to ActiveAgent and WSAgentUpdateMessage types
- Update KanbanColumn and DependencyGraph agent-feature maps for batch support
- Update AgentCard to show "Batch: #X, #Y, #Z" with active feature highlight
- Add "Features per Agent" segmented control (1-3) in SettingsModal

Settings integration (full stack):
- Add batch_size to schemas, settings router, agent router, process manager
- Default batch_size=3, user-configurable 1-3 via settings UI (clamp sketch after this list)
- batch_size=1 is functionally identical to pre-batching behavior
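
Out-of-range values are clamped rather than rejected; a hypothetical helper showing the same clamp the orchestrator constructor applies:

def clamp_batch_size(requested: int) -> int:
    # Same expression as ParallelOrchestrator.__init__: min(max(batch_size, 1), 3)
    return min(max(requested, 1), 3)  # e.g. 0 -> 1, 7 -> 3, 2 -> 2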

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

@@ -153,6 +153,7 @@ class ParallelOrchestrator:
yolo_mode: bool = False,
testing_agent_ratio: int = 1,
testing_batch_size: int = DEFAULT_TESTING_BATCH_SIZE,
batch_size: int = 3,
on_output: Callable[[int, str], None] | None = None,
on_status: Callable[[int, str], None] | None = None,
):
@@ -177,6 +178,7 @@ class ParallelOrchestrator:
self.yolo_mode = yolo_mode
self.testing_agent_ratio = min(max(testing_agent_ratio, 0), 3) # Clamp 0-3
self.testing_batch_size = min(max(testing_batch_size, 1), 5) # Clamp 1-5
self.batch_size = min(max(batch_size, 1), 3) # Clamp 1-3
self.on_output = on_output
self.on_status = on_status
@@ -200,6 +202,11 @@ class ParallelOrchestrator:
# Cleared when all passing features have been covered at least once.
self._recently_tested: set[int] = set()
# Batch tracking: primary feature_id -> all feature IDs in batch
self._batch_features: dict[int, list[int]] = {}
# Reverse mapping: any feature_id -> primary feature_id
self._feature_to_primary: dict[int, int] = {}
# Shutdown flag for async-safe signal handling
# Signal handlers only set this flag; cleanup happens in the main loop
self._shutdown_requested = False
@@ -352,6 +359,104 @@ class ParallelOrchestrator:
return selected
def build_feature_batches(
self,
ready: list[dict],
all_features: list[dict],
scheduling_scores: dict[int, float],
) -> list[list[dict]]:
"""Build dependency-aware feature batches for coding agents.
Each batch contains up to `batch_size` features. The algorithm:
1. Start with a ready feature (sorted by scheduling score)
2. Chain extension: find dependents whose deps are satisfied if earlier batch features pass
3. Same-category fill: fill remaining slots with ready features from the same category
Args:
ready: Ready features (sorted by scheduling score)
all_features: All features for dependency checking
scheduling_scores: Pre-computed scheduling scores
Returns:
List of batches, each batch is a list of feature dicts
"""
if self.batch_size <= 1:
# No batching - return each feature as a single-item batch
return [[f] for f in ready]
# Build children adjacency: parent_id -> [child_ids]
children: dict[int, list[int]] = {f["id"]: [] for f in all_features}
feature_map: dict[int, dict] = {f["id"]: f for f in all_features}
for f in all_features:
for dep_id in (f.get("dependencies") or []):
if dep_id in children:
children[dep_id].append(f["id"])
# Pre-compute passing IDs
passing_ids = {f["id"] for f in all_features if f.get("passes")}
used_ids: set[int] = set() # Features already assigned to a batch
batches: list[list[dict]] = []
for feature in ready:
if feature["id"] in used_ids:
continue
batch = [feature]
used_ids.add(feature["id"])
# Simulate passing set = real passing + batch features
simulated_passing = passing_ids | {feature["id"]}
# Phase 1: Chain extension - find dependents whose deps are met
for _ in range(self.batch_size - 1):
best_candidate = None
best_score = -1.0
# Check children of all features currently in the batch
candidate_ids: set[int] = set()
for bf in batch:
for child_id in children.get(bf["id"], []):
if child_id not in used_ids and child_id not in simulated_passing:
candidate_ids.add(child_id)
for cid in candidate_ids:
cf = feature_map.get(cid)
if not cf or cf.get("passes") or cf.get("in_progress"):
continue
# Check if ALL deps are satisfied by simulated passing set
deps = cf.get("dependencies") or []
if all(d in simulated_passing for d in deps):
score = scheduling_scores.get(cid, 0)
if score > best_score:
best_score = score
best_candidate = cf
if best_candidate:
batch.append(best_candidate)
used_ids.add(best_candidate["id"])
simulated_passing.add(best_candidate["id"])
else:
break
# Phase 2: Same-category fill
if len(batch) < self.batch_size:
category = feature.get("category", "")
for rf in ready:
if len(batch) >= self.batch_size:
break
if rf["id"] in used_ids:
continue
if rf.get("category", "") == category:
batch.append(rf)
used_ids.add(rf["id"])
batches.append(batch)
debug_log.log("BATCH", f"Built {len(batches)} batches from {len(ready)} ready features",
batch_sizes=[len(b) for b in batches],
batch_ids=[[f['id'] for f in b] for b in batches[:5]])
return batches
def get_resumable_features(
self,
feature_dicts: list[dict] | None = None,
@@ -376,9 +481,11 @@ class ParallelOrchestrator:
finally:
session.close()
# Snapshot running IDs once (include all batch feature IDs)
with self._lock:
running_ids = set(self.running_coding_agents.keys())
for batch_ids in self._batch_features.values():
running_ids.update(batch_ids)
resumable = []
for fd in feature_dicts:
@@ -421,9 +528,11 @@ class ParallelOrchestrator:
# Pre-compute passing_ids once to avoid O(n^2) in the loop
passing_ids = {fd["id"] for fd in feature_dicts if fd.get("passes")}
# Snapshot running IDs once (include all batch feature IDs)
with self._lock:
running_ids = set(self.running_coding_agents.keys())
for batch_ids in self._batch_features.values():
running_ids.update(batch_ids)
ready = []
skipped_reasons = {"passes": 0, "in_progress": 0, "running": 0, "failed": 0, "deps": 0}
@@ -635,6 +744,75 @@ class ParallelOrchestrator:
return True, f"Started feature {feature_id}"
def start_feature_batch(self, feature_ids: list[int], resume: bool = False) -> tuple[bool, str]:
"""Start a coding agent for a batch of features.
Args:
feature_ids: List of feature IDs to implement in batch
resume: If True, resume features already in_progress
Returns:
Tuple of (success, message)
"""
if not feature_ids:
return False, "No features to start"
# Single feature falls back to start_feature
if len(feature_ids) == 1:
return self.start_feature(feature_ids[0], resume=resume)
with self._lock:
# Check if any feature in batch is already running
for fid in feature_ids:
if fid in self.running_coding_agents or fid in self._feature_to_primary:
return False, f"Feature {fid} already running"
if len(self.running_coding_agents) >= self.max_concurrency:
return False, "At max concurrency"
total_agents = len(self.running_coding_agents) + len(self.running_testing_agents)
if total_agents >= MAX_TOTAL_AGENTS:
return False, f"At max total agents ({total_agents}/{MAX_TOTAL_AGENTS})"
# Mark all features as in_progress in a single transaction
session = self.get_session()
try:
features_to_mark = []
for fid in feature_ids:
feature = session.query(Feature).filter(Feature.id == fid).first()
if not feature:
return False, f"Feature {fid} not found"
if feature.passes:
return False, f"Feature {fid} already complete"
if not resume:
if feature.in_progress:
return False, f"Feature {fid} already in progress"
features_to_mark.append(feature)
else:
if not feature.in_progress:
return False, f"Feature {fid} not in progress, cannot resume"
for feature in features_to_mark:
feature.in_progress = True
session.commit()
finally:
session.close()
# Spawn batch coding agent
success, message = self._spawn_coding_agent_batch(feature_ids)
if not success:
# Clear in_progress on failure
session = self.get_session()
try:
for fid in feature_ids:
feature = session.query(Feature).filter(Feature.id == fid).first()
if feature and not resume:
feature.in_progress = False
session.commit()
finally:
session.close()
return False, message
return True, f"Started batch [{', '.join(str(fid) for fid in feature_ids)}]"
def _spawn_coding_agent(self, feature_id: int) -> tuple[bool, str]:
"""Spawn a coding agent subprocess for a specific feature."""
# Create abort event
@@ -702,6 +880,75 @@ class ParallelOrchestrator:
print(f"Started coding agent for feature #{feature_id}", flush=True)
return True, f"Started feature {feature_id}"
def _spawn_coding_agent_batch(self, feature_ids: list[int]) -> tuple[bool, str]:
"""Spawn a coding agent subprocess for a batch of features."""
primary_id = feature_ids[0]
abort_event = threading.Event()
cmd = [
sys.executable,
"-u",
str(AUTOCODER_ROOT / "autonomous_agent_demo.py"),
"--project-dir", str(self.project_dir),
"--max-iterations", "1",
"--agent-type", "coding",
"--feature-ids", ",".join(str(fid) for fid in feature_ids),
]
if self.model:
cmd.extend(["--model", self.model])
if self.yolo_mode:
cmd.append("--yolo")
try:
popen_kwargs: dict[str, Any] = {
"stdin": subprocess.DEVNULL,
"stdout": subprocess.PIPE,
"stderr": subprocess.STDOUT,
"text": True,
"encoding": "utf-8",
"errors": "replace",
"cwd": str(AUTOCODER_ROOT),
"env": {**os.environ, "PYTHONUNBUFFERED": "1"},
}
if sys.platform == "win32":
popen_kwargs["creationflags"] = subprocess.CREATE_NO_WINDOW
proc = subprocess.Popen(cmd, **popen_kwargs)
except Exception as e:
# Reset in_progress on failure
session = self.get_session()
try:
for fid in feature_ids:
feature = session.query(Feature).filter(Feature.id == fid).first()
if feature:
feature.in_progress = False
session.commit()
finally:
session.close()
return False, f"Failed to start batch agent: {e}"
with self._lock:
self.running_coding_agents[primary_id] = proc
self.abort_events[primary_id] = abort_event
self._batch_features[primary_id] = list(feature_ids)
for fid in feature_ids:
self._feature_to_primary[fid] = primary_id
# Start output reader thread
threading.Thread(
target=self._read_output,
args=(primary_id, proc, abort_event, "coding"),
daemon=True
).start()
if self.on_status is not None:
for fid in feature_ids:
self.on_status(fid, "running")
ids_str = ", ".join(f"#{fid}" for fid in feature_ids)
print(f"Started coding agent for features {ids_str}", flush=True)
return True, f"Started batch [{ids_str}]"
def _spawn_testing_agent(self) -> tuple[bool, str]:
"""Spawn a testing agent subprocess for batch regression testing.
@@ -982,73 +1229,84 @@ class ParallelOrchestrator:
# feature_id is required for coding agents (always passed from start_feature)
assert feature_id is not None, "feature_id must not be None for coding agents"
# Coding agent completion - handle both single and batch features
batch_ids = None
with self._lock:
batch_ids = self._batch_features.pop(feature_id, None)
if batch_ids:
# Clean up reverse mapping
for fid in batch_ids:
self._feature_to_primary.pop(fid, None)
self.running_coding_agents.pop(feature_id, None)
self.abort_events.pop(feature_id, None)
all_feature_ids = batch_ids or [feature_id]
debug_log.log("COMPLETE", f"Coding agent for feature(s) {all_feature_ids} finished",
return_code=return_code,
status="success" if return_code == 0 else "failed",
batch_size=len(all_feature_ids))
# Refresh session cache to see subprocess commits
# The coding agent runs as a subprocess and commits changes (e.g., passes=True).
# Using session.expire_all() is lighter weight than engine.dispose() for SQLite WAL mode
# and is sufficient to invalidate cached data and force fresh reads.
# engine.dispose() is only called on orchestrator shutdown, not on every agent completion.
session = self.get_session()
try:
session.expire_all()
for fid in all_feature_ids:
feature = session.query(Feature).filter(Feature.id == fid).first()
feature_passes = feature.passes if feature else None
feature_in_progress = feature.in_progress if feature else None
debug_log.log("DB", f"Feature #{fid} state after session.expire_all()",
passes=feature_passes,
in_progress=feature_in_progress)
if feature and feature.in_progress and not feature.passes:
feature.in_progress = False
session.commit()
debug_log.log("DB", f"Cleared in_progress for feature #{fid} (agent failed)")
finally:
session.close()
# Track failures for features still in_progress at exit
if return_code != 0:
with self._lock:
for fid in all_feature_ids:
self._failure_counts[fid] = self._failure_counts.get(fid, 0) + 1
failure_count = self._failure_counts[fid]
if failure_count >= MAX_FEATURE_RETRIES:
print(f"Feature #{fid} has failed {failure_count} times, will not retry", flush=True)
debug_log.log("COMPLETE", f"Feature #{fid} exceeded max retries",
failure_count=failure_count)
status = "completed" if return_code == 0 else "failed"
if self.on_status is not None:
for fid in all_feature_ids:
self.on_status(fid, status)
# CRITICAL: Print triggers WebSocket to emit agent_update
if batch_ids and len(batch_ids) > 1:
ids_str = ", ".join(f"#{fid}" for fid in batch_ids)
print(f"Features {ids_str} {status}", flush=True)
else:
print(f"Feature #{feature_id} {status}", flush=True)
# Signal main loop that an agent slot is available
self._signal_agent_completed()
# NOTE: Testing agents are now spawned in start_feature() when coding agents START,
# not here when they complete. This ensures 1:1 ratio and proper termination.
def stop_feature(self, feature_id: int) -> tuple[bool, str]:
"""Stop a running coding agent and all its child processes."""
with self._lock:
# Check if this feature is part of a batch
primary_id = self._feature_to_primary.get(feature_id, feature_id)
if primary_id not in self.running_coding_agents:
return False, "Feature not running"
abort = self.abort_events.get(primary_id)
proc = self.running_coding_agents.get(primary_id)
if abort:
abort.set()
if proc:
# Kill entire process tree to avoid orphaned children (e.g., browser instances)
result = kill_process_tree(proc, timeout=5.0)
debug_log.log("STOP", f"Killed feature {feature_id} process tree",
debug_log.log("STOP", f"Killed feature {feature_id} (primary {primary_id}) process tree",
status=result.status, children_found=result.children_found,
children_terminated=result.children_terminated, children_killed=result.children_killed)
@@ -1113,6 +1371,7 @@ class ParallelOrchestrator:
print(f"Max concurrency: {self.max_concurrency} coding agents", flush=True)
print(f"YOLO mode: {self.yolo_mode}", flush=True)
print(f"Regression agents: {self.testing_agent_ratio} (maintained independently)", flush=True)
print(f"Batch size: {self.batch_size} features per agent", flush=True)
print("=" * 70, flush=True)
print(flush=True)
@@ -1276,37 +1535,39 @@ class ParallelOrchestrator:
await self._wait_for_agent_completion(timeout=POLL_INTERVAL * 2)
continue
# Build dependency-aware batches from ready features
slots = self.max_concurrency - current
logger.debug("Spawning loop: %d ready, %d slots available, max_concurrency=%d",
len(ready), slots, self.max_concurrency)
features_to_start = ready[:slots]
logger.debug("Features to start: %s", [f['id'] for f in features_to_start])
batches = self.build_feature_batches(ready, feature_dicts, scheduling_scores)
debug_log.log("SPAWN", "Starting features batch",
logger.debug("Spawning loop: %d ready, %d slots available, %d batches built",
len(ready), slots, len(batches))
debug_log.log("SPAWN", "Starting feature batches",
ready_count=len(ready),
slots_available=slots,
features_to_start=[f['id'] for f in features_to_start])
batch_count=len(batches),
batches=[[f['id'] for f in b] for b in batches[:slots]])
for batch in batches[:slots]:
batch_ids = [f["id"] for f in batch]
batch_names = [f"{f['id']}:{f['name']}" for f in batch]
logger.debug("Starting batch: %s", batch_ids)
success, msg = self.start_feature_batch(batch_ids)
if not success:
logger.debug("Failed to start feature #%d: %s", feature['id'], msg)
debug_log.log("SPAWN", f"FAILED to start feature #{feature['id']}",
feature_name=feature['name'],
logger.debug("Failed to start batch %s: %s", batch_ids, msg)
debug_log.log("SPAWN", f"FAILED to start batch {batch_ids}",
batch_names=batch_names,
error=msg)
else:
logger.debug("Successfully started feature #%d", feature['id'])
logger.debug("Successfully started batch %s", batch_ids)
with self._lock:
running_count = len(self.running_coding_agents)
logger.debug("Running coding agents after start: %d", running_count)
debug_log.log("SPAWN", f"Successfully started feature #{feature['id']}",
feature_name=feature['name'],
debug_log.log("SPAWN", f"Successfully started batch {batch_ids}",
batch_names=batch_names,
running_coding_agents=running_count)
await asyncio.sleep(0.5)
except Exception as e:
print(f"Orchestrator error: {e}", flush=True)
@@ -1376,6 +1637,7 @@ async def run_parallel_orchestrator(
yolo_mode: bool = False,
testing_agent_ratio: int = 1,
testing_batch_size: int = DEFAULT_TESTING_BATCH_SIZE,
batch_size: int = 3,
) -> None:
"""Run the unified orchestrator.
@@ -1386,6 +1648,7 @@ async def run_parallel_orchestrator(
yolo_mode: Whether to run in YOLO mode (skip testing agents)
testing_agent_ratio: Number of regression agents to maintain (0-3)
testing_batch_size: Number of features per testing batch (1-5)
batch_size: Max features per coding agent batch (1-3)
"""
print(f"[ORCHESTRATOR] run_parallel_orchestrator called with max_concurrency={max_concurrency}", flush=True)
orchestrator = ParallelOrchestrator(
@@ -1395,6 +1658,7 @@ async def run_parallel_orchestrator(
yolo_mode=yolo_mode,
testing_agent_ratio=testing_agent_ratio,
testing_batch_size=testing_batch_size,
batch_size=batch_size,
)
# Set up cleanup to run on exit (handles normal exit, exceptions)
@@ -1480,6 +1744,12 @@ def main():
default=DEFAULT_TESTING_BATCH_SIZE,
help=f"Number of features per testing batch (1-5, default: {DEFAULT_TESTING_BATCH_SIZE})",
)
parser.add_argument(
"--batch-size",
type=int,
default=3,
help="Max features per coding agent batch (1-5, default: 3)",
)
args = parser.parse_args()
@@ -1507,6 +1777,7 @@ def main():
yolo_mode=args.yolo,
testing_agent_ratio=args.testing_agent_ratio,
testing_batch_size=args.testing_batch_size,
batch_size=args.batch_size,
))
except KeyboardInterrupt:
print("\n\nInterrupted by user", flush=True)