mirror of
https://github.com/leonvanzyl/autocoder.git
synced 2026-03-17 02:43:09 +00:00
Merge remote-tracking branch 'origin/master' into feature/blocked-for-human-input
# Conflicts: # server/services/process_manager.py
This commit is contained in:
@@ -194,6 +194,7 @@ class ParallelOrchestrator:
|
||||
# Legacy alias for backward compatibility
|
||||
self.running_agents = self.running_coding_agents
|
||||
self.abort_events: dict[int, threading.Event] = {}
|
||||
self._testing_session_counter = 0
|
||||
self.is_running = False
|
||||
|
||||
# Track feature failures to prevent infinite retry loops
|
||||
@@ -212,6 +213,9 @@ class ParallelOrchestrator:
|
||||
# Signal handlers only set this flag; cleanup happens in the main loop
|
||||
self._shutdown_requested = False
|
||||
|
||||
# Graceful pause (drain mode) flag
|
||||
self._drain_requested = False
|
||||
|
||||
# Session tracking for logging/debugging
|
||||
self.session_start_time: datetime | None = None
|
||||
|
||||
@@ -852,7 +856,7 @@ class ParallelOrchestrator:
|
||||
"encoding": "utf-8",
|
||||
"errors": "replace",
|
||||
"cwd": str(self.project_dir), # Run from project dir so CLI creates .claude/ in project
|
||||
"env": {**os.environ, "PYTHONUNBUFFERED": "1"},
|
||||
"env": {**os.environ, "PYTHONUNBUFFERED": "1", "NODE_COMPILE_CACHE": "", "PLAYWRIGHT_CLI_SESSION": f"coding-{feature_id}"},
|
||||
}
|
||||
if sys.platform == "win32":
|
||||
popen_kwargs["creationflags"] = subprocess.CREATE_NO_WINDOW
|
||||
@@ -915,7 +919,7 @@ class ParallelOrchestrator:
|
||||
"encoding": "utf-8",
|
||||
"errors": "replace",
|
||||
"cwd": str(self.project_dir), # Run from project dir so CLI creates .claude/ in project
|
||||
"env": {**os.environ, "PYTHONUNBUFFERED": "1"},
|
||||
"env": {**os.environ, "PYTHONUNBUFFERED": "1", "NODE_COMPILE_CACHE": "", "PLAYWRIGHT_CLI_SESSION": f"coding-{primary_id}"},
|
||||
}
|
||||
if sys.platform == "win32":
|
||||
popen_kwargs["creationflags"] = subprocess.CREATE_NO_WINDOW
|
||||
@@ -1019,8 +1023,9 @@ class ParallelOrchestrator:
|
||||
"encoding": "utf-8",
|
||||
"errors": "replace",
|
||||
"cwd": str(self.project_dir), # Run from project dir so CLI creates .claude/ in project
|
||||
"env": {**os.environ, "PYTHONUNBUFFERED": "1"},
|
||||
"env": {**os.environ, "PYTHONUNBUFFERED": "1", "NODE_COMPILE_CACHE": "", "PLAYWRIGHT_CLI_SESSION": f"testing-{self._testing_session_counter}"},
|
||||
}
|
||||
self._testing_session_counter += 1
|
||||
if sys.platform == "win32":
|
||||
popen_kwargs["creationflags"] = subprocess.CREATE_NO_WINDOW
|
||||
|
||||
@@ -1080,7 +1085,7 @@ class ParallelOrchestrator:
|
||||
"encoding": "utf-8",
|
||||
"errors": "replace",
|
||||
"cwd": str(AUTOFORGE_ROOT),
|
||||
"env": {**os.environ, "PYTHONUNBUFFERED": "1"},
|
||||
"env": {**os.environ, "PYTHONUNBUFFERED": "1", "NODE_COMPILE_CACHE": ""},
|
||||
}
|
||||
if sys.platform == "win32":
|
||||
popen_kwargs["creationflags"] = subprocess.CREATE_NO_WINDOW
|
||||
@@ -1166,6 +1171,19 @@ class ParallelOrchestrator:
|
||||
debug_log.log("CLEANUP", f"Error killing process tree for {agent_type} agent", error=str(e))
|
||||
self._on_agent_complete(feature_id, proc.returncode, agent_type, proc)
|
||||
|
||||
def _run_inter_session_cleanup(self):
|
||||
"""Run lightweight cleanup between agent sessions.
|
||||
|
||||
Removes stale temp files and project screenshots to prevent
|
||||
disk space accumulation during long overnight runs.
|
||||
"""
|
||||
try:
|
||||
from temp_cleanup import cleanup_project_screenshots, cleanup_stale_temp
|
||||
cleanup_stale_temp()
|
||||
cleanup_project_screenshots(self.project_dir)
|
||||
except Exception as e:
|
||||
debug_log.log("CLEANUP", f"Inter-session cleanup failed (non-fatal): {e}")
|
||||
|
||||
def _signal_agent_completed(self):
|
||||
"""Signal that an agent has completed, waking the main loop.
|
||||
|
||||
@@ -1241,6 +1259,8 @@ class ParallelOrchestrator:
|
||||
pid=proc.pid,
|
||||
feature_id=feature_id,
|
||||
status=status)
|
||||
# Run lightweight cleanup between sessions
|
||||
self._run_inter_session_cleanup()
|
||||
# Signal main loop that an agent slot is available
|
||||
self._signal_agent_completed()
|
||||
return
|
||||
@@ -1307,6 +1327,8 @@ class ParallelOrchestrator:
|
||||
else:
|
||||
print(f"Feature #{feature_id} {status}", flush=True)
|
||||
|
||||
# Run lightweight cleanup between sessions
|
||||
self._run_inter_session_cleanup()
|
||||
# Signal main loop that an agent slot is available
|
||||
self._signal_agent_completed()
|
||||
|
||||
@@ -1374,6 +1396,9 @@ class ParallelOrchestrator:
|
||||
# Must happen before any debug_log.log() calls
|
||||
debug_log.start_session()
|
||||
|
||||
# Clear any stale drain signal from a previous session
|
||||
self._clear_drain_signal()
|
||||
|
||||
# Log startup to debug file
|
||||
debug_log.section("ORCHESTRATOR STARTUP")
|
||||
debug_log.log("STARTUP", "Orchestrator run_loop starting",
|
||||
@@ -1495,6 +1520,34 @@ class ParallelOrchestrator:
|
||||
print("\nAll features complete!", flush=True)
|
||||
break
|
||||
|
||||
# --- Graceful pause (drain mode) ---
|
||||
if not self._drain_requested and self._check_drain_signal():
|
||||
self._drain_requested = True
|
||||
print("Graceful pause requested - draining running agents...", flush=True)
|
||||
debug_log.log("DRAIN", "Graceful pause requested, draining running agents")
|
||||
|
||||
if self._drain_requested:
|
||||
with self._lock:
|
||||
coding_count = len(self.running_coding_agents)
|
||||
testing_count = len(self.running_testing_agents)
|
||||
|
||||
if coding_count == 0 and testing_count == 0:
|
||||
print("All agents drained - paused.", flush=True)
|
||||
debug_log.log("DRAIN", "All agents drained, entering paused state")
|
||||
# Wait until signal file is removed (resume) or shutdown
|
||||
while self._check_drain_signal() and self.is_running and not self._shutdown_requested:
|
||||
await asyncio.sleep(1)
|
||||
if not self.is_running or self._shutdown_requested:
|
||||
break
|
||||
self._drain_requested = False
|
||||
print("Resuming from graceful pause...", flush=True)
|
||||
debug_log.log("DRAIN", "Resuming from graceful pause")
|
||||
continue
|
||||
else:
|
||||
debug_log.log("DRAIN", f"Waiting for agents to finish: coding={coding_count}, testing={testing_count}")
|
||||
await self._wait_for_agent_completion()
|
||||
continue
|
||||
|
||||
# Maintain testing agents independently (runs every iteration)
|
||||
self._maintain_testing_agents(feature_dicts)
|
||||
|
||||
@@ -1619,6 +1672,17 @@ class ParallelOrchestrator:
|
||||
"yolo_mode": self.yolo_mode,
|
||||
}
|
||||
|
||||
def _check_drain_signal(self) -> bool:
|
||||
"""Check if the graceful pause (drain) signal file exists."""
|
||||
from autoforge_paths import get_pause_drain_path
|
||||
return get_pause_drain_path(self.project_dir).exists()
|
||||
|
||||
def _clear_drain_signal(self) -> None:
|
||||
"""Delete the drain signal file and reset the flag."""
|
||||
from autoforge_paths import get_pause_drain_path
|
||||
get_pause_drain_path(self.project_dir).unlink(missing_ok=True)
|
||||
self._drain_requested = False
|
||||
|
||||
def cleanup(self) -> None:
|
||||
"""Clean up database resources. Safe to call multiple times.
|
||||
|
||||
|
||||
Reference in New Issue
Block a user