fix: prevent temp file accumulation during long agent runs

Address three issues reported after overnight AutoForge runs:

1. ~193GB of .node files in %TEMP% from V8 compile caching
2. Stale npm artifact folders on drive root when %TEMP% fills up
3. PNG screenshot files left in project root by Playwright

Changes:

- Widen .node cleanup glob from ".78912*.node" to ".[0-9a-f]*.node" to match all V8 compile cache hex prefixes
- Add "node-compile-cache" directory to temp cleanup patterns
- Set NODE_COMPILE_CACHE="" in all subprocess environments (client.py, parallel_orchestrator.py, process_manager.py) to disable V8 compile caching at the source
- Add cleanup_project_screenshots() to remove stale .png files from project directories (feature*-*.png, screenshot-*.png, step-*.png)
- Run cleanup_stale_temp() at server startup in lifespan()
- Add _run_inter_session_cleanup() to orchestrator, called after each agent completes (both coding and testing paths)
- Update coding and testing prompt templates to instruct agents to use inline (base64) screenshots only, never saving files to disk

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-17 02:43:09 +00:00 · 2026-02-09 08:54:52 +02:00
parent 9eb08d3f71
commit f87970daca
7 changed files with 94 additions and 11 deletions
--- a/.claude/templates/coding_prompt.template.md
+++ b/.claude/templates/coding_prompt.template.md
@@ -90,13 +90,13 @@ Use browser automation tools:
 - Navigate to the app in a real browser
 - Interact like a human user (click, type, scroll)
- Take screenshots at each step
+- Take screenshots at each step (use inline screenshots only -- do NOT save screenshot files to disk)
 - Verify both functionality AND visual appearance
 **DO:**
 - Test through the UI with clicks and keyboard input
- Take screenshots to verify visual appearance
+- Take screenshots to verify visual appearance (inline only, never save to disk)
 - Check for console errors in browser
 - Verify complete user workflows end-to-end
@@ -194,6 +194,8 @@ Before context fills up:
 Use Playwright MCP tools (`browser_*`) for UI verification. Key tools: `navigate`, `click`, `type`, `fill_form`, `take_screenshot`, `console_messages`, `network_requests`. All tools have auto-wait built in.
 **Screenshot rule:** Always use inline mode (base64). NEVER save screenshots as files to disk.
 Test like a human user with mouse and keyboard. Use `browser_console_messages` to detect errors. Don't bypass UI with JavaScript evaluation.
 ---
--- a/.claude/templates/testing_prompt.template.md
+++ b/.claude/templates/testing_prompt.template.md
@@ -31,14 +31,14 @@ For the feature returned:
 1. Read and understand the feature's verification steps
 2. Navigate to the relevant part of the application
 3. Execute each verification step using browser automation
-4. Take screenshots to document the verification
+4. Take screenshots to document the verification (inline only -- do NOT save to disk)
 5. Check for console errors
 Use browser automation tools:
 **Navigation & Screenshots:**
 - browser_navigate - Navigate to a URL
- browser_take_screenshot - Capture screenshot (use for visual verification)
+- browser_take_screenshot - Capture screenshot (inline mode only -- never save to disk)
 - browser_snapshot - Get accessibility tree snapshot
 **Element Interaction:**
@@ -79,7 +79,7 @@ A regression has been introduced. You MUST fix it:
 4. **Verify the fix:**
   - Run through all verification steps again
-   - Take screenshots confirming the fix
+   - Take screenshots confirming the fix (inline only, never save to disk)
 5. **Mark as passing after fix:**
   ```
@@ -110,7 +110,7 @@ A regression has been introduced. You MUST fix it:
 All interaction tools have **built-in auto-wait** -- no manual timeouts needed.
 - `browser_navigate` - Navigate to URL
- `browser_take_screenshot` - Capture screenshot
+- `browser_take_screenshot` - Capture screenshot (inline only, never save to disk)
 - `browser_snapshot` - Get accessibility tree
 - `browser_click` - Click elements
 - `browser_type` - Type text
--- a/client.py
+++ b/client.py
@@ -446,6 +446,9 @@ def create_client(
        mcp_servers["playwright"] = {
            "command": "npx",
            "args": playwright_args,
            "env": {
                "NODE_COMPILE_CACHE": "",  # Disable V8 compile caching to prevent .node file accumulation in %TEMP%
            },
        }
    # Build environment overrides for API endpoint configuration
--- a/parallel_orchestrator.py
+++ b/parallel_orchestrator.py
@@ -846,7 +846,7 @@ class ParallelOrchestrator:
                "encoding": "utf-8",
                "errors": "replace",
                "cwd": str(self.project_dir),  # Run from project dir so CLI creates .claude/ in project
-                "env": {**os.environ, "PYTHONUNBUFFERED": "1"},
+                "env": {**os.environ, "PYTHONUNBUFFERED": "1", "NODE_COMPILE_CACHE": ""},
            }
            if sys.platform == "win32":
                popen_kwargs["creationflags"] = subprocess.CREATE_NO_WINDOW
@@ -909,7 +909,7 @@ class ParallelOrchestrator:
                "encoding": "utf-8",
                "errors": "replace",
                "cwd": str(self.project_dir),  # Run from project dir so CLI creates .claude/ in project
-                "env": {**os.environ, "PYTHONUNBUFFERED": "1"},
+                "env": {**os.environ, "PYTHONUNBUFFERED": "1", "NODE_COMPILE_CACHE": ""},
            }
            if sys.platform == "win32":
                popen_kwargs["creationflags"] = subprocess.CREATE_NO_WINDOW
@@ -1013,7 +1013,7 @@ class ParallelOrchestrator:
                    "encoding": "utf-8",
                    "errors": "replace",
                    "cwd": str(self.project_dir),  # Run from project dir so CLI creates .claude/ in project
-                    "env": {**os.environ, "PYTHONUNBUFFERED": "1"},
+                    "env": {**os.environ, "PYTHONUNBUFFERED": "1", "NODE_COMPILE_CACHE": ""},
                }
                if sys.platform == "win32":
                    popen_kwargs["creationflags"] = subprocess.CREATE_NO_WINDOW
@@ -1074,7 +1074,7 @@ class ParallelOrchestrator:
            "encoding": "utf-8",
            "errors": "replace",
            "cwd": str(AUTOFORGE_ROOT),
-            "env": {**os.environ, "PYTHONUNBUFFERED": "1"},
+            "env": {**os.environ, "PYTHONUNBUFFERED": "1", "NODE_COMPILE_CACHE": ""},
        }
        if sys.platform == "win32":
            popen_kwargs["creationflags"] = subprocess.CREATE_NO_WINDOW
@@ -1160,6 +1160,19 @@ class ParallelOrchestrator:
                debug_log.log("CLEANUP", f"Error killing process tree for {agent_type} agent", error=str(e))
            self._on_agent_complete(feature_id, proc.returncode, agent_type, proc)
    def _run_inter_session_cleanup(self):
        """Run lightweight, best-effort cleanup between agent sessions.
        Calls ``cleanup_stale_temp()`` and then
        ``cleanup_project_screenshots(self.project_dir)`` from ``temp_cleanup``
        to limit disk space accumulation during long overnight runs.
        Each step is guarded on its own, so a failure while cleaning the temp
        directory no longer skips the project screenshot cleanup. Failures are
        logged through ``debug_log`` and never raised to the caller.
        """
        try:
            # Imported lazily and inside the guard so that an import failure is
            # logged like any other cleanup failure instead of propagating.
            from temp_cleanup import cleanup_project_screenshots, cleanup_stale_temp
        except Exception as e:
            debug_log.log("CLEANUP", f"Inter-session cleanup failed (non-fatal): {e}")
            return
        # Steps run in the original order; each one is independent of the other.
        steps = (
            lambda: cleanup_stale_temp(),
            lambda: cleanup_project_screenshots(self.project_dir),
        )
        for step in steps:
            try:
                step()
            except Exception as e:
                # Deliberately broad: cleanup is non-fatal for the orchestrator.
                debug_log.log("CLEANUP", f"Inter-session cleanup failed (non-fatal): {e}")
    def _signal_agent_completed(self):
        """Signal that an agent has completed, waking the main loop.
@@ -1235,6 +1248,8 @@ class ParallelOrchestrator:
                pid=proc.pid,
                feature_id=feature_id,
                status=status)
            # Run lightweight cleanup between sessions
            self._run_inter_session_cleanup()
            # Signal main loop that an agent slot is available
            self._signal_agent_completed()
            return
@@ -1301,6 +1316,8 @@ class ParallelOrchestrator:
        else:
            print(f"Feature #{feature_id} {status}", flush=True)
        # Run lightweight cleanup between sessions
        self._run_inter_session_cleanup()
        # Signal main loop that an agent slot is available
        self._signal_agent_completed()
--- a/server/main.py
+++ b/server/main.py
@@ -61,6 +61,17 @@ UI_DIST_DIR = ROOT_DIR / "ui" / "dist"
@asynccontextmanager
 async def lifespan(app: FastAPI):
    """Lifespan context manager for startup and shutdown."""
    # Startup - clean up stale temp files (Playwright profiles, .node cache, etc.)
    try:
        from temp_cleanup import cleanup_stale_temp
        stats = cleanup_stale_temp()
        if stats["dirs_deleted"] > 0 or stats["files_deleted"] > 0:
            mb_freed = stats["bytes_freed"] / (1024 * 1024)
            logger.info("Startup temp cleanup: %d dirs, %d files, %.1f MB freed",
                        stats["dirs_deleted"], stats["files_deleted"], mb_freed)
    except Exception as e:
        logger.warning("Startup temp cleanup failed (non-fatal): %s", e)
    # Startup - clean up orphaned lock files from previous runs
    cleanup_orphaned_locks()
    cleanup_orphaned_devserver_locks()
--- a/server/services/process_manager.py
+++ b/server/services/process_manager.py
@@ -410,6 +410,7 @@ class AgentProcessManager:
                **os.environ,
                "PYTHONUNBUFFERED": "1",
                "PLAYWRIGHT_HEADLESS": "true" if playwright_headless else "false",
                "NODE_COMPILE_CACHE": "",  # Disable V8 compile caching to prevent .node file accumulation in %TEMP%
                **api_env,
            }
--- a/temp_cleanup.py
+++ b/temp_cleanup.py
@@ -37,11 +37,12 @@ DIR_PATTERNS = [
    "mongodb-memory-server*",           # MongoDB Memory Server binaries
    "ng-*",                             # Angular CLI temp directories
    "scoped_dir*",                      # Chrome/Chromium temp directories
    "node-compile-cache",               # Node.js V8 compile cache directory
 ]
 # File patterns to clean up (glob patterns)
 FILE_PATTERNS = [
-    ".78912*.node",   # Node.js native module cache (major space consumer, ~7MB each)
+    ".[0-9a-f]*.node",   # Node.js/V8 compile cache files (~7MB each, varying hex prefixes)
    "claude-*-cwd",   # Claude CLI working directory temp files
    "mat-debug-*.log",  # Material/Angular debug logs
 ]
@@ -122,6 +123,54 @@ def cleanup_stale_temp(max_age_seconds: int = MAX_AGE_SECONDS) -> dict:
    return stats
 def cleanup_project_screenshots(project_dir: Path, max_age_seconds: int = 300) -> dict:
    """
    Clean up stale screenshot files from the project root.
    Only files directly inside ``project_dir`` whose names match
    ``feature*-*.png``, ``screenshot-*.png`` or ``step-*.png`` are considered,
    and only those whose modification time is older than ``max_age_seconds``
    are removed. Per-file failures are recorded in the result, not raised.
    Args:
        project_dir: Path to the project directory. A ``str`` (or any
                     ``os.PathLike``) is accepted as well and converted.
        max_age_seconds: Maximum age in seconds before a screenshot is deleted.
                        Defaults to 5 minutes (300 seconds).
    Returns:
        Dictionary with cleanup statistics (files_deleted, bytes_freed, errors).
    """
    root = Path(project_dir)  # tolerate callers that pass a plain string path
    cutoff_time = time.time() - max_age_seconds
    stats: dict = {"files_deleted": 0, "bytes_freed": 0, "errors": []}
    screenshot_patterns = (
        "feature*-*.png",
        "screenshot-*.png",
        "step-*.png",
    )
    for pattern in screenshot_patterns:
        for item in root.glob(pattern):
            try:
                if not item.is_file():
                    continue
                # One stat() call supplies both mtime and size, so the two
                # values always describe the same snapshot of the file.
                info = item.stat()
                if info.st_mtime >= cutoff_time:
                    continue  # still fresh; may belong to a running session
                item.unlink(missing_ok=True)
                # Only count the file once it is really gone.
                # NOTE(review): presumably guards against deletes that return
                # without removing the entry (e.g. on Windows) -- confirm.
                if item.exists():
                    continue
            except OSError as e:
                stats["errors"].append(f"Failed to delete {item}: {e}")
                logger.debug("Failed to delete screenshot %s: %s", item, e)
                continue
            stats["files_deleted"] += 1
            stats["bytes_freed"] += info.st_size
            logger.debug("Deleted project screenshot: %s", item)
    if stats["files_deleted"] > 0:
        mb_freed = stats["bytes_freed"] / (1024 * 1024)
        logger.info("Screenshot cleanup: %d files, %.1f MB freed", stats["files_deleted"], mb_freed)
    return stats
 def _get_dir_size(path: Path) -> int:
    """Get total size of a directory in bytes."""
    total = 0