fix: prevent temp file accumulation during long agent runs

Address three issues reported after overnight AutoForge runs:

1. ~193GB of .node files in %TEMP% from V8 compile caching
2. Stale npm artifact folders on drive root when %TEMP% fills up
3. PNG screenshot files left in project root by Playwright

Changes:

- Widen .node cleanup glob from ".78912*.node" to ".[0-9a-f]*.node" to match all V8 compile cache hex prefixes
- Add "node-compile-cache" directory to temp cleanup patterns
- Set NODE_COMPILE_CACHE="" in all subprocess environments (client.py, parallel_orchestrator.py, process_manager.py) to disable V8 compile caching at the source
- Add cleanup_project_screenshots() to remove stale .png files from project directories (feature*-*.png, screenshot-*.png, step-*.png)
- Run cleanup_stale_temp() at server startup in lifespan()
- Add _run_inter_session_cleanup() to orchestrator, called after each agent completes (both coding and testing paths)
- Update coding and testing prompt templates to instruct agents to use inline (base64) screenshots only, never saving files to disk

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-17 02:43:09 +00:00 · 2026-02-09 08:54:52 +02:00
parent 9eb08d3f71
commit f87970daca
7 changed files with 94 additions and 11 deletions
--- a/.claude/templates/coding_prompt.template.md
+++ b/.claude/templates/coding_prompt.template.md
@@ -90,13 +90,13 @@ Use browser automation tools:

 - Navigate to the app in a real browser
 - Interact like a human user (click, type, scroll)
- Take screenshots at each step
+- Take screenshots at each step (use inline screenshots only -- do NOT save screenshot files to disk)
 - Verify both functionality AND visual appearance

 **DO:**

 - Test through the UI with clicks and keyboard input
- Take screenshots to verify visual appearance
+- Take screenshots to verify visual appearance (inline only, never save to disk)
 - Check for console errors in browser
 - Verify complete user workflows end-to-end

@@ -194,6 +194,8 @@ Before context fills up:

 Use Playwright MCP tools (`browser_*`) for UI verification. Key tools: `navigate`, `click`, `type`, `fill_form`, `take_screenshot`, `console_messages`, `network_requests`. All tools have auto-wait built in.

+**Screenshot rule:** Always use inline mode (base64). NEVER save screenshots as files to disk.
+
 Test like a human user with mouse and keyboard. Use `browser_console_messages` to detect errors. Don't bypass UI with JavaScript evaluation.

 ---
--- a/.claude/templates/testing_prompt.template.md
+++ b/.claude/templates/testing_prompt.template.md
@@ -31,14 +31,14 @@ For the feature returned:
 1. Read and understand the feature's verification steps
 2. Navigate to the relevant part of the application
 3. Execute each verification step using browser automation
-4. Take screenshots to document the verification
+4. Take screenshots to document the verification (inline only -- do NOT save to disk)
 5. Check for console errors

 Use browser automation tools:

 **Navigation & Screenshots:**
 - browser_navigate - Navigate to a URL
- browser_take_screenshot - Capture screenshot (use for visual verification)
+- browser_take_screenshot - Capture screenshot (inline mode only -- never save to disk)
 - browser_snapshot - Get accessibility tree snapshot

 **Element Interaction:**
@@ -79,7 +79,7 @@ A regression has been introduced. You MUST fix it:

 4. **Verify the fix:**
   - Run through all verification steps again
-   - Take screenshots confirming the fix
+   - Take screenshots confirming the fix (inline only, never save to disk)

 5. **Mark as passing after fix:**
   ```
@@ -110,7 +110,7 @@ A regression has been introduced. You MUST fix it:
 All interaction tools have **built-in auto-wait** -- no manual timeouts needed.

 - `browser_navigate` - Navigate to URL
- `browser_take_screenshot` - Capture screenshot
+- `browser_take_screenshot` - Capture screenshot (inline only, never save to disk)
 - `browser_snapshot` - Get accessibility tree
 - `browser_click` - Click elements
 - `browser_type` - Type text
--- a/client.py
+++ b/client.py
@@ -446,6 +446,9 @@ def create_client(
        mcp_servers["playwright"] = {
            "command": "npx",
            "args": playwright_args,
+            "env": {
+                "NODE_COMPILE_CACHE": "",  # Disable V8 compile caching to prevent .node file accumulation in %TEMP%
+            },
        }

    # Build environment overrides for API endpoint configuration
--- a/parallel_orchestrator.py
+++ b/parallel_orchestrator.py
@@ -846,7 +846,7 @@ class ParallelOrchestrator:
                "encoding": "utf-8",
                "errors": "replace",
                "cwd": str(self.project_dir),  # Run from project dir so CLI creates .claude/ in project
-                "env": {**os.environ, "PYTHONUNBUFFERED": "1"},
+                "env": {**os.environ, "PYTHONUNBUFFERED": "1", "NODE_COMPILE_CACHE": ""},
            }
            if sys.platform == "win32":
                popen_kwargs["creationflags"] = subprocess.CREATE_NO_WINDOW
@@ -909,7 +909,7 @@ class ParallelOrchestrator:
                "encoding": "utf-8",
                "errors": "replace",
                "cwd": str(self.project_dir),  # Run from project dir so CLI creates .claude/ in project
-                "env": {**os.environ, "PYTHONUNBUFFERED": "1"},
+                "env": {**os.environ, "PYTHONUNBUFFERED": "1", "NODE_COMPILE_CACHE": ""},
            }
            if sys.platform == "win32":
                popen_kwargs["creationflags"] = subprocess.CREATE_NO_WINDOW
@@ -1013,7 +1013,7 @@ class ParallelOrchestrator:
                    "encoding": "utf-8",
                    "errors": "replace",
                    "cwd": str(self.project_dir),  # Run from project dir so CLI creates .claude/ in project
-                    "env": {**os.environ, "PYTHONUNBUFFERED": "1"},
+                    "env": {**os.environ, "PYTHONUNBUFFERED": "1", "NODE_COMPILE_CACHE": ""},
                }
                if sys.platform == "win32":
                    popen_kwargs["creationflags"] = subprocess.CREATE_NO_WINDOW
@@ -1074,7 +1074,7 @@ class ParallelOrchestrator:
            "encoding": "utf-8",
            "errors": "replace",
            "cwd": str(AUTOFORGE_ROOT),
-            "env": {**os.environ, "PYTHONUNBUFFERED": "1"},
+            "env": {**os.environ, "PYTHONUNBUFFERED": "1", "NODE_COMPILE_CACHE": ""},
        }
        if sys.platform == "win32":
            popen_kwargs["creationflags"] = subprocess.CREATE_NO_WINDOW
@@ -1160,6 +1160,19 @@ class ParallelOrchestrator:
                debug_log.log("CLEANUP", f"Error killing process tree for {agent_type} agent", error=str(e))
            self._on_agent_complete(feature_id, proc.returncode, agent_type, proc)

+    def _run_inter_session_cleanup(self):
+        """Run best-effort disk cleanup between agent sessions.
+
+        Calls ``cleanup_stale_temp()`` and then
+        ``cleanup_project_screenshots(self.project_dir)`` from ``temp_cleanup``.
+        Each step is guarded on its own so that a failure in one does not
+        skip the other. Any failure (including a failed import) is written
+        to ``debug_log`` and is not propagated to the caller.
+        """
+        try:
+            # Import inside the guard so an import error is logged, not raised.
+            from temp_cleanup import cleanup_project_screenshots, cleanup_stale_temp
+        except Exception as e:
+            debug_log.log("CLEANUP", f"Inter-session cleanup failed (non-fatal): {e}")
+            return
+
+        steps = (
+            (cleanup_stale_temp, ()),
+            (cleanup_project_screenshots, (self.project_dir,)),
+        )
+        for step, args in steps:
+            try:
+                step(*args)
+            except Exception as e:
+                debug_log.log("CLEANUP", f"Inter-session cleanup failed (non-fatal): {e}")
+
    def _signal_agent_completed(self):
        """Signal that an agent has completed, waking the main loop.

@@ -1235,6 +1248,8 @@ class ParallelOrchestrator:
                pid=proc.pid,
                feature_id=feature_id,
                status=status)
+            # Run lightweight cleanup between sessions
+            self._run_inter_session_cleanup()
            # Signal main loop that an agent slot is available
            self._signal_agent_completed()
            return
@@ -1301,6 +1316,8 @@ class ParallelOrchestrator:
        else:
            print(f"Feature #{feature_id} {status}", flush=True)

+        # Run lightweight cleanup between sessions
+        self._run_inter_session_cleanup()
        # Signal main loop that an agent slot is available
        self._signal_agent_completed()

--- a/server/main.py
+++ b/server/main.py
@@ -61,6 +61,17 @@ UI_DIST_DIR = ROOT_DIR / "ui" / "dist"
@asynccontextmanager
 async def lifespan(app: FastAPI):
    """Lifespan context manager for startup and shutdown."""
+    # Startup - clean up stale temp files (Playwright profiles, .node cache, etc.)
+    try:
+        from temp_cleanup import cleanup_stale_temp
+        stats = cleanup_stale_temp()
+        if stats["dirs_deleted"] > 0 or stats["files_deleted"] > 0:
+            mb_freed = stats["bytes_freed"] / (1024 * 1024)
+            logger.info("Startup temp cleanup: %d dirs, %d files, %.1f MB freed",
+                        stats["dirs_deleted"], stats["files_deleted"], mb_freed)
+    except Exception as e:
+        logger.warning("Startup temp cleanup failed (non-fatal): %s", e)
+
    # Startup - clean up orphaned lock files from previous runs
    cleanup_orphaned_locks()
    cleanup_orphaned_devserver_locks()
--- a/server/services/process_manager.py
+++ b/server/services/process_manager.py
@@ -410,6 +410,7 @@ class AgentProcessManager:
                **os.environ,
                "PYTHONUNBUFFERED": "1",
                "PLAYWRIGHT_HEADLESS": "true" if playwright_headless else "false",
+                "NODE_COMPILE_CACHE": "",  # Disable V8 compile caching to prevent .node file accumulation in %TEMP%
                **api_env,
            }

--- a/temp_cleanup.py
+++ b/temp_cleanup.py
@@ -37,11 +37,12 @@ DIR_PATTERNS = [
    "mongodb-memory-server*",           # MongoDB Memory Server binaries
    "ng-*",                             # Angular CLI temp directories
    "scoped_dir*",                      # Chrome/Chromium temp directories
+    "node-compile-cache",               # Node.js V8 compile cache directory
 ]

 # File patterns to clean up (glob patterns)
 FILE_PATTERNS = [
-    ".78912*.node",   # Node.js native module cache (major space consumer, ~7MB each)
+    ".[0-9a-f]*.node",   # Node.js/V8 compile cache files (~7MB each, varying hex prefixes)
    "claude-*-cwd",   # Claude CLI working directory temp files
    "mat-debug-*.log",  # Material/Angular debug logs
 ]
@@ -122,6 +123,54 @@ def cleanup_stale_temp(max_age_seconds: int = MAX_AGE_SECONDS) -> dict:
    return stats


+def cleanup_project_screenshots(project_dir: Path, max_age_seconds: int = 300) -> dict:
+    """
+    Delete aged-out screenshot files from the top level of a project directory.
+
+    Only files directly inside ``project_dir`` whose names match
+    ``feature*-*.png``, ``screenshot-*.png`` or ``step-*.png`` are considered;
+    subdirectories are not searched. A file is removed when its modification
+    time is more than ``max_age_seconds`` in the past.
+
+    Args:
+        project_dir: Path to the project directory (a ``str`` is also accepted).
+        max_age_seconds: Minimum age in seconds before a screenshot is deleted.
+                        Defaults to 5 minutes (300 seconds).
+
+    Returns:
+        Dictionary with cleanup statistics: ``files_deleted`` (int),
+        ``bytes_freed`` (int) and ``errors`` (list of message strings).
+        Per-file failures are recorded in ``errors`` rather than raised.
+    """
+    cutoff_time = time.time() - max_age_seconds
+    stats: dict = {"files_deleted": 0, "bytes_freed": 0, "errors": []}
+
+    screenshot_patterns = (
+        "feature*-*.png",
+        "screenshot-*.png",
+        "step-*.png",
+    )
+
+    root = Path(project_dir)
+    for pattern in screenshot_patterns:
+        for item in root.glob(pattern):
+            try:
+                if not item.is_file():
+                    continue
+                # One stat() call supplies both the age check and the size.
+                info = item.stat()
+                if info.st_mtime >= cutoff_time:
+                    continue
+                item.unlink(missing_ok=True)
+                # Count the file only once it is no longer present on disk.
+                if not item.exists():
+                    stats["files_deleted"] += 1
+                    stats["bytes_freed"] += info.st_size
+                    logger.debug("Deleted project screenshot: %s", item)
+            except Exception as e:
+                stats["errors"].append(f"Failed to delete {item}: {e}")
+                logger.debug("Failed to delete screenshot %s: %s", item, e)
+
+    if stats["files_deleted"] > 0:
+        mb_freed = stats["bytes_freed"] / (1024 * 1024)
+        logger.info("Screenshot cleanup: %d files, %.1f MB freed", stats["files_deleted"], mb_freed)
+
+    return stats
+
+
 def _get_dir_size(path: Path) -> int:
    """Get total size of a directory in bytes."""
    total = 0