From 9721368188ca0f04c8c36030ce0a6552f0ac3780 Mon Sep 17 00:00:00 2001
From: Caitlyn Byrne <cbyrne@arachne.us>
Date: Sun, 8 Feb 2026 13:25:37 -0500
Subject: [PATCH 01/14] feat: add graceful pause (drain mode) for running
 agents

File-based signal (.pause_drain) lets the orchestrator finish current
work before pausing instead of hard-freezing the process tree.  New
status states pausing/paused_graceful flow through WebSocket to the UI
where a Pause button, draining indicator, and Resume button are shown.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 autoforge_paths.py                           | 10 +++
 parallel_orchestrator.py                     | 45 ++++++++++
 server/routers/agent.py                      | 28 +++++++
 server/schemas.py                            |  2 +-
 server/services/process_manager.py           | 77 ++++++++++++++++--
 server/websocket.py                          | 27 ++++++
 ui/package-lock.json                         | 14 ++++
 ui/src/components/AgentControl.tsx           | 86 ++++++++++++++++----
 ui/src/components/OrchestratorStatusCard.tsx |  8 ++
 ui/src/hooks/useProjects.ts                  | 22 +++++
 ui/src/lib/api.ts                            | 12 +++
 ui/src/lib/types.ts                          |  4 +-
 12 files changed, 311 insertions(+), 24 deletions(-)

diff --git a/autoforge_paths.py b/autoforge_paths.py
index 8283a9b..c782f2c 100644
--- a/autoforge_paths.py
+++ b/autoforge_paths.py
@@ -39,6 +39,7 @@ assistant.db-wal
 assistant.db-shm
 .agent.lock
 .devserver.lock
+.pause_drain
 .claude_settings.json
 .claude_assistant_settings.json
 .claude_settings.expand.*.json
@@ -145,6 +146,15 @@ def get_claude_assistant_settings_path(project_dir: Path) -> Path:
     return _resolve_path(project_dir, ".claude_assistant_settings.json")
 
 
+def get_pause_drain_path(project_dir: Path) -> Path:
+    """Return the path to the ``.pause_drain`` signal file.
+
+    This file is created to request a graceful pause (drain mode).
+    Always uses the new location since it's a transient signal file.
+    """
+    return project_dir / ".autoforge" / ".pause_drain"
+
+
 def get_progress_cache_path(project_dir: Path) -> Path:
     """Resolve the path to ``.progress_cache``."""
     return _resolve_path(project_dir, ".progress_cache")
diff --git a/parallel_orchestrator.py b/parallel_orchestrator.py
index 856e33c..5112b4a 100644
--- a/parallel_orchestrator.py
+++ b/parallel_orchestrator.py
@@ -212,6 +212,9 @@ class ParallelOrchestrator:
         # Signal handlers only set this flag; cleanup happens in the main loop
         self._shutdown_requested = False
 
+        # Graceful pause (drain mode) flag
+        self._drain_requested = False
+
         # Session tracking for logging/debugging
         self.session_start_time: datetime | None = None
 
@@ -1368,6 +1371,9 @@ class ParallelOrchestrator:
         # Must happen before any debug_log.log() calls
         debug_log.start_session()
 
+        # Clear any stale drain signal from a previous session
+        self._clear_drain_signal()
+
         # Log startup to debug file
         debug_log.section("ORCHESTRATOR STARTUP")
         debug_log.log("STARTUP", "Orchestrator run_loop starting",
@@ -1489,6 +1495,34 @@ class ParallelOrchestrator:
                     print("\nAll features complete!", flush=True)
                     break
 
+                # --- Graceful pause (drain mode) ---
+                if not self._drain_requested and self._check_drain_signal():
+                    self._drain_requested = True
+                    print("Graceful pause requested - draining running agents...", flush=True)
+                    debug_log.log("DRAIN", "Graceful pause requested, draining running agents")
+
+                if self._drain_requested:
+                    with self._lock:
+                        coding_count = len(self.running_coding_agents)
+                        testing_count = len(self.running_testing_agents)
+
+                    if coding_count == 0 and testing_count == 0:
+                        print("All agents drained - paused.", flush=True)
+                        debug_log.log("DRAIN", "All agents drained, entering paused state")
+                        # Wait until signal file is removed (resume) or shutdown
+                        while self._check_drain_signal() and self.is_running and not self._shutdown_requested:
+                            await asyncio.sleep(1)
+                        if not self.is_running or self._shutdown_requested:
+                            break
+                        self._drain_requested = False
+                        print("Resuming from graceful pause...", flush=True)
+                        debug_log.log("DRAIN", "Resuming from graceful pause")
+                        continue
+                    else:
+                        debug_log.log("DRAIN", f"Waiting for agents to finish: coding={coding_count}, testing={testing_count}")
+                        await self._wait_for_agent_completion()
+                        continue
+
                 # Maintain testing agents independently (runs every iteration)
                 self._maintain_testing_agents(feature_dicts)
 
@@ -1613,6 +1647,17 @@ class ParallelOrchestrator:
                 "yolo_mode": self.yolo_mode,
             }
 
+    def _check_drain_signal(self) -> bool:
+        """Check if the graceful pause (drain) signal file exists."""
+        from autoforge_paths import get_pause_drain_path
+        return get_pause_drain_path(self.project_dir).exists()
+
+    def _clear_drain_signal(self) -> None:
+        """Delete the drain signal file and reset the flag."""
+        from autoforge_paths import get_pause_drain_path
+        get_pause_drain_path(self.project_dir).unlink(missing_ok=True)
+        self._drain_requested = False
+
     def cleanup(self) -> None:
         """Clean up database resources. Safe to call multiple times.
 
diff --git a/server/routers/agent.py b/server/routers/agent.py
index 26605e4..ea96166 100644
--- a/server/routers/agent.py
+++ b/server/routers/agent.py
@@ -175,3 +175,31 @@ async def resume_agent(project_name: str):
         status=manager.status,
         message=message,
     )
+
+
+@router.post("/graceful-pause", response_model=AgentActionResponse)
+async def graceful_pause_agent(project_name: str):
+    """Request a graceful pause (drain mode) - finish current work then pause."""
+    manager = get_project_manager(project_name)
+
+    success, message = await manager.graceful_pause()
+
+    return AgentActionResponse(
+        success=success,
+        status=manager.status,
+        message=message,
+    )
+
+
+@router.post("/graceful-resume", response_model=AgentActionResponse)
+async def graceful_resume_agent(project_name: str):
+    """Resume from a graceful pause."""
+    manager = get_project_manager(project_name)
+
+    success, message = await manager.graceful_resume()
+
+    return AgentActionResponse(
+        success=success,
+        status=manager.status,
+        message=message,
+    )
diff --git a/server/schemas.py b/server/schemas.py
index 5f546e2..d470d49 100644
--- a/server/schemas.py
+++ b/server/schemas.py
@@ -217,7 +217,7 @@ class AgentStartRequest(BaseModel):
 
 class AgentStatus(BaseModel):
     """Current agent status."""
-    status: Literal["stopped", "running", "paused", "crashed"]
+    status: Literal["stopped", "running", "paused", "crashed", "pausing", "paused_graceful"]
     pid: int | None = None
     started_at: datetime | None = None
     yolo_mode: bool = False
diff --git a/server/services/process_manager.py b/server/services/process_manager.py
index d38d900..0af7cba 100644
--- a/server/services/process_manager.py
+++ b/server/services/process_manager.py
@@ -77,7 +77,7 @@ class AgentProcessManager:
         self.project_dir = project_dir
         self.root_dir = root_dir
         self.process: subprocess.Popen | None = None
-        self._status: Literal["stopped", "running", "paused", "crashed"] = "stopped"
+        self._status: Literal["stopped", "running", "paused", "crashed", "pausing", "paused_graceful"] = "stopped"
         self.started_at: datetime | None = None
         self._output_task: asyncio.Task | None = None
         self.yolo_mode: bool = False  # YOLO mode for rapid prototyping
@@ -96,11 +96,11 @@ class AgentProcessManager:
         self.lock_file = get_agent_lock_path(self.project_dir)
 
     @property
-    def status(self) -> Literal["stopped", "running", "paused", "crashed"]:
+    def status(self) -> Literal["stopped", "running", "paused", "crashed", "pausing", "paused_graceful"]:
         return self._status
 
     @status.setter
-    def status(self, value: Literal["stopped", "running", "paused", "crashed"]):
+    def status(self, value: Literal["stopped", "running", "paused", "crashed", "pausing", "paused_graceful"]):
         old_status = self._status
         self._status = value
         if old_status != value:
@@ -308,6 +308,12 @@ class AgentProcessManager:
                     for help_line in AUTH_ERROR_HELP.strip().split('\n'):
                         await self._broadcast_output(help_line)
 
+                # Detect graceful pause status transitions from orchestrator output
+                if "All agents drained - paused." in decoded:
+                    self.status = "paused_graceful"
+                elif "Resuming from graceful pause..." in decoded:
+                    self.status = "running"
+
                 await self._broadcast_output(sanitized)
 
         except asyncio.CancelledError:
@@ -355,7 +361,7 @@ class AgentProcessManager:
         Returns:
             Tuple of (success, message)
         """
-        if self.status in ("running", "paused"):
+        if self.status in ("running", "paused", "pausing", "paused_graceful"):
             return False, f"Agent is already {self.status}"
 
         if not self._check_lock():
@@ -481,6 +487,12 @@ class AgentProcessManager:
 
             self._cleanup_stale_features()
             self._remove_lock()
+            # Clean up drain signal file if present
+            try:
+                from autoforge_paths import get_pause_drain_path
+                get_pause_drain_path(self.project_dir).unlink(missing_ok=True)
+            except Exception:
+                pass
             self.status = "stopped"
             self.process = None
             self.started_at = None
@@ -541,6 +553,47 @@ class AgentProcessManager:
             logger.exception("Failed to resume agent")
             return False, f"Failed to resume agent: {e}"
 
+    async def graceful_pause(self) -> tuple[bool, str]:
+        """Request a graceful pause (drain mode).
+
+        Creates a signal file that the orchestrator polls. Running agents
+        finish their current work before the orchestrator enters a paused state.
+
+        Returns:
+            Tuple of (success, message)
+        """
+        if not self.process or self.status not in ("running",):
+            return False, "Agent is not running"
+
+        try:
+            from autoforge_paths import get_pause_drain_path
+            drain_path = get_pause_drain_path(self.project_dir)
+            drain_path.parent.mkdir(parents=True, exist_ok=True)
+            drain_path.write_text(str(self.process.pid))
+            self.status = "pausing"
+            return True, "Graceful pause requested"
+        except Exception as e:
+            logger.exception("Failed to request graceful pause")
+            return False, f"Failed to request graceful pause: {e}"
+
+    async def graceful_resume(self) -> tuple[bool, str]:
+        """Resume from a graceful pause by removing the drain signal file.
+
+        Returns:
+            Tuple of (success, message)
+        """
+        if not self.process or self.status not in ("pausing", "paused_graceful"):
+            return False, "Agent is not in a graceful pause state"
+
+        try:
+            from autoforge_paths import get_pause_drain_path
+            get_pause_drain_path(self.project_dir).unlink(missing_ok=True)
+            self.status = "running"
+            return True, "Agent resumed from graceful pause"
+        except Exception as e:
+            logger.exception("Failed to resume from graceful pause")
+            return False, f"Failed to resume: {e}"
+
     async def healthcheck(self) -> bool:
         """
         Check if the agent process is still alive.
@@ -556,8 +609,14 @@ class AgentProcessManager:
         poll = self.process.poll()
         if poll is not None:
             # Process has terminated
-            if self.status in ("running", "paused"):
+            if self.status in ("running", "paused", "pausing", "paused_graceful"):
                 self._cleanup_stale_features()
+                # Clean up drain signal file if present
+                try:
+                    from autoforge_paths import get_pause_drain_path
+                    get_pause_drain_path(self.project_dir).unlink(missing_ok=True)
+                except Exception:
+                    pass
                 self.status = "crashed"
                 self._remove_lock()
             return False
@@ -642,8 +701,14 @@ def cleanup_orphaned_locks() -> int:
             if not project_path.exists():
                 continue
 
+            # Clean up stale drain signal files
+            from autoforge_paths import get_autoforge_dir, get_pause_drain_path
+            drain_file = get_pause_drain_path(project_path)
+            if drain_file.exists():
+                drain_file.unlink(missing_ok=True)
+                logger.info("Removed stale drain signal file for project '%s'", name)
+
             # Check both legacy and new locations for lock files
-            from autoforge_paths import get_autoforge_dir
             lock_locations = [
                 project_path / ".agent.lock",
                 get_autoforge_dir(project_path) / ".agent.lock",
diff --git a/server/websocket.py b/server/websocket.py
index e660064..ef57bf0 100644
--- a/server/websocket.py
+++ b/server/websocket.py
@@ -78,6 +78,9 @@ ORCHESTRATOR_PATTERNS = {
     'testing_complete': re.compile(r'Feature #(\d+) testing (completed|failed)'),
     'all_complete': re.compile(r'All features complete'),
     'blocked_features': re.compile(r'(\d+) blocked by dependencies'),
+    'drain_start': re.compile(r'Graceful pause requested'),
+    'drain_complete': re.compile(r'All agents drained'),
+    'drain_resume': re.compile(r'Resuming from graceful pause'),
 }
 
 
@@ -562,6 +565,30 @@ class OrchestratorTracker:
                     'All features complete!'
                 )
 
+            # Graceful pause (drain mode) events
+            elif ORCHESTRATOR_PATTERNS['drain_start'].search(line):
+                self.state = 'draining'
+                update = self._create_update(
+                    'drain_start',
+                    'Draining active agents...'
+                )
+
+            elif ORCHESTRATOR_PATTERNS['drain_complete'].search(line):
+                self.state = 'paused'
+                self.coding_agents = 0
+                self.testing_agents = 0
+                update = self._create_update(
+                    'drain_complete',
+                    'All agents drained. Paused.'
+                )
+
+            elif ORCHESTRATOR_PATTERNS['drain_resume'].search(line):
+                self.state = 'scheduling'
+                update = self._create_update(
+                    'drain_resume',
+                    'Resuming feature scheduling'
+                )
+
             return update
 
     def _create_update(
diff --git a/ui/package-lock.json b/ui/package-lock.json
index 56a3de2..6c11fce 100644
--- a/ui/package-lock.json
+++ b/ui/package-lock.json
@@ -96,6 +96,7 @@
       "integrity": "sha512-e7jT4DxYvIDLk1ZHmU/m/mB19rex9sv0c2ftBtjSBv+kVM/902eh0fINUzD7UwLLNR+jU585GxUJ8/EBfAM5fw==",
       "dev": true,
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "@babel/code-frame": "^7.27.1",
         "@babel/generator": "^7.28.5",
@@ -2825,6 +2826,7 @@
       "integrity": "sha512-MciR4AKGHWl7xwxkBa6xUGxQJ4VBOmPTF7sL+iGzuahOFaO0jHCsuEfS80pan1ef4gWId1oWOweIhrDEYLuaOw==",
       "dev": true,
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "undici-types": "~6.21.0"
       }
@@ -2834,6 +2836,7 @@
       "resolved": "https://registry.npmjs.org/@types/react/-/react-19.2.9.tgz",
       "integrity": "sha512-Lpo8kgb/igvMIPeNV2rsYKTgaORYdO1XGVZ4Qz3akwOj0ySGYMPlQWa8BaLn0G63D1aSaAQ5ldR06wCpChQCjA==",
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "csstype": "^3.2.2"
       }
@@ -2844,6 +2847,7 @@
       "integrity": "sha512-jp2L/eY6fn+KgVVQAOqYItbF0VY/YApe5Mz2F0aykSO8gx31bYCZyvSeYxCHKvzHG5eZjc+zyaS5BrBWya2+kQ==",
       "devOptional": true,
       "license": "MIT",
+      "peer": true,
       "peerDependencies": {
         "@types/react": "^19.2.0"
       }
@@ -2899,6 +2903,7 @@
       "integrity": "sha512-3xP4XzzDNQOIqBMWogftkwxhg5oMKApqY0BAflmLZiFYHqyhSOxv/cd/zPQLTcCXr4AkaKb25joocY0BD1WC6A==",
       "dev": true,
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "@typescript-eslint/scope-manager": "8.51.0",
         "@typescript-eslint/types": "8.51.0",
@@ -3209,6 +3214,7 @@
       "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==",
       "dev": true,
       "license": "MIT",
+      "peer": true,
       "bin": {
         "acorn": "bin/acorn"
       },
@@ -3340,6 +3346,7 @@
         }
       ],
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "baseline-browser-mapping": "^2.9.0",
         "caniuse-lite": "^1.0.30001759",
@@ -3611,6 +3618,7 @@
       "resolved": "https://registry.npmjs.org/d3-selection/-/d3-selection-3.0.0.tgz",
       "integrity": "sha512-fmTRWbNMmsmWq6xJV8D19U/gw/bwrHfNXxrIN+HfZgnzqTHp9jOmKMhsTUjXOJnZOdZY9Q28y4yebKzqDKlxlQ==",
       "license": "ISC",
+      "peer": true,
       "engines": {
         "node": ">=12"
       }
@@ -3836,6 +3844,7 @@
       "integrity": "sha512-LEyamqS7W5HB3ujJyvi0HQK/dtVINZvd5mAAp9eT5S/ujByGjiZLCzPcHVzuXbpJDJF/cxwHlfceVUDZ2lnSTw==",
       "dev": true,
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "@eslint-community/eslint-utils": "^4.8.0",
         "@eslint-community/regexpp": "^4.12.1",
@@ -5836,6 +5845,7 @@
       "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
       "dev": true,
       "license": "MIT",
+      "peer": true,
       "engines": {
         "node": ">=12"
       },
@@ -5951,6 +5961,7 @@
       "resolved": "https://registry.npmjs.org/react/-/react-19.2.3.tgz",
       "integrity": "sha512-Ku/hhYbVjOQnXDZFv2+RibmLFGwFdeeKHFcOTlrt7xplBnya5OGn/hIRDsqDiSUcfORsDC7MPxwork8jBwsIWA==",
       "license": "MIT",
+      "peer": true,
       "engines": {
         "node": ">=0.10.0"
       }
@@ -5960,6 +5971,7 @@
       "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-19.2.3.tgz",
       "integrity": "sha512-yELu4WmLPw5Mr/lmeEpox5rw3RETacE++JgHqQzd2dg+YbJuat3jH4ingc+WPZhxaoFzdv9y33G+F7Nl5O0GBg==",
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "scheduler": "^0.27.0"
       },
@@ -6424,6 +6436,7 @@
       "integrity": "sha512-84MVSjMEHP+FQRPy3pX9sTVV/INIex71s9TL2Gm5FG/WG1SqXeKyZ0k7/blY/4FdOzI12CBy1vGc4og/eus0fw==",
       "dev": true,
       "license": "Apache-2.0",
+      "peer": true,
       "bin": {
         "tsc": "bin/tsc",
         "tsserver": "bin/tsserver"
@@ -6677,6 +6690,7 @@
       "integrity": "sha512-w+N7Hifpc3gRjZ63vYBXA56dvvRlNWRczTdmCBBa+CotUzAPf5b7YMdMR/8CQoeYE5LX3W4wj6RYTgonm1b9DA==",
       "dev": true,
       "license": "MIT",
+      "peer": true,
       "dependencies": {
         "esbuild": "^0.27.0",
         "fdir": "^6.5.0",
diff --git a/ui/src/components/AgentControl.tsx b/ui/src/components/AgentControl.tsx
index 3529c03..7dc7e0a 100644
--- a/ui/src/components/AgentControl.tsx
+++ b/ui/src/components/AgentControl.tsx
@@ -1,8 +1,10 @@
 import { useState, useEffect, useRef, useCallback } from 'react'
-import { Play, Square, Loader2, GitBranch, Clock } from 'lucide-react'
+import { Play, Square, Loader2, GitBranch, Clock, Pause, PlayCircle } from 'lucide-react'
 import {
   useStartAgent,
   useStopAgent,
+  useGracefulPauseAgent,
+  useGracefulResumeAgent,
   useSettings,
   useUpdateProjectSettings,
 } from '../hooks/useProjects'
@@ -60,12 +62,14 @@ export function AgentControl({ projectName, status, defaultConcurrency = 3 }: Ag
 
   const startAgent = useStartAgent(projectName)
   const stopAgent = useStopAgent(projectName)
+  const gracefulPause = useGracefulPauseAgent(projectName)
+  const gracefulResume = useGracefulResumeAgent(projectName)
   const { data: nextRun } = useNextScheduledRun(projectName)
 
   const [showScheduleModal, setShowScheduleModal] = useState(false)
 
-  const isLoading = startAgent.isPending || stopAgent.isPending
-  const isRunning = status === 'running' || status === 'paused'
+  const isLoading = startAgent.isPending || stopAgent.isPending || gracefulPause.isPending || gracefulResume.isPending
+  const isRunning = status === 'running' || status === 'paused' || status === 'pausing' || status === 'paused_graceful'
   const isLoadingStatus = status === 'loading'
   const isParallel = concurrency > 1
 
@@ -126,7 +130,7 @@ export function AgentControl({ projectName, status, defaultConcurrency = 3 }: Ag
           </Badge>
         )}
 
-        {/* Start/Stop button */}
+        {/* Start/Stop/Pause/Resume buttons */}
         {isLoadingStatus ? (
           <Button disabled variant="outline" size="sm">
             <Loader2 size={18} className="animate-spin" />
@@ -146,19 +150,69 @@ export function AgentControl({ projectName, status, defaultConcurrency = 3 }: Ag
             )}
           </Button>
         ) : (
-          <Button
-            onClick={handleStop}
-            disabled={isLoading}
-            variant="destructive"
-            size="sm"
-            title={yoloMode ? 'Stop Agent (YOLO Mode)' : 'Stop Agent'}
-          >
-            {isLoading ? (
-              <Loader2 size={18} className="animate-spin" />
-            ) : (
-              <Square size={18} />
+          <div className="flex items-center gap-1.5">
+            {/* Pausing indicator */}
+            {status === 'pausing' && (
+              <Badge variant="secondary" className="gap-1 animate-pulse">
+                <Loader2 size={12} className="animate-spin" />
+                Pausing...
+              </Badge>
             )}
-          </Button>
+
+            {/* Paused indicator + Resume button */}
+            {status === 'paused_graceful' && (
+              <>
+                <Badge variant="outline" className="gap-1">
+                  Paused
+                </Badge>
+                <Button
+                  onClick={() => gracefulResume.mutate()}
+                  disabled={isLoading}
+                  variant="default"
+                  size="sm"
+                  title="Resume agent"
+                >
+                  {gracefulResume.isPending ? (
+                    <Loader2 size={18} className="animate-spin" />
+                  ) : (
+                    <PlayCircle size={18} />
+                  )}
+                </Button>
+              </>
+            )}
+
+            {/* Graceful pause button (only when running normally) */}
+            {status === 'running' && (
+              <Button
+                onClick={() => gracefulPause.mutate()}
+                disabled={isLoading}
+                variant="outline"
+                size="sm"
+                title="Pause agent (finish current work first)"
+              >
+                {gracefulPause.isPending ? (
+                  <Loader2 size={18} className="animate-spin" />
+                ) : (
+                  <Pause size={18} />
+                )}
+              </Button>
+            )}
+
+            {/* Stop button (always available) */}
+            <Button
+              onClick={handleStop}
+              disabled={isLoading}
+              variant="destructive"
+              size="sm"
+              title="Stop Agent (immediate)"
+            >
+              {stopAgent.isPending ? (
+                <Loader2 size={18} className="animate-spin" />
+              ) : (
+                <Square size={18} />
+              )}
+            </Button>
+          </div>
         )}
 
         {/* Clock button to open schedule modal */}
diff --git a/ui/src/components/OrchestratorStatusCard.tsx b/ui/src/components/OrchestratorStatusCard.tsx
index dedeaa9..860abfd 100644
--- a/ui/src/components/OrchestratorStatusCard.tsx
+++ b/ui/src/components/OrchestratorStatusCard.tsx
@@ -25,6 +25,10 @@ function getStateText(state: OrchestratorState): string {
       return 'Watching progress...'
     case 'complete':
       return 'Mission accomplished!'
+    case 'draining':
+      return 'Draining agents...'
+    case 'paused':
+      return 'Paused'
     default:
       return 'Orchestrating...'
   }
@@ -42,6 +46,10 @@ function getStateColor(state: OrchestratorState): string {
       return 'text-primary'
     case 'initializing':
       return 'text-yellow-600 dark:text-yellow-400'
+    case 'draining':
+      return 'text-amber-600 dark:text-amber-400'
+    case 'paused':
+      return 'text-muted-foreground'
     default:
       return 'text-muted-foreground'
   }
diff --git a/ui/src/hooks/useProjects.ts b/ui/src/hooks/useProjects.ts
index f69d90f..4ed4436 100644
--- a/ui/src/hooks/useProjects.ts
+++ b/ui/src/hooks/useProjects.ts
@@ -197,6 +197,28 @@ export function useResumeAgent(projectName: string) {
   })
 }
 
+export function useGracefulPauseAgent(projectName: string) {
+  const queryClient = useQueryClient()
+
+  return useMutation({
+    mutationFn: () => api.gracefulPauseAgent(projectName),
+    onSuccess: () => {
+      queryClient.invalidateQueries({ queryKey: ['agent-status', projectName] })
+    },
+  })
+}
+
+export function useGracefulResumeAgent(projectName: string) {
+  const queryClient = useQueryClient()
+
+  return useMutation({
+    mutationFn: () => api.gracefulResumeAgent(projectName),
+    onSuccess: () => {
+      queryClient.invalidateQueries({ queryKey: ['agent-status', projectName] })
+    },
+  })
+}
+
 // ============================================================================
 // Setup
 // ============================================================================
diff --git a/ui/src/lib/api.ts b/ui/src/lib/api.ts
index 23e9973..739d5ff 100644
--- a/ui/src/lib/api.ts
+++ b/ui/src/lib/api.ts
@@ -271,6 +271,18 @@ export async function resumeAgent(projectName: string): Promise<AgentActionRespo
   })
 }
 
+export async function gracefulPauseAgent(projectName: string): Promise<AgentActionResponse> {
+  return fetchJSON(`/projects/${encodeURIComponent(projectName)}/agent/graceful-pause`, {
+    method: 'POST',
+  })
+}
+
+export async function gracefulResumeAgent(projectName: string): Promise<AgentActionResponse> {
+  return fetchJSON(`/projects/${encodeURIComponent(projectName)}/agent/graceful-resume`, {
+    method: 'POST',
+  })
+}
+
 // ============================================================================
 // Spec Creation API
 // ============================================================================
diff --git a/ui/src/lib/types.ts b/ui/src/lib/types.ts
index ba8eab9..5a82a63 100644
--- a/ui/src/lib/types.ts
+++ b/ui/src/lib/types.ts
@@ -120,7 +120,7 @@ export interface FeatureUpdate {
 }
 
 // Agent types
-export type AgentStatus = 'stopped' | 'running' | 'paused' | 'crashed' | 'loading'
+export type AgentStatus = 'stopped' | 'running' | 'paused' | 'crashed' | 'loading' | 'pausing' | 'paused_graceful'
 
 export interface AgentStatusResponse {
   status: AgentStatus
@@ -216,6 +216,8 @@ export type OrchestratorState =
   | 'spawning'
   | 'monitoring'
   | 'complete'
+  | 'draining'
+  | 'paused'
 
 // Orchestrator event for recent activity
 export interface OrchestratorEvent {

From f87970daca85ab268284de9cb36911bbc10f7cc7 Mon Sep 17 00:00:00 2001
From: Auto <leon.vanzyl@gmail.com>
Date: Mon, 9 Feb 2026 08:54:52 +0200
Subject: [PATCH 02/14] fix: prevent temp file accumulation during long agent
 runs

Address three issues reported after overnight AutoForge runs:
1. ~193GB of .node files in %TEMP% from V8 compile caching
2. Stale npm artifact folders on drive root when %TEMP% fills up
3. PNG screenshot files left in project root by Playwright

Changes:
- Widen .node cleanup glob from ".78912*.node" to ".[0-9a-f]*.node"
  to match all V8 compile cache hex prefixes
- Add "node-compile-cache" directory to temp cleanup patterns
- Set NODE_COMPILE_CACHE="" in all subprocess environments (client.py,
  parallel_orchestrator.py, process_manager.py) to disable V8 compile
  caching at the source
- Add cleanup_project_screenshots() to remove stale .png files from
  project directories (feature*-*.png, screenshot-*.png, step-*.png)
- Run cleanup_stale_temp() at server startup in lifespan()
- Add _run_inter_session_cleanup() to orchestrator, called after each
  agent completes (both coding and testing paths)
- Update coding and testing prompt templates to instruct agents to use
  inline (base64) screenshots only, never saving files to disk

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .claude/templates/coding_prompt.template.md  |  6 ++-
 .claude/templates/testing_prompt.template.md |  8 +--
 client.py                                    |  3 ++
 parallel_orchestrator.py                     | 25 ++++++++--
 server/main.py                               | 11 +++++
 server/services/process_manager.py           |  1 +
 temp_cleanup.py                              | 51 +++++++++++++++++++-
 7 files changed, 94 insertions(+), 11 deletions(-)

diff --git a/.claude/templates/coding_prompt.template.md b/.claude/templates/coding_prompt.template.md
index c8d3ba6..65243d8 100644
--- a/.claude/templates/coding_prompt.template.md
+++ b/.claude/templates/coding_prompt.template.md
@@ -90,13 +90,13 @@ Use browser automation tools:
 
 - Navigate to the app in a real browser
 - Interact like a human user (click, type, scroll)
-- Take screenshots at each step
+- Take screenshots at each step (use inline screenshots only -- do NOT save screenshot files to disk)
 - Verify both functionality AND visual appearance
 
 **DO:**
 
 - Test through the UI with clicks and keyboard input
-- Take screenshots to verify visual appearance
+- Take screenshots to verify visual appearance (inline only, never save to disk)
 - Check for console errors in browser
 - Verify complete user workflows end-to-end
 
@@ -194,6 +194,8 @@ Before context fills up:
 
 Use Playwright MCP tools (`browser_*`) for UI verification. Key tools: `navigate`, `click`, `type`, `fill_form`, `take_screenshot`, `console_messages`, `network_requests`. All tools have auto-wait built in.
 
+**Screenshot rule:** Always use inline mode (base64). NEVER save screenshots as files to disk.
+
 Test like a human user with mouse and keyboard. Use `browser_console_messages` to detect errors. Don't bypass UI with JavaScript evaluation.
 
 ---
diff --git a/.claude/templates/testing_prompt.template.md b/.claude/templates/testing_prompt.template.md
index c8011a3..3714d47 100644
--- a/.claude/templates/testing_prompt.template.md
+++ b/.claude/templates/testing_prompt.template.md
@@ -31,14 +31,14 @@ For the feature returned:
 1. Read and understand the feature's verification steps
 2. Navigate to the relevant part of the application
 3. Execute each verification step using browser automation
-4. Take screenshots to document the verification
+4. Take screenshots to document the verification (inline only -- do NOT save to disk)
 5. Check for console errors
 
 Use browser automation tools:
 
 **Navigation & Screenshots:**
 - browser_navigate - Navigate to a URL
-- browser_take_screenshot - Capture screenshot (use for visual verification)
+- browser_take_screenshot - Capture screenshot (inline mode only -- never save to disk)
 - browser_snapshot - Get accessibility tree snapshot
 
 **Element Interaction:**
@@ -79,7 +79,7 @@ A regression has been introduced. You MUST fix it:
 
 4. **Verify the fix:**
    - Run through all verification steps again
-   - Take screenshots confirming the fix
+   - Take screenshots confirming the fix (inline only, never save to disk)
 
 5. **Mark as passing after fix:**
    ```
@@ -110,7 +110,7 @@ A regression has been introduced. You MUST fix it:
 All interaction tools have **built-in auto-wait** -- no manual timeouts needed.
 
 - `browser_navigate` - Navigate to URL
-- `browser_take_screenshot` - Capture screenshot
+- `browser_take_screenshot` - Capture screenshot (inline only, never save to disk)
 - `browser_snapshot` - Get accessibility tree
 - `browser_click` - Click elements
 - `browser_type` - Type text
diff --git a/client.py b/client.py
index a81a66d..4d06816 100644
--- a/client.py
+++ b/client.py
@@ -446,6 +446,9 @@ def create_client(
         mcp_servers["playwright"] = {
             "command": "npx",
             "args": playwright_args,
+            "env": {
+                "NODE_COMPILE_CACHE": "",  # Disable V8 compile caching to prevent .node file accumulation in %TEMP%
+            },
         }
 
     # Build environment overrides for API endpoint configuration
diff --git a/parallel_orchestrator.py b/parallel_orchestrator.py
index 856e33c..fc7fe7a 100644
--- a/parallel_orchestrator.py
+++ b/parallel_orchestrator.py
@@ -846,7 +846,7 @@ class ParallelOrchestrator:
                 "encoding": "utf-8",
                 "errors": "replace",
                 "cwd": str(self.project_dir),  # Run from project dir so CLI creates .claude/ in project
-                "env": {**os.environ, "PYTHONUNBUFFERED": "1"},
+                "env": {**os.environ, "PYTHONUNBUFFERED": "1", "NODE_COMPILE_CACHE": ""},
             }
             if sys.platform == "win32":
                 popen_kwargs["creationflags"] = subprocess.CREATE_NO_WINDOW
@@ -909,7 +909,7 @@ class ParallelOrchestrator:
                 "encoding": "utf-8",
                 "errors": "replace",
                 "cwd": str(self.project_dir),  # Run from project dir so CLI creates .claude/ in project
-                "env": {**os.environ, "PYTHONUNBUFFERED": "1"},
+                "env": {**os.environ, "PYTHONUNBUFFERED": "1", "NODE_COMPILE_CACHE": ""},
             }
             if sys.platform == "win32":
                 popen_kwargs["creationflags"] = subprocess.CREATE_NO_WINDOW
@@ -1013,7 +1013,7 @@ class ParallelOrchestrator:
                     "encoding": "utf-8",
                     "errors": "replace",
                     "cwd": str(self.project_dir),  # Run from project dir so CLI creates .claude/ in project
-                    "env": {**os.environ, "PYTHONUNBUFFERED": "1"},
+                    "env": {**os.environ, "PYTHONUNBUFFERED": "1", "NODE_COMPILE_CACHE": ""},
                 }
                 if sys.platform == "win32":
                     popen_kwargs["creationflags"] = subprocess.CREATE_NO_WINDOW
@@ -1074,7 +1074,7 @@ class ParallelOrchestrator:
             "encoding": "utf-8",
             "errors": "replace",
             "cwd": str(AUTOFORGE_ROOT),
-            "env": {**os.environ, "PYTHONUNBUFFERED": "1"},
+            "env": {**os.environ, "PYTHONUNBUFFERED": "1", "NODE_COMPILE_CACHE": ""},
         }
         if sys.platform == "win32":
             popen_kwargs["creationflags"] = subprocess.CREATE_NO_WINDOW
@@ -1160,6 +1160,19 @@ class ParallelOrchestrator:
                 debug_log.log("CLEANUP", f"Error killing process tree for {agent_type} agent", error=str(e))
             self._on_agent_complete(feature_id, proc.returncode, agent_type, proc)
 
+    def _run_inter_session_cleanup(self):
+        """Run lightweight cleanup between agent sessions.
+
+        Removes stale temp files and project screenshots to prevent
+        disk space accumulation during long overnight runs.
+        """
+        try:
+            from temp_cleanup import cleanup_project_screenshots, cleanup_stale_temp
+            cleanup_stale_temp()
+            cleanup_project_screenshots(self.project_dir)
+        except Exception as e:
+            debug_log.log("CLEANUP", f"Inter-session cleanup failed (non-fatal): {e}")
+
     def _signal_agent_completed(self):
         """Signal that an agent has completed, waking the main loop.
 
@@ -1235,6 +1248,8 @@ class ParallelOrchestrator:
                 pid=proc.pid,
                 feature_id=feature_id,
                 status=status)
+            # Run lightweight cleanup between sessions
+            self._run_inter_session_cleanup()
             # Signal main loop that an agent slot is available
             self._signal_agent_completed()
             return
@@ -1301,6 +1316,8 @@ class ParallelOrchestrator:
         else:
             print(f"Feature #{feature_id} {status}", flush=True)
 
+        # Run lightweight cleanup between sessions
+        self._run_inter_session_cleanup()
         # Signal main loop that an agent slot is available
         self._signal_agent_completed()
 
diff --git a/server/main.py b/server/main.py
index 33fd348..20ccd0b 100644
--- a/server/main.py
+++ b/server/main.py
@@ -61,6 +61,17 @@ UI_DIST_DIR = ROOT_DIR / "ui" / "dist"
 @asynccontextmanager
 async def lifespan(app: FastAPI):
     """Lifespan context manager for startup and shutdown."""
+    # Startup - clean up stale temp files (Playwright profiles, .node cache, etc.)
+    try:
+        from temp_cleanup import cleanup_stale_temp
+        stats = cleanup_stale_temp()
+        if stats["dirs_deleted"] > 0 or stats["files_deleted"] > 0:
+            mb_freed = stats["bytes_freed"] / (1024 * 1024)
+            logger.info("Startup temp cleanup: %d dirs, %d files, %.1f MB freed",
+                        stats["dirs_deleted"], stats["files_deleted"], mb_freed)
+    except Exception as e:
+        logger.warning("Startup temp cleanup failed (non-fatal): %s", e)
+
     # Startup - clean up orphaned lock files from previous runs
     cleanup_orphaned_locks()
     cleanup_orphaned_devserver_locks()
diff --git a/server/services/process_manager.py b/server/services/process_manager.py
index d38d900..9a4bd5c 100644
--- a/server/services/process_manager.py
+++ b/server/services/process_manager.py
@@ -410,6 +410,7 @@ class AgentProcessManager:
                 **os.environ,
                 "PYTHONUNBUFFERED": "1",
                 "PLAYWRIGHT_HEADLESS": "true" if playwright_headless else "false",
+                "NODE_COMPILE_CACHE": "",  # Disable V8 compile caching to prevent .node file accumulation in %TEMP%
                 **api_env,
             }
 
diff --git a/temp_cleanup.py b/temp_cleanup.py
index 59e53ef..5cfda06 100644
--- a/temp_cleanup.py
+++ b/temp_cleanup.py
@@ -37,11 +37,12 @@ DIR_PATTERNS = [
     "mongodb-memory-server*",           # MongoDB Memory Server binaries
     "ng-*",                             # Angular CLI temp directories
     "scoped_dir*",                      # Chrome/Chromium temp directories
+    "node-compile-cache",               # Node.js V8 compile cache directory
 ]
 
 # File patterns to clean up (glob patterns)
 FILE_PATTERNS = [
-    ".78912*.node",   # Node.js native module cache (major space consumer, ~7MB each)
+    ".[0-9a-f]*.node",   # Node.js/V8 compile cache files (~7MB each, varying hex prefixes)
     "claude-*-cwd",   # Claude CLI working directory temp files
     "mat-debug-*.log",  # Material/Angular debug logs
 ]
@@ -122,6 +123,54 @@ def cleanup_stale_temp(max_age_seconds: int = MAX_AGE_SECONDS) -> dict:
     return stats
 
 
+def cleanup_project_screenshots(project_dir: Path, max_age_seconds: int = 300) -> dict:
+    """
+    Clean up stale screenshot files from the project root.
+
+    Playwright browser verification can leave .png files in the project
+    directory. This removes them after they've aged out (default 5 minutes).
+
+    Args:
+        project_dir: Path to the project directory.
+        max_age_seconds: Maximum age in seconds before a screenshot is deleted.
+                        Defaults to 5 minutes (300 seconds).
+
+    Returns:
+        Dictionary with cleanup statistics (files_deleted, bytes_freed, errors).
+    """
+    cutoff_time = time.time() - max_age_seconds
+    stats: dict = {"files_deleted": 0, "bytes_freed": 0, "errors": []}
+
+    screenshot_patterns = [
+        "feature*-*.png",
+        "screenshot-*.png",
+        "step-*.png",
+    ]
+
+    for pattern in screenshot_patterns:
+        for item in project_dir.glob(pattern):
+            if not item.is_file():
+                continue
+            try:
+                mtime = item.stat().st_mtime
+                if mtime < cutoff_time:
+                    size = item.stat().st_size
+                    item.unlink(missing_ok=True)
+                    if not item.exists():
+                        stats["files_deleted"] += 1
+                        stats["bytes_freed"] += size
+                        logger.debug(f"Deleted project screenshot: {item}")
+            except Exception as e:
+                stats["errors"].append(f"Failed to delete {item}: {e}")
+                logger.debug(f"Failed to delete screenshot {item}: {e}")
+
+    if stats["files_deleted"] > 0:
+        mb_freed = stats["bytes_freed"] / (1024 * 1024)
+        logger.info(f"Screenshot cleanup: {stats['files_deleted']} files, {mb_freed:.1f} MB freed")
+
+    return stats
+
+
 def _get_dir_size(path: Path) -> int:
     """Get total size of a directory in bytes."""
     total = 0

From 859987e3b48150e2f378920933799db00fe74132 Mon Sep 17 00:00:00 2001
From: Auto <leon.vanzyl@gmail.com>
Date: Mon, 9 Feb 2026 08:55:49 +0200
Subject: [PATCH 03/14] 0.1.10

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index 92d7e12..f9a47c6 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "autoforge-ai",
-  "version": "0.1.9",
+  "version": "0.1.10",
   "description": "Autonomous coding agent with web UI - build complete apps with AI",
   "license": "AGPL-3.0",
   "bin": {

From 55064945a455d0ef9150e7cf2b24792f0a94cc04 Mon Sep 17 00:00:00 2001
From: Auto <leon.vanzyl@gmail.com>
Date: Mon, 9 Feb 2026 08:56:33 +0200
Subject: [PATCH 04/14] version patch

---
 ui/package-lock.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ui/package-lock.json b/ui/package-lock.json
index 56a3de2..e19d991 100644
--- a/ui/package-lock.json
+++ b/ui/package-lock.json
@@ -56,7 +56,7 @@
     },
     "..": {
       "name": "autoforge-ai",
-      "version": "0.1.9",
+      "version": "0.1.10",
       "license": "AGPL-3.0",
       "bin": {
         "autoforge": "bin/autoforge.js"

From d2b3ba9aeebe3864a9ae6ed8cac6bfa6b1238a0d Mon Sep 17 00:00:00 2001
From: nogataka <nogataka@gmail.com>
Date: Tue, 10 Feb 2026 21:29:05 +0900
Subject: [PATCH 05/14] feat: add Azure Anthropic (Claude) provider support

- Add "Azure Anthropic (Claude)" to API_PROVIDERS in registry.py
  with ANTHROPIC_API_KEY auth (required for Claude CLI to route
  through custom base URL instead of default Anthropic endpoint)
- Add Azure env var template to .env.example
- Show Base URL input field for Azure provider in Settings UI
  with "Configured" state and Azure-specific placeholder
- Widen Settings modal for better readability with long URLs
- Add Azure endpoint detection and "Azure Mode" log label
- Rename misleading "GLM Mode" fallback label to "Alternative API"

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .env.example                        |  9 ++++-
 client.py                           |  5 ++-
 registry.py                         | 12 ++++++
 ui/src/components/SettingsModal.tsx | 58 +++++++++++++++++++----------
 4 files changed, 62 insertions(+), 22 deletions(-)

diff --git a/.env.example b/.env.example
index 8458726..ed163ba 100644
--- a/.env.example
+++ b/.env.example
@@ -30,11 +30,18 @@
 # ANTHROPIC_DEFAULT_HAIKU_MODEL=claude-3-5-haiku@20241022
 
 # ===================
-# Alternative API Providers (GLM, Ollama, Kimi, Custom)
+# Alternative API Providers (Azure, GLM, Ollama, Kimi, Custom)
 # ===================
 # Configure via Settings UI (recommended) or set env vars below.
 # When both are set, env vars take precedence.
 #
+# Azure Anthropic (Claude):
+# ANTHROPIC_BASE_URL=https://your-resource.services.ai.azure.com/anthropic
+# ANTHROPIC_API_KEY=your-azure-api-key
+# ANTHROPIC_DEFAULT_OPUS_MODEL=claude-opus-4-6
+# ANTHROPIC_DEFAULT_SONNET_MODEL=claude-sonnet-4-5
+# ANTHROPIC_DEFAULT_HAIKU_MODEL=claude-haiku-4-5
+#
 # GLM (Zhipu AI):
 # ANTHROPIC_BASE_URL=https://api.z.ai/api/anthropic
 # ANTHROPIC_AUTH_TOKEN=your-glm-api-key
diff --git a/client.py b/client.py
index 4d06816..6e90c9c 100644
--- a/client.py
+++ b/client.py
@@ -463,6 +463,7 @@ def create_client(
     is_vertex = sdk_env.get("CLAUDE_CODE_USE_VERTEX") == "1"
     is_alternative_api = bool(base_url) or is_vertex
     is_ollama = "localhost:11434" in base_url or "127.0.0.1:11434" in base_url
+    is_azure = "services.ai.azure.com" in base_url
     model = convert_model_for_vertex(model)
     if sdk_env:
         print(f"   - API overrides: {', '.join(sdk_env.keys())}")
@@ -472,8 +473,10 @@ def create_client(
             print(f"   - Vertex AI Mode: Using GCP project '{project_id}' with model '{model}' in region '{region}'")
         elif is_ollama:
             print("   - Ollama Mode: Using local models")
+        elif is_azure:
+            print(f"   - Azure Mode: Using {base_url}")
         elif "ANTHROPIC_BASE_URL" in sdk_env:
-            print(f"   - GLM Mode: Using {sdk_env['ANTHROPIC_BASE_URL']}")
+            print(f"   - Alternative API: Using {sdk_env['ANTHROPIC_BASE_URL']}")
 
     # Create a wrapper for bash_security_hook that passes project_dir via context
     async def bash_hook_with_context(input_data, tool_use_id=None, context=None):
diff --git a/registry.py b/registry.py
index 3076519..60aed4e 100644
--- a/registry.py
+++ b/registry.py
@@ -676,6 +676,18 @@ API_PROVIDERS: dict[str, dict[str, Any]] = {
         ],
         "default_model": "glm-4.7",
     },
+    "azure": {
+        "name": "Azure Anthropic (Claude)",
+        "base_url": "",
+        "requires_auth": True,
+        "auth_env_var": "ANTHROPIC_API_KEY",
+        "models": [
+            {"id": "claude-opus-4-6", "name": "Claude Opus"},
+            {"id": "claude-sonnet-4-5", "name": "Claude Sonnet"},
+            {"id": "claude-haiku-4-5", "name": "Claude Haiku"},
+        ],
+        "default_model": "claude-opus-4-6",
+    },
     "ollama": {
         "name": "Ollama (Local)",
         "base_url": "http://localhost:11434",
diff --git a/ui/src/components/SettingsModal.tsx b/ui/src/components/SettingsModal.tsx
index 0a2b9ee..dbd9ed4 100644
--- a/ui/src/components/SettingsModal.tsx
+++ b/ui/src/components/SettingsModal.tsx
@@ -83,8 +83,10 @@ export function SettingsModal({ isOpen, onClose }: SettingsModalProps) {
   }
 
   const handleSaveCustomBaseUrl = () => {
-    if (customBaseUrlInput.trim() && !updateSettings.isPending) {
-      updateSettings.mutate({ api_base_url: customBaseUrlInput.trim() })
+    const effectiveBaseUrl = customBaseUrlInput || settings?.api_base_url || ''
+    if (effectiveBaseUrl.trim() && !updateSettings.isPending) {
+      updateSettings.mutate({ api_base_url: effectiveBaseUrl.trim() })
+      setCustomBaseUrlInput('')
     }
   }
 
@@ -102,12 +104,12 @@ export function SettingsModal({ isOpen, onClose }: SettingsModalProps) {
   const currentProviderInfo: ProviderInfo | undefined = providers.find(p => p.id === currentProvider)
   const isAlternativeProvider = currentProvider !== 'claude'
   const showAuthField = isAlternativeProvider && currentProviderInfo?.requires_auth
-  const showBaseUrlField = currentProvider === 'custom'
+  const showBaseUrlField = currentProvider === 'custom' || currentProvider === 'azure'
   const showCustomModelInput = currentProvider === 'custom' || currentProvider === 'ollama'
 
   return (
     <Dialog open={isOpen} onOpenChange={(open) => !open && onClose()}>
-      <DialogContent aria-describedby={undefined} className="sm:max-w-sm max-h-[85vh] overflow-y-auto">
+      <DialogContent aria-describedby={undefined} className="sm:max-w-lg max-h-[90vh] overflow-y-auto">
         <DialogHeader>
           <DialogTitle className="flex items-center gap-2">
             Settings
@@ -289,22 +291,38 @@ export function SettingsModal({ isOpen, onClose }: SettingsModalProps) {
               {showBaseUrlField && (
                 <div className="space-y-2 pt-1">
                   <Label className="text-sm">Base URL</Label>
-                  <div className="flex gap-2">
-                    <input
-                      type="text"
-                      value={customBaseUrlInput || settings.api_base_url || ''}
-                      onChange={(e) => setCustomBaseUrlInput(e.target.value)}
-                      placeholder="https://api.example.com/v1"
-                      className="flex-1 py-1.5 px-3 text-sm border rounded-md bg-background"
-                    />
-                    <Button
-                      size="sm"
-                      onClick={handleSaveCustomBaseUrl}
-                      disabled={!customBaseUrlInput.trim() || isSaving}
-                    >
-                      Save
-                    </Button>
-                  </div>
+                  {settings.api_base_url && !customBaseUrlInput && (
+                    <div className="flex items-center gap-2 text-sm text-muted-foreground">
+                      <ShieldCheck size={14} className="text-green-500" />
+                      <span className="truncate">{settings.api_base_url}</span>
+                      <Button
+                        variant="ghost"
+                        size="sm"
+                        className="h-auto py-0.5 px-2 text-xs shrink-0"
+                        onClick={() => setCustomBaseUrlInput(settings.api_base_url || ' ')}
+                      >
+                        Change
+                      </Button>
+                    </div>
+                  )}
+                  {(!settings.api_base_url || customBaseUrlInput) && (
+                    <div className="flex gap-2">
+                      <input
+                        type="text"
+                        value={customBaseUrlInput.trim()}
+                        onChange={(e) => setCustomBaseUrlInput(e.target.value)}
+                        placeholder={currentProvider === 'azure' ? 'https://your-resource.services.ai.azure.com/anthropic' : 'https://api.example.com/v1'}
+                        className="flex-1 py-1.5 px-3 text-sm border rounded-md bg-background"
+                      />
+                      <Button
+                        size="sm"
+                        onClick={handleSaveCustomBaseUrl}
+                        disabled={!customBaseUrlInput.trim() || isSaving}
+                      >
+                        Save
+                      </Button>
+                    </div>
+                  )}
                 </div>
               )}
             </div>

From f285db1ad39cbdfad09d981950194c16605d8faf Mon Sep 17 00:00:00 2001
From: Auto <leon.vanzyl@gmail.com>
Date: Wed, 11 Feb 2026 08:38:53 +0200
Subject: [PATCH 06/14] add paywright cli skill

---
 .claude/skills/playwright-cli/SKILL.md        | 259 +++++++++++++++++
 .../references/request-mocking.md             |  87 ++++++
 .../playwright-cli/references/running-code.md | 232 +++++++++++++++
 .../references/session-management.md          | 169 +++++++++++
 .../references/storage-state.md               | 275 ++++++++++++++++++
 .../references/test-generation.md             |  88 ++++++
 .../playwright-cli/references/tracing.md      | 139 +++++++++
 .../references/video-recording.md             |  43 +++
 8 files changed, 1292 insertions(+)
 create mode 100644 .claude/skills/playwright-cli/SKILL.md
 create mode 100644 .claude/skills/playwright-cli/references/request-mocking.md
 create mode 100644 .claude/skills/playwright-cli/references/running-code.md
 create mode 100644 .claude/skills/playwright-cli/references/session-management.md
 create mode 100644 .claude/skills/playwright-cli/references/storage-state.md
 create mode 100644 .claude/skills/playwright-cli/references/test-generation.md
 create mode 100644 .claude/skills/playwright-cli/references/tracing.md
 create mode 100644 .claude/skills/playwright-cli/references/video-recording.md

diff --git a/.claude/skills/playwright-cli/SKILL.md b/.claude/skills/playwright-cli/SKILL.md
new file mode 100644
index 0000000..29182e7
--- /dev/null
+++ b/.claude/skills/playwright-cli/SKILL.md
@@ -0,0 +1,259 @@
+---
+name: playwright-cli
+description: Automates browser interactions for web testing, form filling, screenshots, and data extraction. Use when the user needs to navigate websites, interact with web pages, fill forms, take screenshots, test web applications, or extract information from web pages.
+allowed-tools: Bash(playwright-cli:*)
+---
+
+# Browser Automation with playwright-cli
+
+## Quick start
+
+```bash
+# open new browser
+playwright-cli open
+# navigate to a page
+playwright-cli goto https://playwright.dev
+# interact with the page using refs from the snapshot
+playwright-cli click e15
+playwright-cli type "page.click"
+playwright-cli press Enter
+# take a screenshot
+playwright-cli screenshot
+# close the browser
+playwright-cli close
+```
+
+## Commands
+
+### Core
+
+```bash
+playwright-cli open
+# open and navigate right away
+playwright-cli open https://example.com/
+playwright-cli goto https://playwright.dev
+playwright-cli type "search query"
+playwright-cli click e3
+playwright-cli dblclick e7
+playwright-cli fill e5 "user@example.com"
+playwright-cli drag e2 e8
+playwright-cli hover e4
+playwright-cli select e9 "option-value"
+playwright-cli upload ./document.pdf
+playwright-cli check e12
+playwright-cli uncheck e12
+playwright-cli snapshot
+playwright-cli snapshot --filename=after-click.yaml
+playwright-cli eval "document.title"
+playwright-cli eval "el => el.textContent" e5
+playwright-cli dialog-accept
+playwright-cli dialog-accept "confirmation text"
+playwright-cli dialog-dismiss
+playwright-cli resize 1920 1080
+playwright-cli close
+```
+
+### Navigation
+
+```bash
+playwright-cli go-back
+playwright-cli go-forward
+playwright-cli reload
+```
+
+### Keyboard
+
+```bash
+playwright-cli press Enter
+playwright-cli press ArrowDown
+playwright-cli keydown Shift
+playwright-cli keyup Shift
+```
+
+### Mouse
+
+```bash
+playwright-cli mousemove 150 300
+playwright-cli mousedown
+playwright-cli mousedown right
+playwright-cli mouseup
+playwright-cli mouseup right
+playwright-cli mousewheel 0 100
+```
+
+### Save as
+
+```bash
+playwright-cli screenshot
+playwright-cli screenshot e5
+playwright-cli screenshot --filename=page.png
+playwright-cli pdf --filename=page.pdf
+```
+
+### Tabs
+
+```bash
+playwright-cli tab-list
+playwright-cli tab-new
+playwright-cli tab-new https://example.com/page
+playwright-cli tab-close
+playwright-cli tab-close 2
+playwright-cli tab-select 0
+```
+
+### Storage
+
+```bash
+playwright-cli state-save
+playwright-cli state-save auth.json
+playwright-cli state-load auth.json
+
+# Cookies
+playwright-cli cookie-list
+playwright-cli cookie-list --domain=example.com
+playwright-cli cookie-get session_id
+playwright-cli cookie-set session_id abc123
+playwright-cli cookie-set session_id abc123 --domain=example.com --httpOnly --secure
+playwright-cli cookie-delete session_id
+playwright-cli cookie-clear
+
+# LocalStorage
+playwright-cli localstorage-list
+playwright-cli localstorage-get theme
+playwright-cli localstorage-set theme dark
+playwright-cli localstorage-delete theme
+playwright-cli localstorage-clear
+
+# SessionStorage
+playwright-cli sessionstorage-list
+playwright-cli sessionstorage-get step
+playwright-cli sessionstorage-set step 3
+playwright-cli sessionstorage-delete step
+playwright-cli sessionstorage-clear
+```
+
+### Network
+
+```bash
+playwright-cli route "**/*.jpg" --status=404
+playwright-cli route "https://api.example.com/**" --body='{"mock": true}'
+playwright-cli route-list
+playwright-cli unroute "**/*.jpg"
+playwright-cli unroute
+```
+
+### DevTools
+
+```bash
+playwright-cli console
+playwright-cli console warning
+playwright-cli network
+playwright-cli run-code "async page => await page.context().grantPermissions(['geolocation'])"
+playwright-cli tracing-start
+playwright-cli tracing-stop
+playwright-cli video-start
+playwright-cli video-stop video.webm
+```
+
+### Install
+
+```bash
+playwright-cli install --skills
+playwright-cli install-browser
+```
+
+### Configuration
+```bash
+# Use specific browser when creating session
+playwright-cli open --browser=chrome
+playwright-cli open --browser=firefox
+playwright-cli open --browser=webkit
+playwright-cli open --browser=msedge
+# Connect to browser via extension
+playwright-cli open --extension
+
+# Use persistent profile (by default profile is in-memory)
+playwright-cli open --persistent
+# Use persistent profile with custom directory
+playwright-cli open --profile=/path/to/profile
+
+# Start with config file
+playwright-cli open --config=my-config.json
+
+# Close the browser
+playwright-cli close
+# Delete user data for the default session
+playwright-cli delete-data
+```
+
+### Browser Sessions
+
+```bash
+# create new browser session named "mysession" with persistent profile
+playwright-cli -s=mysession open example.com --persistent
+# same with manually specified profile directory (use when requested explicitly)
+playwright-cli -s=mysession open example.com --profile=/path/to/profile
+playwright-cli -s=mysession click e6
+playwright-cli -s=mysession close  # stop a named browser
+playwright-cli -s=mysession delete-data  # delete user data for persistent session
+
+playwright-cli list
+# Close all browsers
+playwright-cli close-all
+# Forcefully kill all browser processes
+playwright-cli kill-all
+```
+
+## Example: Form submission
+
+```bash
+playwright-cli open https://example.com/form
+playwright-cli snapshot
+
+playwright-cli fill e1 "user@example.com"
+playwright-cli fill e2 "password123"
+playwright-cli click e3
+playwright-cli snapshot
+playwright-cli close
+```
+
+## Example: Multi-tab workflow
+
+```bash
+playwright-cli open https://example.com
+playwright-cli tab-new https://example.com/other
+playwright-cli tab-list
+playwright-cli tab-select 0
+playwright-cli snapshot
+playwright-cli close
+```
+
+## Example: Debugging with DevTools
+
+```bash
+playwright-cli open https://example.com
+playwright-cli click e4
+playwright-cli fill e7 "test"
+playwright-cli console
+playwright-cli network
+playwright-cli close
+```
+
+```bash
+playwright-cli open https://example.com
+playwright-cli tracing-start
+playwright-cli click e4
+playwright-cli fill e7 "test"
+playwright-cli tracing-stop
+playwright-cli close
+```
+
+## Specific tasks
+
+* **Request mocking** [references/request-mocking.md](references/request-mocking.md)
+* **Running Playwright code** [references/running-code.md](references/running-code.md)
+* **Browser session management** [references/session-management.md](references/session-management.md)
+* **Storage state (cookies, localStorage)** [references/storage-state.md](references/storage-state.md)
+* **Test generation** [references/test-generation.md](references/test-generation.md)
+* **Tracing** [references/tracing.md](references/tracing.md)
+* **Video recording** [references/video-recording.md](references/video-recording.md)
diff --git a/.claude/skills/playwright-cli/references/request-mocking.md b/.claude/skills/playwright-cli/references/request-mocking.md
new file mode 100644
index 0000000..9005fda
--- /dev/null
+++ b/.claude/skills/playwright-cli/references/request-mocking.md
@@ -0,0 +1,87 @@
+# Request Mocking
+
+Intercept, mock, modify, and block network requests.
+
+## CLI Route Commands
+
+```bash
+# Mock with custom status
+playwright-cli route "**/*.jpg" --status=404
+
+# Mock with JSON body
+playwright-cli route "**/api/users" --body='[{"id":1,"name":"Alice"}]' --content-type=application/json
+
+# Mock with custom headers
+playwright-cli route "**/api/data" --body='{"ok":true}' --header="X-Custom: value"
+
+# Remove headers from requests
+playwright-cli route "**/*" --remove-header=cookie,authorization
+
+# List active routes
+playwright-cli route-list
+
+# Remove a route or all routes
+playwright-cli unroute "**/*.jpg"
+playwright-cli unroute
+```
+
+## URL Patterns
+
+```
+**/api/users           - Exact path match
+**/api/*/details       - Wildcard in path
+**/*.{png,jpg,jpeg}    - Match file extensions
+**/search?q=*          - Match query parameters
+```
+
+## Advanced Mocking with run-code
+
+For conditional responses, request body inspection, response modification, or delays:
+
+### Conditional Response Based on Request
+
+```bash
+playwright-cli run-code "async page => {
+  await page.route('**/api/login', route => {
+    const body = route.request().postDataJSON();
+    if (body.username === 'admin') {
+      route.fulfill({ body: JSON.stringify({ token: 'mock-token' }) });
+    } else {
+      route.fulfill({ status: 401, body: JSON.stringify({ error: 'Invalid' }) });
+    }
+  });
+}"
+```
+
+### Modify Real Response
+
+```bash
+playwright-cli run-code "async page => {
+  await page.route('**/api/user', async route => {
+    const response = await route.fetch();
+    const json = await response.json();
+    json.isPremium = true;
+    await route.fulfill({ response, json });
+  });
+}"
+```
+
+### Simulate Network Failures
+
+```bash
+playwright-cli run-code "async page => {
+  await page.route('**/api/offline', route => route.abort('internetdisconnected'));
+}"
+# Options: connectionrefused, timedout, connectionreset, internetdisconnected
+```
+
+### Delayed Response
+
+```bash
+playwright-cli run-code "async page => {
+  await page.route('**/api/slow', async route => {
+    await new Promise(r => setTimeout(r, 3000));
+    route.fulfill({ body: JSON.stringify({ data: 'loaded' }) });
+  });
+}"
+```
diff --git a/.claude/skills/playwright-cli/references/running-code.md b/.claude/skills/playwright-cli/references/running-code.md
new file mode 100644
index 0000000..7d6d22f
--- /dev/null
+++ b/.claude/skills/playwright-cli/references/running-code.md
@@ -0,0 +1,232 @@
+# Running Custom Playwright Code
+
+Use `run-code` to execute arbitrary Playwright code for advanced scenarios not covered by CLI commands.
+
+## Syntax
+
+```bash
+playwright-cli run-code "async page => {
+  // Your Playwright code here
+  // Access page.context() for browser context operations
+}"
+```
+
+## Geolocation
+
+```bash
+# Grant geolocation permission and set location
+playwright-cli run-code "async page => {
+  await page.context().grantPermissions(['geolocation']);
+  await page.context().setGeolocation({ latitude: 37.7749, longitude: -122.4194 });
+}"
+
+# Set location to London
+playwright-cli run-code "async page => {
+  await page.context().grantPermissions(['geolocation']);
+  await page.context().setGeolocation({ latitude: 51.5074, longitude: -0.1278 });
+}"
+
+# Clear geolocation override
+playwright-cli run-code "async page => {
+  await page.context().clearPermissions();
+}"
+```
+
+## Permissions
+
+```bash
+# Grant multiple permissions
+playwright-cli run-code "async page => {
+  await page.context().grantPermissions([
+    'geolocation',
+    'notifications',
+    'camera',
+    'microphone'
+  ]);
+}"
+
+# Grant permissions for specific origin
+playwright-cli run-code "async page => {
+  await page.context().grantPermissions(['clipboard-read'], {
+    origin: 'https://example.com'
+  });
+}"
+```
+
+## Media Emulation
+
+```bash
+# Emulate dark color scheme
+playwright-cli run-code "async page => {
+  await page.emulateMedia({ colorScheme: 'dark' });
+}"
+
+# Emulate light color scheme
+playwright-cli run-code "async page => {
+  await page.emulateMedia({ colorScheme: 'light' });
+}"
+
+# Emulate reduced motion
+playwright-cli run-code "async page => {
+  await page.emulateMedia({ reducedMotion: 'reduce' });
+}"
+
+# Emulate print media
+playwright-cli run-code "async page => {
+  await page.emulateMedia({ media: 'print' });
+}"
+```
+
+## Wait Strategies
+
+```bash
+# Wait for network idle
+playwright-cli run-code "async page => {
+  await page.waitForLoadState('networkidle');
+}"
+
+# Wait for specific element
+playwright-cli run-code "async page => {
+  await page.waitForSelector('.loading', { state: 'hidden' });
+}"
+
+# Wait for function to return true
+playwright-cli run-code "async page => {
+  await page.waitForFunction(() => window.appReady === true);
+}"
+
+# Wait with timeout
+playwright-cli run-code "async page => {
+  await page.waitForSelector('.result', { timeout: 10000 });
+}"
+```
+
+## Frames and Iframes
+
+```bash
+# Work with iframe
+playwright-cli run-code "async page => {
+  const frame = page.locator('iframe#my-iframe').contentFrame();
+  await frame.locator('button').click();
+}"
+
+# Get all frames
+playwright-cli run-code "async page => {
+  const frames = page.frames();
+  return frames.map(f => f.url());
+}"
+```
+
+## File Downloads
+
+```bash
+# Handle file download
+playwright-cli run-code "async page => {
+  const [download] = await Promise.all([
+    page.waitForEvent('download'),
+    page.click('a.download-link')
+  ]);
+  await download.saveAs('./downloaded-file.pdf');
+  return download.suggestedFilename();
+}"
+```
+
+## Clipboard
+
+```bash
+# Read clipboard (requires permission)
+playwright-cli run-code "async page => {
+  await page.context().grantPermissions(['clipboard-read']);
+  return await page.evaluate(() => navigator.clipboard.readText());
+}"
+
+# Write to clipboard
+playwright-cli run-code "async page => {
+  await page.evaluate(text => navigator.clipboard.writeText(text), 'Hello clipboard!');
+}"
+```
+
+## Page Information
+
+```bash
+# Get page title
+playwright-cli run-code "async page => {
+  return await page.title();
+}"
+
+# Get current URL
+playwright-cli run-code "async page => {
+  return page.url();
+}"
+
+# Get page content
+playwright-cli run-code "async page => {
+  return await page.content();
+}"
+
+# Get viewport size
+playwright-cli run-code "async page => {
+  return page.viewportSize();
+}"
+```
+
+## JavaScript Execution
+
+```bash
+# Execute JavaScript and return result
+playwright-cli run-code "async page => {
+  return await page.evaluate(() => {
+    return {
+      userAgent: navigator.userAgent,
+      language: navigator.language,
+      cookiesEnabled: navigator.cookieEnabled
+    };
+  });
+}"
+
+# Pass arguments to evaluate
+playwright-cli run-code "async page => {
+  const multiplier = 5;
+  return await page.evaluate(m => document.querySelectorAll('li').length * m, multiplier);
+}"
+```
+
+## Error Handling
+
+```bash
+# Try-catch in run-code
+playwright-cli run-code "async page => {
+  try {
+    await page.click('.maybe-missing', { timeout: 1000 });
+    return 'clicked';
+  } catch (e) {
+    return 'element not found';
+  }
+}"
+```
+
+## Complex Workflows
+
+```bash
+# Login and save state
+playwright-cli run-code "async page => {
+  await page.goto('https://example.com/login');
+  await page.fill('input[name=email]', 'user@example.com');
+  await page.fill('input[name=password]', 'secret');
+  await page.click('button[type=submit]');
+  await page.waitForURL('**/dashboard');
+  await page.context().storageState({ path: 'auth.json' });
+  return 'Login successful';
+}"
+
+# Scrape data from multiple pages
+playwright-cli run-code "async page => {
+  const results = [];
+  for (let i = 1; i <= 3; i++) {
+    await page.goto(\`https://example.com/page/\${i}\`);
+    const items = await page.locator('.item').allTextContents();
+    results.push(...items);
+  }
+  return results;
+}"
+```
diff --git a/.claude/skills/playwright-cli/references/session-management.md b/.claude/skills/playwright-cli/references/session-management.md
new file mode 100644
index 0000000..08c8c90
--- /dev/null
+++ b/.claude/skills/playwright-cli/references/session-management.md
@@ -0,0 +1,169 @@
+# Browser Session Management
+
+Run multiple isolated browser sessions concurrently with state persistence.
+
+## Named Browser Sessions
+
+Use `-b` flag to isolate browser contexts:
+
+```bash
+# Browser 1: Authentication flow
+playwright-cli -s=auth open https://app.example.com/login
+
+# Browser 2: Public browsing (separate cookies, storage)
+playwright-cli -s=public open https://example.com
+
+# Commands are isolated by browser session
+playwright-cli -s=auth fill e1 "user@example.com"
+playwright-cli -s=public snapshot
+```
+
+## Browser Session Isolation Properties
+
+Each browser session has independent:
+- Cookies
+- LocalStorage / SessionStorage
+- IndexedDB
+- Cache
+- Browsing history
+- Open tabs
+
+## Browser Session Commands
+
+```bash
+# List all browser sessions
+playwright-cli list
+
+# Stop a browser session (close the browser)
+playwright-cli close                # stop the default browser
+playwright-cli -s=mysession close   # stop a named browser
+
+# Stop all browser sessions
+playwright-cli close-all
+
+# Forcefully kill all daemon processes (for stale/zombie processes)
+playwright-cli kill-all
+
+# Delete browser session user data (profile directory)
+playwright-cli delete-data                # delete default browser data
+playwright-cli -s=mysession delete-data   # delete named browser data
+```
+
+## Environment Variable
+
+Set a default browser session name via environment variable:
+
+```bash
+export PLAYWRIGHT_CLI_SESSION="mysession"
+playwright-cli open example.com  # Uses "mysession" automatically
+```
+
+## Common Patterns
+
+### Concurrent Scraping
+
+```bash
+#!/bin/bash
+# Scrape multiple sites concurrently
+
+# Start all browsers
+playwright-cli -s=site1 open https://site1.com &
+playwright-cli -s=site2 open https://site2.com &
+playwright-cli -s=site3 open https://site3.com &
+wait
+
+# Take snapshots from each
+playwright-cli -s=site1 snapshot
+playwright-cli -s=site2 snapshot
+playwright-cli -s=site3 snapshot
+
+# Cleanup
+playwright-cli close-all
+```
+
+### A/B Testing Sessions
+
+```bash
+# Test different user experiences
+playwright-cli -s=variant-a open "https://app.com?variant=a"
+playwright-cli -s=variant-b open "https://app.com?variant=b"
+
+# Compare
+playwright-cli -s=variant-a screenshot
+playwright-cli -s=variant-b screenshot
+```
+
+### Persistent Profile
+
+By default, browser profile is kept in memory only. Use `--persistent` flag on `open` to persist the browser profile to disk:
+
+```bash
+# Use persistent profile (auto-generated location)
+playwright-cli open https://example.com --persistent
+
+# Use persistent profile with custom directory
+playwright-cli open https://example.com --profile=/path/to/profile
+```
+
+## Default Browser Session
+
+When `-s` is omitted, commands use the default browser session:
+
+```bash
+# These use the same default browser session
+playwright-cli open https://example.com
+playwright-cli snapshot
+playwright-cli close  # Stops default browser
+```
+
+## Browser Session Configuration
+
+Configure a browser session with specific settings when opening:
+
+```bash
+# Open with config file
+playwright-cli open https://example.com --config=.playwright/my-cli.json
+
+# Open with specific browser
+playwright-cli open https://example.com --browser=firefox
+
+# Open in headed mode
+playwright-cli open https://example.com --headed
+
+# Open with persistent profile
+playwright-cli open https://example.com --persistent
+```
+
+## Best Practices
+
+### 1. Name Browser Sessions Semantically
+
+```bash
+# GOOD: Clear purpose
+playwright-cli -s=github-auth open https://github.com
+playwright-cli -s=docs-scrape open https://docs.example.com
+
+# AVOID: Generic names
+playwright-cli -s=s1 open https://github.com
+```
+
+### 2. Always Clean Up
+
+```bash
+# Stop browsers when done
+playwright-cli -s=auth close
+playwright-cli -s=scrape close
+
+# Or stop all at once
+playwright-cli close-all
+
+# If browsers become unresponsive or zombie processes remain
+playwright-cli kill-all
+```
+
+### 3. Delete Stale Browser Data
+
+```bash
+# Remove old browser data to free disk space
+playwright-cli -s=oldsession delete-data
+```
diff --git a/.claude/skills/playwright-cli/references/storage-state.md b/.claude/skills/playwright-cli/references/storage-state.md
new file mode 100644
index 0000000..c856db5
--- /dev/null
+++ b/.claude/skills/playwright-cli/references/storage-state.md
@@ -0,0 +1,275 @@
+# Storage Management
+
+Manage cookies, localStorage, sessionStorage, and browser storage state.
+
+## Storage State
+
+Save and restore complete browser state including cookies and storage.
+
+### Save Storage State
+
+```bash
+# Save to auto-generated filename (storage-state-{timestamp}.json)
+playwright-cli state-save
+
+# Save to specific filename
+playwright-cli state-save my-auth-state.json
+```
+
+### Restore Storage State
+
+```bash
+# Load storage state from file
+playwright-cli state-load my-auth-state.json
+
+# Reload page to apply cookies
+playwright-cli open https://example.com
+```
+
+### Storage State File Format
+
+The saved file contains:
+
+```json
+{
+  "cookies": [
+    {
+      "name": "session_id",
+      "value": "abc123",
+      "domain": "example.com",
+      "path": "/",
+      "expires": 1735689600,
+      "httpOnly": true,
+      "secure": true,
+      "sameSite": "Lax"
+    }
+  ],
+  "origins": [
+    {
+      "origin": "https://example.com",
+      "localStorage": [
+        { "name": "theme", "value": "dark" },
+        { "name": "user_id", "value": "12345" }
+      ]
+    }
+  ]
+}
+```
+
+## Cookies
+
+### List All Cookies
+
+```bash
+playwright-cli cookie-list
+```
+
+### Filter Cookies by Domain
+
+```bash
+playwright-cli cookie-list --domain=example.com
+```
+
+### Filter Cookies by Path
+
+```bash
+playwright-cli cookie-list --path=/api
+```
+
+### Get Specific Cookie
+
+```bash
+playwright-cli cookie-get session_id
+```
+
+### Set a Cookie
+
+```bash
+# Basic cookie
+playwright-cli cookie-set session abc123
+
+# Cookie with options
+playwright-cli cookie-set session abc123 --domain=example.com --path=/ --httpOnly --secure --sameSite=Lax
+
+# Cookie with expiration (Unix timestamp)
+playwright-cli cookie-set remember_me token123 --expires=1735689600
+```
+
+### Delete a Cookie
+
+```bash
+playwright-cli cookie-delete session_id
+```
+
+### Clear All Cookies
+
+```bash
+playwright-cli cookie-clear
+```
+
+### Advanced: Multiple Cookies or Custom Options
+
+For complex scenarios like adding multiple cookies at once, use `run-code`:
+
+```bash
+playwright-cli run-code "async page => {
+  await page.context().addCookies([
+    { name: 'session_id', value: 'sess_abc123', domain: 'example.com', path: '/', httpOnly: true },
+    { name: 'preferences', value: JSON.stringify({ theme: 'dark' }), domain: 'example.com', path: '/' }
+  ]);
+}"
+```
+
+## Local Storage
+
+### List All localStorage Items
+
+```bash
+playwright-cli localstorage-list
+```
+
+### Get Single Value
+
+```bash
+playwright-cli localstorage-get token
+```
+
+### Set Value
+
+```bash
+playwright-cli localstorage-set theme dark
+```
+
+### Set JSON Value
+
+```bash
+playwright-cli localstorage-set user_settings '{"theme":"dark","language":"en"}'
+```
+
+### Delete Single Item
+
+```bash
+playwright-cli localstorage-delete token
+```
+
+### Clear All localStorage
+
+```bash
+playwright-cli localstorage-clear
+```
+
+### Advanced: Multiple Operations
+
+For complex scenarios like setting multiple values at once, use `run-code`:
+
+```bash
+playwright-cli run-code "async page => {
+  await page.evaluate(() => {
+    localStorage.setItem('token', 'jwt_abc123');
+    localStorage.setItem('user_id', '12345');
+    localStorage.setItem('expires_at', Date.now() + 3600000);
+  });
+}"
+```
+
+## Session Storage
+
+### List All sessionStorage Items
+
+```bash
+playwright-cli sessionstorage-list
+```
+
+### Get Single Value
+
+```bash
+playwright-cli sessionstorage-get form_data
+```
+
+### Set Value
+
+```bash
+playwright-cli sessionstorage-set step 3
+```
+
+### Delete Single Item
+
+```bash
+playwright-cli sessionstorage-delete step
+```
+
+### Clear sessionStorage
+
+```bash
+playwright-cli sessionstorage-clear
+```
+
+## IndexedDB
+
+### List Databases
+
+```bash
+playwright-cli run-code "async page => {
+  return await page.evaluate(async () => {
+    const databases = await indexedDB.databases();
+    return databases;
+  });
+}"
+```
+
+### Delete Database
+
+```bash
+playwright-cli run-code "async page => {
+  await page.evaluate(() => {
+    indexedDB.deleteDatabase('myDatabase');
+  });
+}"
+```
+
+## Common Patterns
+
+### Authentication State Reuse
+
+```bash
+# Step 1: Login and save state
+playwright-cli open https://app.example.com/login
+playwright-cli snapshot
+playwright-cli fill e1 "user@example.com"
+playwright-cli fill e2 "password123"
+playwright-cli click e3
+
+# Save the authenticated state
+playwright-cli state-save auth.json
+
+# Step 2: Later, restore state and skip login
+playwright-cli state-load auth.json
+playwright-cli open https://app.example.com/dashboard
+# Already logged in!
+```
+
+### Save and Restore Roundtrip
+
+```bash
+# Set up authentication state
+playwright-cli open https://example.com
+playwright-cli eval "() => { document.cookie = 'session=abc123'; localStorage.setItem('user', 'john'); }"
+
+# Save state to file
+playwright-cli state-save my-session.json
+
+# ... later, in a new session ...
+
+# Restore state
+playwright-cli state-load my-session.json
+playwright-cli open https://example.com
+# Cookies and localStorage are restored!
+```
+
+## Security Notes
+
+- Never commit storage state files containing auth tokens
+- Add `*.auth-state.json` to `.gitignore`
+- Delete state files after automation completes
+- Use environment variables for sensitive data
+- By default, sessions run in-memory mode which is safer for sensitive operations
diff --git a/.claude/skills/playwright-cli/references/test-generation.md b/.claude/skills/playwright-cli/references/test-generation.md
new file mode 100644
index 0000000..7a09df3
--- /dev/null
+++ b/.claude/skills/playwright-cli/references/test-generation.md
@@ -0,0 +1,88 @@
+# Test Generation
+
+Generate Playwright test code automatically as you interact with the browser.
+
+## How It Works
+
+Every action you perform with `playwright-cli` generates corresponding Playwright TypeScript code.
+This code appears in the output and can be copied directly into your test files.
+
+## Example Workflow
+
+```bash
+# Start a session
+playwright-cli open https://example.com/login
+
+# Take a snapshot to see elements
+playwright-cli snapshot
+# Output shows: e1 [textbox "Email"], e2 [textbox "Password"], e3 [button "Sign In"]
+
+# Fill form fields - generates code automatically
+playwright-cli fill e1 "user@example.com"
+# Ran Playwright code:
+# await page.getByRole('textbox', { name: 'Email' }).fill('user@example.com');
+
+playwright-cli fill e2 "password123"
+# Ran Playwright code:
+# await page.getByRole('textbox', { name: 'Password' }).fill('password123');
+
+playwright-cli click e3
+# Ran Playwright code:
+# await page.getByRole('button', { name: 'Sign In' }).click();
+```
+
+## Building a Test File
+
+Collect the generated code into a Playwright test:
+
+```typescript
+import { test, expect } from '@playwright/test';
+
+test('login flow', async ({ page }) => {
+  // Generated code from playwright-cli session:
+  await page.goto('https://example.com/login');
+  await page.getByRole('textbox', { name: 'Email' }).fill('user@example.com');
+  await page.getByRole('textbox', { name: 'Password' }).fill('password123');
+  await page.getByRole('button', { name: 'Sign In' }).click();
+
+  // Add assertions
+  await expect(page).toHaveURL(/.*dashboard/);
+});
+```
+
+## Best Practices
+
+### 1. Use Semantic Locators
+
+The generated code uses role-based locators when possible, which are more resilient:
+
+```typescript
+// Generated (good - semantic)
+await page.getByRole('button', { name: 'Submit' }).click();
+
+// Avoid (fragile - CSS selectors)
+await page.locator('#submit-btn').click();
+```
+
+### 2. Explore Before Recording
+
+Take snapshots to understand the page structure before recording actions:
+
+```bash
+playwright-cli open https://example.com
+playwright-cli snapshot
+# Review the element structure
+playwright-cli click e5
+```
+
+### 3. Add Assertions Manually
+
+Generated code captures actions but not assertions. Add expectations in your test:
+
+```typescript
+// Generated action
+await page.getByRole('button', { name: 'Submit' }).click();
+
+// Manual assertion
+await expect(page.getByText('Success')).toBeVisible();
+```
diff --git a/.claude/skills/playwright-cli/references/tracing.md b/.claude/skills/playwright-cli/references/tracing.md
new file mode 100644
index 0000000..7ce7bab
--- /dev/null
+++ b/.claude/skills/playwright-cli/references/tracing.md
@@ -0,0 +1,139 @@
+# Tracing
+
+Capture detailed execution traces for debugging and analysis. Traces include DOM snapshots, screenshots, network activity, and console logs.
+
+## Basic Usage
+
+```bash
+# Start trace recording
+playwright-cli tracing-start
+
+# Perform actions
+playwright-cli open https://example.com
+playwright-cli click e1
+playwright-cli fill e2 "test"
+
+# Stop trace recording
+playwright-cli tracing-stop
+```
+
+## Trace Output Files
+
+When you start tracing, Playwright creates a `traces/` directory with several files:
+
+### `trace-{timestamp}.trace`
+
+**Action log** - The main trace file containing:
+- Every action performed (clicks, fills, navigations)
+- DOM snapshots before and after each action
+- Screenshots at each step
+- Timing information
+- Console messages
+- Source locations
+
+### `trace-{timestamp}.network`
+
+**Network log** - Complete network activity:
+- All HTTP requests and responses
+- Request headers and bodies
+- Response headers and bodies
+- Timing (DNS, connect, TLS, TTFB, download)
+- Resource sizes
+- Failed requests and errors
+
+### `resources/`
+
+**Resources directory** - Cached resources:
+- Images, fonts, stylesheets, scripts
+- Response bodies for replay
+- Assets needed to reconstruct page state
+
+## What Traces Capture
+
+| Category | Details |
+|----------|---------|
+| **Actions** | Clicks, fills, hovers, keyboard input, navigations |
+| **DOM** | Full DOM snapshot before/after each action |
+| **Screenshots** | Visual state at each step |
+| **Network** | All requests, responses, headers, bodies, timing |
+| **Console** | All console.log, warn, error messages |
+| **Timing** | Precise timing for each operation |
+
+## Use Cases
+
+### Debugging Failed Actions
+
+```bash
+playwright-cli tracing-start
+playwright-cli open https://app.example.com
+
+# This click fails - why?
+playwright-cli click e5
+
+playwright-cli tracing-stop
+# Open trace to see DOM state when click was attempted
+```
+
+### Analyzing Performance
+
+```bash
+playwright-cli tracing-start
+playwright-cli open https://slow-site.com
+playwright-cli tracing-stop
+
+# View network waterfall to identify slow resources
+```
+
+### Capturing Evidence
+
+```bash
+# Record a complete user flow for documentation
+playwright-cli tracing-start
+
+playwright-cli open https://app.example.com/checkout
+playwright-cli fill e1 "4111111111111111"
+playwright-cli fill e2 "12/25"
+playwright-cli fill e3 "123"
+playwright-cli click e4
+
+playwright-cli tracing-stop
+# Trace shows exact sequence of events
+```
+
+## Trace vs Video vs Screenshot
+
+| Feature | Trace | Video | Screenshot |
+|---------|-------|-------|------------|
+| **Format** | .trace file | .webm video | .png/.jpeg image |
+| **DOM inspection** | Yes | No | No |
+| **Network details** | Yes | No | No |
+| **Step-by-step replay** | Yes | Continuous | Single frame |
+| **File size** | Medium | Large | Small |
+| **Best for** | Debugging | Demos | Quick capture |
+
+## Best Practices
+
+### 1. Start Tracing Before the Problem
+
+```bash
+# Trace the entire flow, not just the failing step
+playwright-cli tracing-start
+playwright-cli open https://example.com
+# ... all steps leading to the issue ...
+playwright-cli tracing-stop
+```
+
+### 2. Clean Up Old Traces
+
+Traces can consume significant disk space:
+
+```bash
+# Remove traces older than 7 days
+find .playwright-cli/traces -mtime +7 -delete
+```
+
+## Limitations
+
+- Traces add overhead to automation
+- Large traces can consume significant disk space
+- Some dynamic content may not replay perfectly
diff --git a/.claude/skills/playwright-cli/references/video-recording.md b/.claude/skills/playwright-cli/references/video-recording.md
new file mode 100644
index 0000000..38391b3
--- /dev/null
+++ b/.claude/skills/playwright-cli/references/video-recording.md
@@ -0,0 +1,43 @@
+# Video Recording
+
+Capture browser automation sessions as video for debugging, documentation, or verification. Produces WebM (VP8/VP9 codec).
+
+## Basic Recording
+
+```bash
+# Start recording
+playwright-cli video-start
+
+# Perform actions
+playwright-cli open https://example.com
+playwright-cli snapshot
+playwright-cli click e1
+playwright-cli fill e2 "test input"
+
+# Stop and save
+playwright-cli video-stop demo.webm
+```
+
+## Best Practices
+
+### 1. Use Descriptive Filenames
+
+```bash
+# Include context in filename
+playwright-cli video-stop recordings/login-flow-2024-01-15.webm
+playwright-cli video-stop recordings/checkout-test-run-42.webm
+```
+
+## Tracing vs Video
+
+| Feature | Video | Tracing |
+|---------|-------|---------|
+| Output | WebM file | Trace file (viewable in Trace Viewer) |
+| Shows | Visual recording | DOM snapshots, network, console, actions |
+| Use case | Demos, documentation | Debugging, analysis |
+| Size | Larger | Smaller |
+
+## Limitations
+
+- Recording adds slight overhead to automation
+- Large recordings can consume significant disk space

From e9873a2642ebd1967aecc4e59da55d1f44faf7c8 Mon Sep 17 00:00:00 2001
From: Auto <leon.vanzyl@gmail.com>
Date: Wed, 11 Feb 2026 13:37:03 +0200
Subject: [PATCH 07/14] feat: migrate browser automation from Playwright MCP to
 CLI, fix headless setting

Major changes across 21 files (755 additions, 196 deletions):

Browser Automation Migration:
- Add versioned project migration system (prompts.py) with content-based
  detection and section-level regex replacement for coding/testing prompts
- Migrate STEP 5 (browser verification) and BROWSER AUTOMATION sections
  in coding prompt template to use playwright-cli commands
- Migrate STEP 2 and AVAILABLE TOOLS sections in testing prompt template
- Migration auto-runs at agent startup (autonomous_agent_demo.py), copies
  playwright-cli skill, scaffolds .playwright/cli.config.json, updates
  .gitignore, and stamps .migration_version file
- Add playwright-cli command validation to security allowlist (security.py)
  with tests for allowed subcommands and blocked eval/run-code

Headless Browser Setting Fix:
- Add _apply_playwright_headless() to process_manager.py that reads/updates
  .playwright/cli.config.json before agent subprocess launch
- Remove dead PLAYWRIGHT_HEADLESS env var that was never consumed
- Settings UI toggle now correctly controls visible browser window

Playwright CLI Auto-Install:
- Add ensurePlaywrightCli() to lib/cli.js for npm global entry point
- Add playwright-cli detection + npm install to start.bat, start.sh,
  start_ui.bat, start_ui.sh for all startup paths

Other Improvements:
- Add project folder path tooltip to ProjectSelector.tsx dropdown items
- Remove legacy Playwright MCP server configuration from client.py
- Update CLAUDE.md with playwright-cli skill documentation

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .claude/templates/coding_prompt.template.md  |  43 +-
 .claude/templates/testing_prompt.template.md |  58 +--
 .gitignore                                   |   4 +
 .npmignore                                   |   1 -
 CLAUDE.md                                    |  10 +-
 agent.py                                     |  12 +-
 autoforge_paths.py                           |   1 +
 autonomous_agent_demo.py                     |   6 +
 client.py                                    | 131 +-----
 lib/cli.js                                   |  43 ++
 package.json                                 |   1 +
 prompts.py                                   | 395 ++++++++++++++++++-
 security.py                                  |  39 +-
 server/services/process_manager.py           |  46 ++-
 start.bat                                    |  10 +
 start.sh                                     |   9 +
 start_ui.bat                                 |  10 +
 start_ui.sh                                  |   9 +
 temp_cleanup.py                              |  40 +-
 test_security.py                             |  79 ++++
 ui/src/components/ProjectSelector.tsx        |   2 +
 21 files changed, 754 insertions(+), 195 deletions(-)

diff --git a/.claude/templates/coding_prompt.template.md b/.claude/templates/coding_prompt.template.md
index 65243d8..832eb59 100644
--- a/.claude/templates/coding_prompt.template.md
+++ b/.claude/templates/coding_prompt.template.md
@@ -86,24 +86,33 @@ Implement the chosen feature thoroughly:
 
 **CRITICAL:** You MUST verify features through the actual UI.
 
-Use browser automation tools:
+Use `playwright-cli` for browser automation:
 
-- Navigate to the app in a real browser
-- Interact like a human user (click, type, scroll)
-- Take screenshots at each step (use inline screenshots only -- do NOT save screenshot files to disk)
-- Verify both functionality AND visual appearance
+- Open the browser: `playwright-cli open http://localhost:PORT`
+- Take a snapshot to see page elements: `playwright-cli snapshot`
+- Read the snapshot YAML file to see element refs
+- Click elements by ref: `playwright-cli click e5`
+- Type text: `playwright-cli type "search query"`
+- Fill form fields: `playwright-cli fill e3 "value"`
+- Take screenshots: `playwright-cli screenshot`
+- Read the screenshot file to verify visual appearance
+- Check console errors: `playwright-cli console`
+- Close browser when done: `playwright-cli close`
+
+**Token-efficient workflow:** `playwright-cli screenshot` and `snapshot` save files
+to `.playwright-cli/`. You will see a file link in the output. Read the file only
+when you need to verify visual appearance or find element refs.
 
 **DO:**
-
 - Test through the UI with clicks and keyboard input
-- Take screenshots to verify visual appearance (inline only, never save to disk)
-- Check for console errors in browser
+- Take screenshots and read them to verify visual appearance
+- Check for console errors with `playwright-cli console`
 - Verify complete user workflows end-to-end
+- Always run `playwright-cli close` when finished testing
 
 **DON'T:**
-
-- Only test with curl commands (backend testing alone is insufficient)
-- Use JavaScript evaluation to bypass UI (no shortcuts)
+- Only test with curl commands
+- Use JavaScript evaluation to bypass UI (`eval` and `run-code` are blocked)
 - Skip visual verification
 - Mark tests passing without thorough verification
 
@@ -145,7 +154,7 @@ Use the feature_mark_passing tool with feature_id=42
 - Combine or consolidate features
 - Reorder features
 
-**ONLY MARK A FEATURE AS PASSING AFTER VERIFICATION WITH SCREENSHOTS.**
+**ONLY MARK A FEATURE AS PASSING AFTER VERIFICATION WITH BROWSER AUTOMATION.**
 
 ### STEP 7: COMMIT YOUR PROGRESS
 
@@ -192,11 +201,15 @@ Before context fills up:
 
 ## BROWSER AUTOMATION
 
-Use Playwright MCP tools (`browser_*`) for UI verification. Key tools: `navigate`, `click`, `type`, `fill_form`, `take_screenshot`, `console_messages`, `network_requests`. All tools have auto-wait built in.
+Use `playwright-cli` commands for UI verification. Key commands: `open`, `goto`,
+`snapshot`, `click`, `type`, `fill`, `screenshot`, `console`, `close`.
 
-**Screenshot rule:** Always use inline mode (base64). NEVER save screenshots as files to disk.
+**How it works:** `playwright-cli` uses a persistent browser daemon. `open` starts it,
+subsequent commands interact via socket, `close` shuts it down. Screenshots and snapshots
+save to `.playwright-cli/` -- read the files when you need to verify content.
 
-Test like a human user with mouse and keyboard. Use `browser_console_messages` to detect errors. Don't bypass UI with JavaScript evaluation.
+Test like a human user with mouse and keyboard. Use `playwright-cli console` to detect
+JS errors. Don't bypass UI with JavaScript evaluation.
 
 ---
 
diff --git a/.claude/templates/testing_prompt.template.md b/.claude/templates/testing_prompt.template.md
index 3714d47..ee6a08f 100644
--- a/.claude/templates/testing_prompt.template.md
+++ b/.claude/templates/testing_prompt.template.md
@@ -31,26 +31,32 @@ For the feature returned:
 1. Read and understand the feature's verification steps
 2. Navigate to the relevant part of the application
 3. Execute each verification step using browser automation
-4. Take screenshots to document the verification (inline only -- do NOT save to disk)
+4. Take screenshots and read them to verify visual appearance
 5. Check for console errors
 
-Use browser automation tools:
+### Browser Automation (Playwright CLI)
 
 **Navigation & Screenshots:**
-- browser_navigate - Navigate to a URL
-- browser_take_screenshot - Capture screenshot (inline mode only -- never save to disk)
-- browser_snapshot - Get accessibility tree snapshot
+- `playwright-cli open <url>` - Open browser and navigate
+- `playwright-cli goto <url>` - Navigate to URL
+- `playwright-cli screenshot` - Save screenshot to `.playwright-cli/`
+- `playwright-cli snapshot` - Save page snapshot with element refs to `.playwright-cli/`
 
 **Element Interaction:**
-- browser_click - Click elements
-- browser_type - Type text into editable elements
-- browser_fill_form - Fill multiple form fields
-- browser_select_option - Select dropdown options
-- browser_press_key - Press keyboard keys
+- `playwright-cli click <ref>` - Click elements (ref from snapshot)
+- `playwright-cli type <text>` - Type text
+- `playwright-cli fill <ref> <text>` - Fill form fields
+- `playwright-cli select <ref> <val>` - Select dropdown
+- `playwright-cli press <key>` - Keyboard input
 
 **Debugging:**
-- browser_console_messages - Get browser console output (check for errors)
-- browser_network_requests - Monitor API calls
+- `playwright-cli console` - Check for JS errors
+- `playwright-cli network` - Monitor API calls
+
+**Cleanup:**
+- `playwright-cli close` - Close browser when done (ALWAYS do this)
+
+**Note:** Screenshots and snapshots save to files. Read the file to see the content.
 
 ### STEP 3: HANDLE RESULTS
 
@@ -79,7 +85,7 @@ A regression has been introduced. You MUST fix it:
 
 4. **Verify the fix:**
    - Run through all verification steps again
-   - Take screenshots confirming the fix (inline only, never save to disk)
+   - Take screenshots and read them to confirm the fix
 
 5. **Mark as passing after fix:**
    ```
@@ -98,7 +104,7 @@ A regression has been introduced. You MUST fix it:
 
 ---
 
-## AVAILABLE MCP TOOLS
+## AVAILABLE TOOLS
 
 ### Feature Management
 - `feature_get_stats` - Get progress overview (passing/in_progress/total counts)
@@ -106,19 +112,17 @@ A regression has been introduced. You MUST fix it:
 - `feature_mark_failing` - Mark a feature as failing (when you find a regression)
 - `feature_mark_passing` - Mark a feature as passing (after fixing a regression)
 
-### Browser Automation (Playwright)
-All interaction tools have **built-in auto-wait** -- no manual timeouts needed.
-
-- `browser_navigate` - Navigate to URL
-- `browser_take_screenshot` - Capture screenshot (inline only, never save to disk)
-- `browser_snapshot` - Get accessibility tree
-- `browser_click` - Click elements
-- `browser_type` - Type text
-- `browser_fill_form` - Fill form fields
-- `browser_select_option` - Select dropdown
-- `browser_press_key` - Keyboard input
-- `browser_console_messages` - Check for JS errors
-- `browser_network_requests` - Monitor API calls
+### Browser Automation (Playwright CLI)
+Use `playwright-cli` commands for browser interaction. Key commands:
+- `playwright-cli open <url>` - Open browser
+- `playwright-cli goto <url>` - Navigate to URL
+- `playwright-cli screenshot` - Take screenshot (saved to `.playwright-cli/`)
+- `playwright-cli snapshot` - Get page snapshot with element refs
+- `playwright-cli click <ref>` - Click element
+- `playwright-cli type <text>` - Type text
+- `playwright-cli fill <ref> <text>` - Fill form field
+- `playwright-cli console` - Check for JS errors
+- `playwright-cli close` - Close browser (always do this when done)
 
 ---
 
diff --git a/.gitignore b/.gitignore
index 6a01793..d63e64e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,6 +10,10 @@ issues/
 # Browser profiles for parallel agent execution
 .browser-profiles/
 
+# Playwright CLI daemon artifacts
+.playwright-cli/
+.playwright/
+
 # Log files
 logs/
 *.log
diff --git a/.npmignore b/.npmignore
index 9c4ada3..6bf112b 100644
--- a/.npmignore
+++ b/.npmignore
@@ -28,5 +28,4 @@ start.sh
 start_ui.sh
 start_ui.py
 .claude/agents/
-.claude/skills/
 .claude/settings.json
diff --git a/CLAUDE.md b/CLAUDE.md
index e0f9ea3..8665260 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -85,7 +85,7 @@ python autonomous_agent_demo.py --project-dir my-app --yolo
 
 **What's different in YOLO mode:**
 - No regression testing
-- No Playwright MCP server (browser automation disabled)
+- No Playwright CLI (browser automation disabled)
 - Features marked passing after lint/type-check succeeds
 - Faster iteration for prototyping
 
@@ -163,7 +163,7 @@ Publishing: `npm publish` (triggers `prepublishOnly` which builds UI, then publi
 - `autonomous_agent_demo.py` - Entry point for running the agent (supports `--yolo`, `--parallel`, `--batch-size`, `--batch-features`)
 - `autoforge_paths.py` - Central path resolution with dual-path backward compatibility and migration
 - `agent.py` - Agent session loop using Claude Agent SDK
-- `client.py` - ClaudeSDKClient configuration with security hooks, MCP servers, and Vertex AI support
+- `client.py` - ClaudeSDKClient configuration with security hooks, feature MCP server, and Vertex AI support
 - `security.py` - Bash command allowlist validation (ALLOWED_COMMANDS whitelist)
 - `prompts.py` - Prompt template loading with project-specific fallback and batch feature prompts
 - `progress.py` - Progress tracking, database queries, webhook notifications
@@ -288,6 +288,9 @@ Projects can be stored in any directory (registered in `~/.autoforge/registry.db
 - `.autoforge/.agent.lock` - Lock file to prevent multiple agent instances
 - `.autoforge/allowed_commands.yaml` - Project-specific bash command allowlist (optional)
 - `.autoforge/.gitignore` - Ignores runtime files
+- `.claude/skills/playwright-cli/` - Playwright CLI skill for browser automation
+- `.playwright/cli.config.json` - Browser configuration (headless, viewport, etc.)
+- `.playwright-cli/` - Playwright CLI daemon artifacts (screenshots, snapshots) - gitignored
 - `CLAUDE.md` - Stays at project root (SDK convention)
 - `app_spec.txt` - Root copy for agent template compatibility
 
@@ -445,6 +448,7 @@ Alternative providers are configured via the **Settings UI** (gear icon > API Pr
 **Skills** (`.claude/skills/`):
 - `frontend-design` - Distinctive, production-grade UI design
 - `gsd-to-autoforge-spec` - Convert GSD codebase mapping to AutoForge app_spec format
+- `playwright-cli` - Browser automation via Playwright CLI (copied to each project)
 
 **Other:**
 - `.claude/templates/` - Prompt templates copied to new projects
@@ -479,7 +483,7 @@ When running with `--parallel`, the orchestrator:
 1. Spawns multiple Claude agents as subprocesses (up to `--max-concurrency`)
 2. Each agent claims features atomically via `feature_claim_and_get`
 3. Features blocked by unmet dependencies are skipped
-4. Browser contexts are isolated per agent using `--isolated` flag
+4. Browser sessions are isolated per agent via `PLAYWRIGHT_CLI_SESSION` environment variable
 5. AgentTracker parses output and emits `agent_update` messages for UI
 
 ### Process Limits (Parallel Mode)
diff --git a/agent.py b/agent.py
index a3daaf8..e837628 100644
--- a/agent.py
+++ b/agent.py
@@ -240,17 +240,7 @@ async def run_autonomous_agent(
         print_session_header(iteration, is_initializer)
 
         # Create client (fresh context)
-        # Pass agent_id for browser isolation in multi-agent scenarios
-        import os
-        if agent_type == "testing":
-            agent_id = f"testing-{os.getpid()}"  # Unique ID for testing agents
-        elif feature_ids and len(feature_ids) > 1:
-            agent_id = f"batch-{feature_ids[0]}"
-        elif feature_id:
-            agent_id = f"feature-{feature_id}"
-        else:
-            agent_id = None
-        client = create_client(project_dir, model, yolo_mode=yolo_mode, agent_id=agent_id, agent_type=agent_type)
+        client = create_client(project_dir, model, yolo_mode=yolo_mode, agent_type=agent_type)
 
         # Choose prompt based on agent type
         if agent_type == "initializer":
diff --git a/autoforge_paths.py b/autoforge_paths.py
index 8283a9b..076d0be 100644
--- a/autoforge_paths.py
+++ b/autoforge_paths.py
@@ -43,6 +43,7 @@ assistant.db-shm
 .claude_assistant_settings.json
 .claude_settings.expand.*.json
 .progress_cache
+.migration_version
 """
 
 
diff --git a/autonomous_agent_demo.py b/autonomous_agent_demo.py
index 918b2c1..f24908f 100644
--- a/autonomous_agent_demo.py
+++ b/autonomous_agent_demo.py
@@ -237,6 +237,12 @@ def main() -> None:
     if migrated:
         print(f"Migrated project files to .autoforge/: {', '.join(migrated)}", flush=True)
 
+    # Migrate project to current AutoForge version (idempotent, safe)
+    from prompts import migrate_project_to_current
+    version_migrated = migrate_project_to_current(project_dir)
+    if version_migrated:
+        print(f"Upgraded project: {', '.join(version_migrated)}", flush=True)
+
     # Parse batch testing feature IDs (comma-separated string -> list[int])
     testing_feature_ids: list[int] | None = None
     if args.testing_feature_ids:
diff --git a/client.py b/client.py
index 4d06816..e752cad 100644
--- a/client.py
+++ b/client.py
@@ -21,16 +21,6 @@ from security import SENSITIVE_DIRECTORIES, bash_security_hook
 # Load environment variables from .env file if present
 load_dotenv()
 
-# Default Playwright headless mode - can be overridden via PLAYWRIGHT_HEADLESS env var
-# When True, browser runs invisibly in background (default - saves CPU)
-# When False, browser window is visible (useful for monitoring agent progress)
-DEFAULT_PLAYWRIGHT_HEADLESS = True
-
-# Default browser for Playwright - can be overridden via PLAYWRIGHT_BROWSER env var
-# Options: chrome, firefox, webkit, msedge
-# Firefox is recommended for lower CPU usage
-DEFAULT_PLAYWRIGHT_BROWSER = "firefox"
-
 # Extra read paths for cross-project file access (read-only)
 # Set EXTRA_READ_PATHS environment variable with comma-separated absolute paths
 # Example: EXTRA_READ_PATHS=/Volumes/Data/dev,/Users/shared/libs
@@ -41,6 +31,7 @@ EXTRA_READ_PATHS_VAR = "EXTRA_READ_PATHS"
 # this blocklist and the filesystem browser API share a single source of truth.
 EXTRA_READ_PATHS_BLOCKLIST = SENSITIVE_DIRECTORIES
 
+
 def convert_model_for_vertex(model: str) -> str:
     """
     Convert model name format for Vertex AI compatibility.
@@ -72,43 +63,6 @@ def convert_model_for_vertex(model: str) -> str:
     return model
 
 
-def get_playwright_headless() -> bool:
-    """
-    Get the Playwright headless mode setting.
-
-    Reads from PLAYWRIGHT_HEADLESS environment variable, defaults to True.
-    Returns True for headless mode (invisible browser), False for visible browser.
-    """
-    value = os.getenv("PLAYWRIGHT_HEADLESS", str(DEFAULT_PLAYWRIGHT_HEADLESS).lower()).strip().lower()
-    truthy = {"true", "1", "yes", "on"}
-    falsy = {"false", "0", "no", "off"}
-    if value not in truthy | falsy:
-        print(f"   - Warning: Invalid PLAYWRIGHT_HEADLESS='{value}', defaulting to {DEFAULT_PLAYWRIGHT_HEADLESS}")
-        return DEFAULT_PLAYWRIGHT_HEADLESS
-    return value in truthy
-
-
-# Valid browsers supported by Playwright MCP
-VALID_PLAYWRIGHT_BROWSERS = {"chrome", "firefox", "webkit", "msedge"}
-
-
-def get_playwright_browser() -> str:
-    """
-    Get the browser to use for Playwright.
-
-    Reads from PLAYWRIGHT_BROWSER environment variable, defaults to firefox.
-    Options: chrome, firefox, webkit, msedge
-    Firefox is recommended for lower CPU usage.
-    """
-    value = os.getenv("PLAYWRIGHT_BROWSER", DEFAULT_PLAYWRIGHT_BROWSER).strip().lower()
-    if value not in VALID_PLAYWRIGHT_BROWSERS:
-        print(f"   - Warning: Invalid PLAYWRIGHT_BROWSER='{value}', "
-              f"valid options: {', '.join(sorted(VALID_PLAYWRIGHT_BROWSERS))}. "
-              f"Defaulting to {DEFAULT_PLAYWRIGHT_BROWSER}")
-        return DEFAULT_PLAYWRIGHT_BROWSER
-    return value
-
-
 def get_extra_read_paths() -> list[Path]:
     """
     Get extra read-only paths from EXTRA_READ_PATHS environment variable.
@@ -228,41 +182,6 @@ ALL_FEATURE_MCP_TOOLS = sorted(
     set(CODING_AGENT_TOOLS) | set(TESTING_AGENT_TOOLS) | set(INITIALIZER_AGENT_TOOLS)
 )
 
-# Playwright MCP tools for browser automation.
-# Full set of tools for comprehensive UI testing including drag-and-drop,
-# hover menus, file uploads, tab management, etc.
-PLAYWRIGHT_TOOLS = [
-    # Core navigation & screenshots
-    "mcp__playwright__browser_navigate",
-    "mcp__playwright__browser_navigate_back",
-    "mcp__playwright__browser_take_screenshot",
-    "mcp__playwright__browser_snapshot",
-
-    # Element interaction
-    "mcp__playwright__browser_click",
-    "mcp__playwright__browser_type",
-    "mcp__playwright__browser_fill_form",
-    "mcp__playwright__browser_select_option",
-    "mcp__playwright__browser_press_key",
-    "mcp__playwright__browser_drag",
-    "mcp__playwright__browser_hover",
-    "mcp__playwright__browser_file_upload",
-
-    # JavaScript & debugging
-    "mcp__playwright__browser_evaluate",
-    # "mcp__playwright__browser_run_code",  # REMOVED - causes Playwright MCP server crash
-    "mcp__playwright__browser_console_messages",
-    "mcp__playwright__browser_network_requests",
-
-    # Browser management
-    "mcp__playwright__browser_resize",
-    "mcp__playwright__browser_wait_for",
-    "mcp__playwright__browser_handle_dialog",
-    "mcp__playwright__browser_install",
-    "mcp__playwright__browser_close",
-    "mcp__playwright__browser_tabs",
-]
-
 # Built-in tools available to agents.
 # WebFetch and WebSearch are included so coding agents can look up current
 # documentation for frameworks and libraries they are implementing.
@@ -282,7 +201,6 @@ def create_client(
     project_dir: Path,
     model: str,
     yolo_mode: bool = False,
-    agent_id: str | None = None,
     agent_type: str = "coding",
 ):
     """
@@ -291,9 +209,7 @@ def create_client(
     Args:
         project_dir: Directory for the project
         model: Claude model to use
-        yolo_mode: If True, skip Playwright MCP server for rapid prototyping
-        agent_id: Optional unique identifier for browser isolation in parallel mode.
-                  When provided, each agent gets its own browser profile.
+        yolo_mode: If True, skip browser testing for rapid prototyping
         agent_type: One of "coding", "testing", or "initializer". Controls which
                     MCP tools are exposed and the max_turns limit.
 
@@ -327,11 +243,8 @@ def create_client(
     }
     max_turns = max_turns_map.get(agent_type, 300)
 
-    # Build allowed tools list based on mode and agent type.
-    # In YOLO mode, exclude Playwright tools for faster prototyping.
+    # Build allowed tools list based on agent type.
     allowed_tools = [*BUILTIN_TOOLS, *feature_tools]
-    if not yolo_mode:
-        allowed_tools.extend(PLAYWRIGHT_TOOLS)
 
     # Build permissions list.
     # We permit ALL feature MCP tools at the security layer (so the MCP server
@@ -363,10 +276,6 @@ def create_client(
         permissions_list.append(f"Glob({path}/**)")
         permissions_list.append(f"Grep({path}/**)")
 
-    if not yolo_mode:
-        # Allow Playwright MCP tools for browser automation (standard mode only)
-        permissions_list.extend(PLAYWRIGHT_TOOLS)
-
     # Create comprehensive security settings
     # Note: Using relative paths ("./**") restricts access to project directory
     # since cwd is set to project_dir
@@ -395,9 +304,9 @@ def create_client(
         print(f"   - Extra read paths (validated): {', '.join(str(p) for p in extra_read_paths)}")
     print("   - Bash commands restricted to allowlist (see security.py)")
     if yolo_mode:
-        print("   - MCP servers: features (database) - YOLO MODE (no Playwright)")
+        print("   - MCP servers: features (database) - YOLO MODE (no browser testing)")
     else:
-        print("   - MCP servers: playwright (browser), features (database)")
+        print("   - MCP servers: features (database)")
     print("   - Project settings enabled (skills, commands, CLAUDE.md)")
     print()
 
@@ -421,36 +330,6 @@ def create_client(
             },
         },
     }
-    if not yolo_mode:
-        # Include Playwright MCP server for browser automation (standard mode only)
-        # Browser and headless mode configurable via environment variables
-        browser = get_playwright_browser()
-        playwright_args = [
-            "@playwright/mcp@latest",
-            "--viewport-size", "1280x720",
-            "--browser", browser,
-        ]
-        if get_playwright_headless():
-            playwright_args.append("--headless")
-        print(f"   - Browser: {browser} (headless={get_playwright_headless()})")
-
-        # Browser isolation for parallel execution
-        # Each agent gets its own isolated browser context to prevent tab conflicts
-        if agent_id:
-            # Use --isolated for ephemeral browser context
-            # This creates a fresh, isolated context without persistent state
-            # Note: --isolated and --user-data-dir are mutually exclusive
-            playwright_args.append("--isolated")
-            print(f"   - Browser isolation enabled for agent: {agent_id}")
-
-        mcp_servers["playwright"] = {
-            "command": "npx",
-            "args": playwright_args,
-            "env": {
-                "NODE_COMPILE_CACHE": "",  # Disable V8 compile caching to prevent .node file accumulation in %TEMP%
-            },
-        }
-
     # Build environment overrides for API endpoint configuration
     # Uses get_effective_sdk_env() which reads provider settings from the database,
     # ensuring UI-configured alternative providers (GLM, Ollama, Kimi, Custom) propagate
diff --git a/lib/cli.js b/lib/cli.js
index d0d4789..682ba84 100644
--- a/lib/cli.js
+++ b/lib/cli.js
@@ -517,6 +517,41 @@ function killProcess(pid) {
   }
 }
 
+// ---------------------------------------------------------------------------
+// Playwright CLI
+// ---------------------------------------------------------------------------
+
+/**
+ * Ensure playwright-cli is available globally for browser automation.
+ * Returns true if available (already installed or freshly installed).
+ *
+ * @param {boolean} showProgress - If true, print install progress
+ */
+function ensurePlaywrightCli(showProgress) {
+  try {
+    execSync('playwright-cli --version', {
+      timeout: 10_000,
+      stdio: ['pipe', 'pipe', 'pipe'],
+    });
+    return true;
+  } catch {
+    // Not installed — try to install
+  }
+
+  if (showProgress) {
+    log('      Installing playwright-cli for browser automation...');
+  }
+  try {
+    execSync('npm install -g @playwright/cli', {
+      timeout: 120_000,
+      stdio: ['pipe', 'pipe', 'pipe'],
+    });
+    return true;
+  } catch {
+    return false;
+  }
+}
+
 // ---------------------------------------------------------------------------
 // CLI commands
 // ---------------------------------------------------------------------------
@@ -613,6 +648,14 @@ function startServer(opts) {
   }
   const wasAlreadyReady = ensureVenv(python, repair);
 
+  // Ensure playwright-cli for browser automation (quick check, installs once)
+  if (!ensurePlaywrightCli(!wasAlreadyReady)) {
+    log('');
+    log('  Note: playwright-cli not available (browser automation will be limited)');
+    log('  Install manually: npm install -g @playwright/cli');
+    log('');
+  }
+
   // Step 3: Config file
   const configCreated = ensureEnvFile();
 
diff --git a/package.json b/package.json
index f9a47c6..638e803 100644
--- a/package.json
+++ b/package.json
@@ -19,6 +19,7 @@
     "ui/dist/",
     "ui/package.json",
     ".claude/commands/",
+    ".claude/skills/",
     ".claude/templates/",
     "examples/",
     "start.py",
diff --git a/prompts.py b/prompts.py
index 40d0494..dedead0 100644
--- a/prompts.py
+++ b/prompts.py
@@ -16,6 +16,9 @@ from pathlib import Path
 # Base templates location (generic templates)
 TEMPLATES_DIR = Path(__file__).parent / ".claude" / "templates"
 
+# Migration version — bump when adding new migration steps
+CURRENT_MIGRATION_VERSION = 1
+
 
 def get_project_prompts_dir(project_dir: Path) -> Path:
     """Get the prompts directory for a specific project."""
@@ -99,9 +102,9 @@ def _strip_browser_testing_sections(prompt: str) -> str:
         flags=re.DOTALL,
     )
 
-    # Replace the screenshots-only marking rule with YOLO-appropriate wording
+    # Replace the marking rule with YOLO-appropriate wording
     prompt = prompt.replace(
-        "**ONLY MARK A FEATURE AS PASSING AFTER VERIFICATION WITH SCREENSHOTS.**",
+        "**ONLY MARK A FEATURE AS PASSING AFTER VERIFICATION WITH BROWSER AUTOMATION.**",
         "**YOLO mode: Mark a feature as passing after lint/type-check succeeds and server starts cleanly.**",
     )
 
@@ -351,9 +354,70 @@ def scaffold_project_prompts(project_dir: Path) -> Path:
         except (OSError, PermissionError) as e:
             print(f"  Warning: Could not copy allowed_commands.yaml: {e}")
 
+    # Copy Playwright CLI skill for browser automation
+    skills_src = Path(__file__).parent / ".claude" / "skills" / "playwright-cli"
+    skills_dest = project_dir / ".claude" / "skills" / "playwright-cli"
+    if skills_src.exists() and not skills_dest.exists():
+        try:
+            shutil.copytree(skills_src, skills_dest)
+            copied_files.append(".claude/skills/playwright-cli/")
+        except (OSError, PermissionError) as e:
+            print(f"  Warning: Could not copy playwright-cli skill: {e}")
+
+    # Ensure .playwright-cli/ and .playwright/ are in project .gitignore
+    project_gitignore = project_dir / ".gitignore"
+    entries_to_add = [".playwright-cli/", ".playwright/"]
+    existing_lines: list[str] = []
+    if project_gitignore.exists():
+        try:
+            existing_lines = project_gitignore.read_text(encoding="utf-8").splitlines()
+        except (OSError, PermissionError):
+            pass
+    missing_entries = [e for e in entries_to_add if e not in existing_lines]
+    if missing_entries:
+        try:
+            with open(project_gitignore, "a", encoding="utf-8") as f:
+                # Add newline before entries if file doesn't end with one
+                if existing_lines and existing_lines[-1].strip():
+                    f.write("\n")
+                for entry in missing_entries:
+                    f.write(f"{entry}\n")
+        except (OSError, PermissionError) as e:
+            print(f"  Warning: Could not update .gitignore: {e}")
+
+    # Scaffold .playwright/cli.config.json for browser settings
+    playwright_config_dir = project_dir / ".playwright"
+    playwright_config_file = playwright_config_dir / "cli.config.json"
+    if not playwright_config_file.exists():
+        try:
+            playwright_config_dir.mkdir(parents=True, exist_ok=True)
+            import json
+            config = {
+                "browser": {
+                    "browserName": "chromium",
+                    "launchOptions": {
+                        "channel": "chrome",
+                        "headless": True,
+                    },
+                    "contextOptions": {
+                        "viewport": {"width": 1280, "height": 720},
+                    },
+                    "isolated": True,
+                },
+            }
+            with open(playwright_config_file, "w", encoding="utf-8") as f:
+                json.dump(config, f, indent=2)
+                f.write("\n")
+            copied_files.append(".playwright/cli.config.json")
+        except (OSError, PermissionError) as e:
+            print(f"  Warning: Could not create playwright config: {e}")
+
     if copied_files:
         print(f"  Created project files: {', '.join(copied_files)}")
 
+    # Stamp new projects at the current migration version so they never trigger migration
+    _set_migration_version(project_dir, CURRENT_MIGRATION_VERSION)
+
     return project_prompts
 
 
@@ -425,3 +489,330 @@ def copy_spec_to_project(project_dir: Path) -> None:
             return
 
     print("Warning: No app_spec.txt found to copy to project directory")
+
+
+# ---------------------------------------------------------------------------
+# Project version migration
+# ---------------------------------------------------------------------------
+
+# Replacement content: coding_prompt.md STEP 5 section (Playwright CLI)
+_CLI_STEP5_CONTENT = """\
+### STEP 5: VERIFY WITH BROWSER AUTOMATION
+
+**CRITICAL:** You MUST verify features through the actual UI.
+
+Use `playwright-cli` for browser automation:
+
+- Open the browser: `playwright-cli open http://localhost:PORT`
+- Take a snapshot to see page elements: `playwright-cli snapshot`
+- Read the snapshot YAML file to see element refs
+- Click elements by ref: `playwright-cli click e5`
+- Type text: `playwright-cli type "search query"`
+- Fill form fields: `playwright-cli fill e3 "value"`
+- Take screenshots: `playwright-cli screenshot`
+- Read the screenshot file to verify visual appearance
+- Check console errors: `playwright-cli console`
+- Close browser when done: `playwright-cli close`
+
+**Token-efficient workflow:** `playwright-cli screenshot` and `snapshot` save files
+to `.playwright-cli/`. You will see a file link in the output. Read the file only
+when you need to verify visual appearance or find element refs.
+
+**DO:**
+- Test through the UI with clicks and keyboard input
+- Take screenshots and read them to verify visual appearance
+- Check for console errors with `playwright-cli console`
+- Verify complete user workflows end-to-end
+- Always run `playwright-cli close` when finished testing
+
+**DON'T:**
+- Only test with curl commands
+- Use JavaScript evaluation to bypass UI (`eval` and `run-code` are blocked)
+- Skip visual verification
+- Mark tests passing without thorough verification
+
+"""
+
+# Replacement content: coding_prompt.md BROWSER AUTOMATION reference section
+_CLI_BROWSER_SECTION = """\
+## BROWSER AUTOMATION
+
+Use `playwright-cli` commands for UI verification. Key commands: `open`, `goto`,
+`snapshot`, `click`, `type`, `fill`, `screenshot`, `console`, `close`.
+
+**How it works:** `playwright-cli` uses a persistent browser daemon. `open` starts it,
+subsequent commands interact via socket, `close` shuts it down. Screenshots and snapshots
+save to `.playwright-cli/` -- read the files when you need to verify content.
+
+Test like a human user with mouse and keyboard. Use `playwright-cli console` to detect
+JS errors. Don't bypass UI with JavaScript evaluation.
+
+"""
+
+# Replacement content: testing_prompt.md STEP 2 section (Playwright CLI)
+_CLI_TESTING_STEP2 = """\
+### STEP 2: VERIFY THE FEATURE
+
+**CRITICAL:** You MUST verify the feature through the actual UI using browser automation.
+
+For the feature returned:
+1. Read and understand the feature's verification steps
+2. Navigate to the relevant part of the application
+3. Execute each verification step using browser automation
+4. Take screenshots and read them to verify visual appearance
+5. Check for console errors
+
+### Browser Automation (Playwright CLI)
+
+**Navigation & Screenshots:**
+- `playwright-cli open <url>` - Open browser and navigate
+- `playwright-cli goto <url>` - Navigate to URL
+- `playwright-cli screenshot` - Save screenshot to `.playwright-cli/`
+- `playwright-cli snapshot` - Save page snapshot with element refs to `.playwright-cli/`
+
+**Element Interaction:**
+- `playwright-cli click <ref>` - Click elements (ref from snapshot)
+- `playwright-cli type <text>` - Type text
+- `playwright-cli fill <ref> <text>` - Fill form fields
+- `playwright-cli select <ref> <val>` - Select dropdown
+- `playwright-cli press <key>` - Keyboard input
+
+**Debugging:**
+- `playwright-cli console` - Check for JS errors
+- `playwright-cli network` - Monitor API calls
+
+**Cleanup:**
+- `playwright-cli close` - Close browser when done (ALWAYS do this)
+
+**Note:** Screenshots and snapshots save to files. Read the file to see the content.
+
+"""
+
+# Replacement content: testing_prompt.md AVAILABLE TOOLS browser subsection
+_CLI_TESTING_TOOLS = """\
+### Browser Automation (Playwright CLI)
+Use `playwright-cli` commands for browser interaction. Key commands:
+- `playwright-cli open <url>` - Open browser
+- `playwright-cli goto <url>` - Navigate to URL
+- `playwright-cli screenshot` - Take screenshot (saved to `.playwright-cli/`)
+- `playwright-cli snapshot` - Get page snapshot with element refs
+- `playwright-cli click <ref>` - Click element
+- `playwright-cli type <text>` - Type text
+- `playwright-cli fill <ref> <text>` - Fill form field
+- `playwright-cli console` - Check for JS errors
+- `playwright-cli close` - Close browser (always do this when done)
+
+"""
+
+
+def _get_migration_version(project_dir: Path) -> int:
+    """Read the migration version from .autoforge/.migration_version."""
+    from autoforge_paths import get_autoforge_dir
+    version_file = get_autoforge_dir(project_dir) / ".migration_version"
+    if not version_file.exists():
+        return 0
+    try:
+        return int(version_file.read_text().strip())
+    except (ValueError, OSError):
+        return 0
+
+
+def _set_migration_version(project_dir: Path, version: int) -> None:
+    """Write the migration version to .autoforge/.migration_version."""
+    from autoforge_paths import get_autoforge_dir
+    version_file = get_autoforge_dir(project_dir) / ".migration_version"
+    version_file.parent.mkdir(parents=True, exist_ok=True)
+    version_file.write_text(str(version))
+
+
+def _migrate_coding_prompt_to_cli(content: str) -> str:
+    """Replace MCP-based Playwright sections with CLI-based content in coding prompt."""
+    # Replace STEP 5 section (from header to just before STEP 5.5)
+    content = re.sub(
+        r"### STEP 5: VERIFY WITH BROWSER AUTOMATION.*?(?=### STEP 5\.5:)",
+        _CLI_STEP5_CONTENT,
+        content,
+        count=1,
+        flags=re.DOTALL,
+    )
+
+    # Replace BROWSER AUTOMATION reference section (from header to next ---)
+    content = re.sub(
+        r"## BROWSER AUTOMATION\n\n.*?(?=---)",
+        _CLI_BROWSER_SECTION,
+        content,
+        count=1,
+        flags=re.DOTALL,
+    )
+
+    # Replace inline screenshot rule
+    content = content.replace(
+        "**ONLY MARK A FEATURE AS PASSING AFTER VERIFICATION WITH SCREENSHOTS.**",
+        "**ONLY MARK A FEATURE AS PASSING AFTER VERIFICATION WITH BROWSER AUTOMATION.**",
+    )
+
+    # Replace inline screenshot references (various phrasings from old templates)
+    for old_phrase in (
+        "(inline only -- do NOT save to disk)",
+        "(inline only, never save to disk)",
+        "(inline mode only -- never save to disk)",
+    ):
+        content = content.replace(old_phrase, "(saved to `.playwright-cli/`)")
+
+    return content
+
+
+def _migrate_testing_prompt_to_cli(content: str) -> str:
+    """Replace MCP-based Playwright sections with CLI-based content in testing prompt."""
+    # Replace AVAILABLE TOOLS browser subsection FIRST (before STEP 2, to avoid
+    # matching the new CLI subsection header that the STEP 2 replacement inserts).
+    # In old prompts, ### Browser Automation (Playwright) only exists in AVAILABLE TOOLS.
+    content = re.sub(
+        r"### Browser Automation \(Playwright[^)]*\)\n.*?(?=---)",
+        _CLI_TESTING_TOOLS,
+        content,
+        count=1,
+        flags=re.DOTALL,
+    )
+
+    # Replace STEP 2 verification section (from header to just before STEP 3)
+    content = re.sub(
+        r"### STEP 2: VERIFY THE FEATURE.*?(?=### STEP 3:)",
+        _CLI_TESTING_STEP2,
+        content,
+        count=1,
+        flags=re.DOTALL,
+    )
+
+    # Replace inline screenshot references (various phrasings from old templates)
+    for old_phrase in (
+        "(inline only -- do NOT save to disk)",
+        "(inline only, never save to disk)",
+        "(inline mode only -- never save to disk)",
+    ):
+        content = content.replace(old_phrase, "(saved to `.playwright-cli/`)")
+
+    return content
+
+
+def _migrate_v0_to_v1(project_dir: Path) -> list[str]:
+    """Migrate from v0 (MCP-based Playwright) to v1 (Playwright CLI).
+
+    Four idempotent sub-steps:
+    A. Copy playwright-cli skill to project
+    B. Scaffold .playwright/cli.config.json
+    C. Update .gitignore with .playwright-cli/ and .playwright/
+    D. Update coding_prompt.md and testing_prompt.md
+    """
+    import json
+
+    migrated: list[str] = []
+
+    # A. Copy Playwright CLI skill
+    skills_src = Path(__file__).parent / ".claude" / "skills" / "playwright-cli"
+    skills_dest = project_dir / ".claude" / "skills" / "playwright-cli"
+    if skills_src.exists() and not skills_dest.exists():
+        try:
+            shutil.copytree(skills_src, skills_dest)
+            migrated.append("Copied playwright-cli skill")
+        except (OSError, PermissionError) as e:
+            print(f"  Warning: Could not copy playwright-cli skill: {e}")
+
+    # B. Scaffold .playwright/cli.config.json
+    playwright_config_dir = project_dir / ".playwright"
+    playwright_config_file = playwright_config_dir / "cli.config.json"
+    if not playwright_config_file.exists():
+        try:
+            playwright_config_dir.mkdir(parents=True, exist_ok=True)
+            config = {
+                "browser": {
+                    "browserName": "chromium",
+                    "launchOptions": {
+                        "channel": "chrome",
+                        "headless": True,
+                    },
+                    "contextOptions": {
+                        "viewport": {"width": 1280, "height": 720},
+                    },
+                    "isolated": True,
+                },
+            }
+            with open(playwright_config_file, "w", encoding="utf-8") as f:
+                json.dump(config, f, indent=2)
+                f.write("\n")
+            migrated.append("Created .playwright/cli.config.json")
+        except (OSError, PermissionError) as e:
+            print(f"  Warning: Could not create playwright config: {e}")
+
+    # C. Update .gitignore
+    project_gitignore = project_dir / ".gitignore"
+    entries_to_add = [".playwright-cli/", ".playwright/"]
+    existing_lines: list[str] = []
+    if project_gitignore.exists():
+        try:
+            existing_lines = project_gitignore.read_text(encoding="utf-8").splitlines()
+        except (OSError, PermissionError):
+            pass
+    missing_entries = [e for e in entries_to_add if e not in existing_lines]
+    if missing_entries:
+        try:
+            with open(project_gitignore, "a", encoding="utf-8") as f:
+                if existing_lines and existing_lines[-1].strip():
+                    f.write("\n")
+                for entry in missing_entries:
+                    f.write(f"{entry}\n")
+            migrated.append(f"Added {', '.join(missing_entries)} to .gitignore")
+        except (OSError, PermissionError) as e:
+            print(f"  Warning: Could not update .gitignore: {e}")
+
+    # D. Update prompts
+    prompts_dir = get_project_prompts_dir(project_dir)
+
+    # D1. Update coding_prompt.md
+    coding_prompt_path = prompts_dir / "coding_prompt.md"
+    if coding_prompt_path.exists():
+        try:
+            content = coding_prompt_path.read_text(encoding="utf-8")
+            if "Playwright MCP" in content or "browser_navigate" in content or "browser_take_screenshot" in content:
+                updated = _migrate_coding_prompt_to_cli(content)
+                if updated != content:
+                    coding_prompt_path.write_text(updated, encoding="utf-8")
+                    migrated.append("Updated coding_prompt.md to Playwright CLI")
+        except (OSError, PermissionError) as e:
+            print(f"  Warning: Could not update coding_prompt.md: {e}")
+
+    # D2. Update testing_prompt.md
+    testing_prompt_path = prompts_dir / "testing_prompt.md"
+    if testing_prompt_path.exists():
+        try:
+            content = testing_prompt_path.read_text(encoding="utf-8")
+            if "browser_navigate" in content or "browser_take_screenshot" in content:
+                updated = _migrate_testing_prompt_to_cli(content)
+                if updated != content:
+                    testing_prompt_path.write_text(updated, encoding="utf-8")
+                    migrated.append("Updated testing_prompt.md to Playwright CLI")
+        except (OSError, PermissionError) as e:
+            print(f"  Warning: Could not update testing_prompt.md: {e}")
+
+    return migrated
+
+
+def migrate_project_to_current(project_dir: Path) -> list[str]:
+    """Migrate an existing project to the current AutoForge version.
+
+    Idempotent — safe to call on every agent start. Returns list of
+    human-readable descriptions of what was migrated.
+    """
+    current = _get_migration_version(project_dir)
+    if current >= CURRENT_MIGRATION_VERSION:
+        return []
+
+    migrated: list[str] = []
+
+    if current < 1:
+        migrated.extend(_migrate_v0_to_v1(project_dir))
+
+    # Future: if current < 2: migrated.extend(_migrate_v1_to_v2(project_dir))
+
+    _set_migration_version(project_dir, CURRENT_MIGRATION_VERSION)
+    return migrated
diff --git a/security.py b/security.py
index 8ed9ce7..9d928b5 100644
--- a/security.py
+++ b/security.py
@@ -66,10 +66,12 @@ ALLOWED_COMMANDS = {
     "bash",
     # Script execution
     "init.sh",  # Init scripts; validated separately
+    # Browser automation
+    "playwright-cli",  # Playwright CLI for browser testing; validated separately
 }
 
 # Commands that need additional validation even when in the allowlist
-COMMANDS_NEEDING_EXTRA_VALIDATION = {"pkill", "chmod", "init.sh"}
+COMMANDS_NEEDING_EXTRA_VALIDATION = {"pkill", "chmod", "init.sh", "playwright-cli"}
 
 # Commands that are NEVER allowed, even with user approval
 # These commands can cause permanent system damage or security breaches
@@ -438,6 +440,37 @@ def validate_init_script(command_string: str) -> tuple[bool, str]:
     return False, f"Only ./init.sh is allowed, got: {script}"
 
 
+def validate_playwright_command(command_string: str) -> tuple[bool, str]:
+    """
+    Validate playwright-cli commands - block dangerous subcommands.
+
+    Blocks `run-code` (arbitrary Node.js execution) and `eval` (arbitrary JS
+    evaluation) which bypass the security sandbox.
+
+    Returns:
+        Tuple of (is_allowed, reason_if_blocked)
+    """
+    try:
+        tokens = shlex.split(command_string)
+    except ValueError:
+        return False, "Could not parse playwright-cli command"
+
+    if not tokens:
+        return False, "Empty command"
+
+    BLOCKED_SUBCOMMANDS = {"run-code", "eval"}
+
+    # Find the subcommand: first non-flag token after 'playwright-cli'
+    for token in tokens[1:]:
+        if token.startswith("-"):
+            continue  # skip flags like -s=agent-1
+        if token in BLOCKED_SUBCOMMANDS:
+            return False, f"playwright-cli '{token}' is not allowed"
+        break  # first non-flag token is the subcommand
+
+    return True, ""
+
+
 def matches_pattern(command: str, pattern: str) -> bool:
     """
     Check if a command matches a pattern.
@@ -955,5 +988,9 @@ async def bash_security_hook(input_data, tool_use_id=None, context=None):
                 allowed, reason = validate_init_script(cmd_segment)
                 if not allowed:
                     return {"decision": "block", "reason": reason}
+            elif cmd == "playwright-cli":
+                allowed, reason = validate_playwright_command(cmd_segment)
+                if not allowed:
+                    return {"decision": "block", "reason": reason}
 
     return {}
diff --git a/server/services/process_manager.py b/server/services/process_manager.py
index 9a4bd5c..3054add 100644
--- a/server/services/process_manager.py
+++ b/server/services/process_manager.py
@@ -227,6 +227,28 @@ class AgentProcessManager:
         """Remove lock file."""
         self.lock_file.unlink(missing_ok=True)
 
+    def _apply_playwright_headless(self, headless: bool) -> None:
+        """Update .playwright/cli.config.json with the current headless setting.
+
+        playwright-cli reads this config file on each ``open`` command, so
+        updating it before the agent starts is sufficient.
+        """
+        config_file = self.project_dir / ".playwright" / "cli.config.json"
+        if not config_file.exists():
+            return
+        try:
+            import json
+            config = json.loads(config_file.read_text(encoding="utf-8"))
+            launch_opts = config.get("browser", {}).get("launchOptions", {})
+            if launch_opts.get("headless") == headless:
+                return  # already correct
+            launch_opts["headless"] = headless
+            config.setdefault("browser", {})["launchOptions"] = launch_opts
+            config_file.write_text(json.dumps(config, indent=2) + "\n", encoding="utf-8")
+            logger.info("Set playwright headless=%s for %s", headless, self.project_name)
+        except Exception:
+            logger.warning("Failed to update playwright config", exc_info=True)
+
     def _cleanup_stale_features(self) -> None:
         """Clear in_progress flag for all features when agent stops/crashes.
 
@@ -361,6 +383,15 @@ class AgentProcessManager:
         if not self._check_lock():
             return False, "Another agent instance is already running for this project"
 
+        # Clean up stale browser daemons from previous runs
+        try:
+            subprocess.run(
+                ["playwright-cli", "kill-all"],
+                timeout=5, capture_output=True,
+            )
+        except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
+            pass
+
         # Clean up features stuck from a previous crash/stop
         self._cleanup_stale_features()
 
@@ -397,6 +428,10 @@ class AgentProcessManager:
         # Add --batch-size flag for multi-feature batching
         cmd.extend(["--batch-size", str(batch_size)])
 
+        # Apply headless setting to .playwright/cli.config.json so playwright-cli
+        # picks it up (the only mechanism it supports for headless control)
+        self._apply_playwright_headless(playwright_headless)
+
         try:
             # Start subprocess with piped stdout/stderr
             # Use project_dir as cwd so Claude SDK sandbox allows access to project files
@@ -409,7 +444,7 @@ class AgentProcessManager:
             subprocess_env = {
                 **os.environ,
                 "PYTHONUNBUFFERED": "1",
-                "PLAYWRIGHT_HEADLESS": "true" if playwright_headless else "false",
+                "PLAYWRIGHT_CLI_SESSION": f"agent-{self.project_name}-{os.getpid()}",
                 "NODE_COMPILE_CACHE": "",  # Disable V8 compile caching to prevent .node file accumulation in %TEMP%
                 **api_env,
             }
@@ -469,6 +504,15 @@ class AgentProcessManager:
                 except asyncio.CancelledError:
                     pass
 
+            # Kill browser daemons before stopping agent
+            try:
+                subprocess.run(
+                    ["playwright-cli", "kill-all"],
+                    timeout=5, capture_output=True,
+                )
+            except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
+                pass
+
             # CRITICAL: Kill entire process tree, not just orchestrator
             # This ensures all spawned coding/testing agents are also terminated
             proc = self.process  # Capture reference before async call
diff --git a/start.bat b/start.bat
index 9931c38..9d1e95d 100644
--- a/start.bat
+++ b/start.bat
@@ -54,5 +54,15 @@ REM Install dependencies
 echo Installing dependencies...
 pip install -r requirements.txt --quiet
 
+REM Ensure playwright-cli is available for browser automation
+where playwright-cli >nul 2>&1
+if %ERRORLEVEL% neq 0 (
+    echo Installing playwright-cli for browser automation...
+    call npm install -g @playwright/cli >nul 2>&1
+    if %ERRORLEVEL% neq 0 (
+        echo Note: Could not install playwright-cli. Install manually: npm install -g @playwright/cli
+    )
+)
+
 REM Run the app
 python start.py
diff --git a/start.sh b/start.sh
index 25c8751..9b938af 100755
--- a/start.sh
+++ b/start.sh
@@ -74,5 +74,14 @@ fi
 echo "Installing dependencies..."
 pip install -r requirements.txt --quiet
 
+# Ensure playwright-cli is available for browser automation
+if ! command -v playwright-cli &> /dev/null; then
+    echo "Installing playwright-cli for browser automation..."
+    npm install -g @playwright/cli --quiet 2>/dev/null
+    if [ $? -ne 0 ]; then
+        echo "Note: Could not install playwright-cli. Install manually: npm install -g @playwright/cli"
+    fi
+fi
+
 # Run the app
 python start.py
diff --git a/start_ui.bat b/start_ui.bat
index 3fc67f5..edbc60a 100644
--- a/start_ui.bat
+++ b/start_ui.bat
@@ -37,5 +37,15 @@ REM Install dependencies
 echo Installing dependencies...
 pip install -r requirements.txt --quiet
 
+REM Ensure playwright-cli is available for browser automation
+where playwright-cli >nul 2>&1
+if %ERRORLEVEL% neq 0 (
+    echo Installing playwright-cli for browser automation...
+    call npm install -g @playwright/cli >nul 2>&1
+    if %ERRORLEVEL% neq 0 (
+        echo Note: Could not install playwright-cli. Install manually: npm install -g @playwright/cli
+    )
+)
+
 REM Run the Python launcher
 python "%~dp0start_ui.py" %*
diff --git a/start_ui.sh b/start_ui.sh
index 4381bbe..8c63ff9 100755
--- a/start_ui.sh
+++ b/start_ui.sh
@@ -80,5 +80,14 @@ fi
 echo "Installing dependencies..."
 pip install -r requirements.txt --quiet
 
+# Ensure playwright-cli is available for browser automation
+if ! command -v playwright-cli &> /dev/null; then
+    echo "Installing playwright-cli for browser automation..."
+    npm install -g @playwright/cli --quiet 2>/dev/null
+    if [ $? -ne 0 ]; then
+        echo "Note: Could not install playwright-cli. Install manually: npm install -g @playwright/cli"
+    fi
+fi
+
 # Run the Python launcher
 python start_ui.py "$@"
diff --git a/temp_cleanup.py b/temp_cleanup.py
index 5cfda06..5907908 100644
--- a/temp_cleanup.py
+++ b/temp_cleanup.py
@@ -125,14 +125,18 @@ def cleanup_stale_temp(max_age_seconds: int = MAX_AGE_SECONDS) -> dict:
 
 def cleanup_project_screenshots(project_dir: Path, max_age_seconds: int = 300) -> dict:
     """
-    Clean up stale screenshot files from the project root.
+    Clean up stale Playwright CLI artifacts from the project.
 
-    Playwright browser verification can leave .png files in the project
-    directory. This removes them after they've aged out (default 5 minutes).
+    The Playwright CLI daemon saves screenshots, snapshots, and other artifacts
+    to `{project_dir}/.playwright-cli/`. This removes them after they've aged
+    out (default 5 minutes).
+
+    Also cleans up legacy screenshot patterns from the project root (from the
+    old Playwright MCP server approach).
 
     Args:
         project_dir: Path to the project directory.
-        max_age_seconds: Maximum age in seconds before a screenshot is deleted.
+        max_age_seconds: Maximum age in seconds before an artifact is deleted.
                         Defaults to 5 minutes (300 seconds).
 
     Returns:
@@ -141,13 +145,33 @@ def cleanup_project_screenshots(project_dir: Path, max_age_seconds: int = 300) -
     cutoff_time = time.time() - max_age_seconds
     stats: dict = {"files_deleted": 0, "bytes_freed": 0, "errors": []}
 
-    screenshot_patterns = [
+    # Clean up .playwright-cli/ directory (new CLI approach)
+    playwright_cli_dir = project_dir / ".playwright-cli"
+    if playwright_cli_dir.exists():
+        for item in playwright_cli_dir.iterdir():
+            if not item.is_file():
+                continue
+            try:
+                mtime = item.stat().st_mtime
+                if mtime < cutoff_time:
+                    size = item.stat().st_size
+                    item.unlink(missing_ok=True)
+                    if not item.exists():
+                        stats["files_deleted"] += 1
+                        stats["bytes_freed"] += size
+                        logger.debug(f"Deleted playwright-cli artifact: {item}")
+            except Exception as e:
+                stats["errors"].append(f"Failed to delete {item}: {e}")
+                logger.debug(f"Failed to delete artifact {item}: {e}")
+
+    # Legacy cleanup: root-level screenshot patterns (from old MCP server approach)
+    legacy_patterns = [
         "feature*-*.png",
         "screenshot-*.png",
         "step-*.png",
     ]
 
-    for pattern in screenshot_patterns:
+    for pattern in legacy_patterns:
         for item in project_dir.glob(pattern):
             if not item.is_file():
                 continue
@@ -159,14 +183,14 @@ def cleanup_project_screenshots(project_dir: Path, max_age_seconds: int = 300) -
                     if not item.exists():
                         stats["files_deleted"] += 1
                         stats["bytes_freed"] += size
-                        logger.debug(f"Deleted project screenshot: {item}")
+                        logger.debug(f"Deleted legacy screenshot: {item}")
             except Exception as e:
                 stats["errors"].append(f"Failed to delete {item}: {e}")
                 logger.debug(f"Failed to delete screenshot {item}: {e}")
 
     if stats["files_deleted"] > 0:
         mb_freed = stats["bytes_freed"] / (1024 * 1024)
-        logger.info(f"Screenshot cleanup: {stats['files_deleted']} files, {mb_freed:.1f} MB freed")
+        logger.info(f"Artifact cleanup: {stats['files_deleted']} files, {mb_freed:.1f} MB freed")
 
     return stats
 
diff --git a/test_security.py b/test_security.py
index 1017d1b..ccd2346 100644
--- a/test_security.py
+++ b/test_security.py
@@ -25,6 +25,7 @@ from security import (
     validate_chmod_command,
     validate_init_script,
     validate_pkill_command,
+    validate_playwright_command,
     validate_project_command,
 )
 
@@ -923,6 +924,70 @@ pkill_processes:
     return passed, failed
 
 
+def test_playwright_cli_validation():
+    """Test playwright-cli subcommand validation."""
+    print("\nTesting playwright-cli validation:\n")
+    passed = 0
+    failed = 0
+
+    # Test cases: (command, should_be_allowed, description)
+    test_cases = [
+        # Allowed cases
+        ("playwright-cli screenshot", True, "screenshot allowed"),
+        ("playwright-cli snapshot", True, "snapshot allowed"),
+        ("playwright-cli click e5", True, "click with ref"),
+        ("playwright-cli open http://localhost:3000", True, "open URL"),
+        ("playwright-cli -s=agent-1 click e5", True, "session flag with click"),
+        ("playwright-cli close", True, "close browser"),
+        ("playwright-cli goto http://localhost:3000/page", True, "goto URL"),
+        ("playwright-cli fill e3 'test value'", True, "fill form field"),
+        ("playwright-cli console", True, "console messages"),
+        # Blocked cases
+        ("playwright-cli run-code 'await page.evaluate(() => {})'", False, "run-code blocked"),
+        ("playwright-cli eval 'document.title'", False, "eval blocked"),
+        ("playwright-cli -s=test eval 'document.title'", False, "eval with session flag blocked"),
+    ]
+
+    for cmd, should_allow, description in test_cases:
+        allowed, reason = validate_playwright_command(cmd)
+        if allowed == should_allow:
+            print(f"  PASS: {cmd!r} ({description})")
+            passed += 1
+        else:
+            expected = "allowed" if should_allow else "blocked"
+            actual = "allowed" if allowed else "blocked"
+            print(f"  FAIL: {cmd!r} ({description})")
+            print(f"         Expected: {expected}, Got: {actual}")
+            if reason:
+                print(f"         Reason: {reason}")
+            failed += 1
+
+    # Integration test: verify through the security hook
+    print("\n  Integration tests (via security hook):\n")
+
+    # playwright-cli screenshot should be allowed
+    input_data = {"tool_name": "Bash", "tool_input": {"command": "playwright-cli screenshot"}}
+    result = asyncio.run(bash_security_hook(input_data))
+    if result.get("decision") != "block":
+        print("  PASS: playwright-cli screenshot allowed via hook")
+        passed += 1
+    else:
+        print(f"  FAIL: playwright-cli screenshot should be allowed: {result.get('reason')}")
+        failed += 1
+
+    # playwright-cli run-code should be blocked
+    input_data = {"tool_name": "Bash", "tool_input": {"command": "playwright-cli run-code 'code'"}}
+    result = asyncio.run(bash_security_hook(input_data))
+    if result.get("decision") == "block":
+        print("  PASS: playwright-cli run-code blocked via hook")
+        passed += 1
+    else:
+        print("  FAIL: playwright-cli run-code should be blocked via hook")
+        failed += 1
+
+    return passed, failed
+
+
 def main():
     print("=" * 70)
     print("  SECURITY HOOK TESTS")
@@ -991,6 +1056,11 @@ def main():
     passed += pkill_passed
     failed += pkill_failed
 
+    # Test playwright-cli validation
+    pw_passed, pw_failed = test_playwright_cli_validation()
+    passed += pw_passed
+    failed += pw_failed
+
     # Commands that SHOULD be blocked
     # Note: blocklisted commands (sudo, shutdown, dd, aws) are tested in
     # test_blocklist_enforcement(). chmod validation is tested in
@@ -1012,6 +1082,9 @@ def main():
         # Shell injection attempts
         "$(echo pkill) node",
         'eval "pkill node"',
+        # playwright-cli dangerous subcommands
+        "playwright-cli run-code 'await page.goto(\"http://evil.com\")'",
+        "playwright-cli eval 'document.cookie'",
     ]
 
     for cmd in dangerous:
@@ -1077,6 +1150,12 @@ def main():
         "/usr/local/bin/node app.js",
         # Combined chmod and init.sh (integration test for both validators)
         "chmod +x init.sh && ./init.sh",
+        # Playwright CLI allowed commands
+        "playwright-cli open http://localhost:3000",
+        "playwright-cli screenshot",
+        "playwright-cli snapshot",
+        "playwright-cli click e5",
+        "playwright-cli -s=agent-1 close",
     ]
 
     for cmd in safe:
diff --git a/ui/src/components/ProjectSelector.tsx b/ui/src/components/ProjectSelector.tsx
index 10b4839..06eb8bf 100644
--- a/ui/src/components/ProjectSelector.tsx
+++ b/ui/src/components/ProjectSelector.tsx
@@ -75,6 +75,7 @@ export function ProjectSelector({
             variant="outline"
             className="min-w-[140px] sm:min-w-[200px] justify-between"
             disabled={isLoading}
+            title={selectedProjectData?.path}
           >
             {isLoading ? (
               <Loader2 size={18} className="animate-spin" />
@@ -101,6 +102,7 @@ export function ProjectSelector({
               {projects.map(project => (
                 <DropdownMenuItem
                   key={project.name}
+                  title={project.path}
                   className={`flex items-center justify-between cursor-pointer ${
                     project.name === selectedProject ? 'bg-primary/10' : ''
                   }`}

From 9fb7926df11322c1a7a953b90c5bec3ab598dd97 Mon Sep 17 00:00:00 2001
From: Auto <leon.vanzyl@gmail.com>
Date: Wed, 11 Feb 2026 13:38:30 +0200
Subject: [PATCH 08/14] 0.1.11

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index 638e803..109bca4 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "autoforge-ai",
-  "version": "0.1.10",
+  "version": "0.1.11",
   "description": "Autonomous coding agent with web UI - build complete apps with AI",
   "license": "AGPL-3.0",
   "bin": {

From d1b0b73b20d06abc15b903f4774584eebabb49f1 Mon Sep 17 00:00:00 2001
From: Auto <leon.vanzyl@gmail.com>
Date: Wed, 11 Feb 2026 13:38:55 +0200
Subject: [PATCH 09/14] version patch

---
 ui/package-lock.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ui/package-lock.json b/ui/package-lock.json
index e19d991..a8296ee 100644
--- a/ui/package-lock.json
+++ b/ui/package-lock.json
@@ -56,7 +56,7 @@
     },
     "..": {
       "name": "autoforge-ai",
-      "version": "0.1.10",
+      "version": "0.1.11",
       "license": "AGPL-3.0",
       "bin": {
         "autoforge": "bin/autoforge.js"

From 81e8c37f2987b4eb7335abf53aff20ea7c9beef6 Mon Sep 17 00:00:00 2001
From: Auto <leon.vanzyl@gmail.com>
Date: Wed, 11 Feb 2026 17:09:22 +0200
Subject: [PATCH 10/14] feat: expose read-only MCP tools to all agent types,
 fix settings base URL handling

Add feature_get_ready, feature_get_blocked, and feature_get_graph to
CODING_AGENT_TOOLS, TESTING_AGENT_TOOLS, and INITIALIZER_AGENT_TOOLS.
These read-only tools were available on the MCP server but blocked by
the allowed_tools lists, causing "blocked/not allowed" errors when
agents tried to query project state.

Fix SettingsModal custom base URL input:
- Remove fallback to current settings value when saving, so empty input
  is not silently replaced with the existing URL
- Remove .trim() on the input value to prevent cursor jumping while typing
- Fix "Change" button pre-fill using empty string instead of space

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 client.py                           | 10 +++++++++-
 ui/src/components/SettingsModal.tsx |  9 ++++-----
 2 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/client.py b/client.py
index d44ab41..7547121 100644
--- a/client.py
+++ b/client.py
@@ -141,7 +141,6 @@ def get_extra_read_paths() -> list[Path]:
 # overhead and preventing agents from calling tools meant for other roles.
 #
 # Tools intentionally omitted from ALL agent lists (UI/orchestrator only):
-#   feature_get_ready, feature_get_blocked, feature_get_graph,
 #   feature_remove_dependency
 #
 # The ghost tool "feature_release_testing" was removed entirely -- it was
@@ -151,6 +150,9 @@ CODING_AGENT_TOOLS = [
     "mcp__features__feature_get_stats",
     "mcp__features__feature_get_by_id",
     "mcp__features__feature_get_summary",
+    "mcp__features__feature_get_ready",
+    "mcp__features__feature_get_blocked",
+    "mcp__features__feature_get_graph",
     "mcp__features__feature_claim_and_get",
     "mcp__features__feature_mark_in_progress",
     "mcp__features__feature_mark_passing",
@@ -163,12 +165,18 @@ TESTING_AGENT_TOOLS = [
     "mcp__features__feature_get_stats",
     "mcp__features__feature_get_by_id",
     "mcp__features__feature_get_summary",
+    "mcp__features__feature_get_ready",
+    "mcp__features__feature_get_blocked",
+    "mcp__features__feature_get_graph",
     "mcp__features__feature_mark_passing",
     "mcp__features__feature_mark_failing",
 ]
 
 INITIALIZER_AGENT_TOOLS = [
     "mcp__features__feature_get_stats",
+    "mcp__features__feature_get_ready",
+    "mcp__features__feature_get_blocked",
+    "mcp__features__feature_get_graph",
     "mcp__features__feature_create_bulk",
     "mcp__features__feature_create",
     "mcp__features__feature_add_dependency",
diff --git a/ui/src/components/SettingsModal.tsx b/ui/src/components/SettingsModal.tsx
index dbd9ed4..284e6f1 100644
--- a/ui/src/components/SettingsModal.tsx
+++ b/ui/src/components/SettingsModal.tsx
@@ -83,9 +83,8 @@ export function SettingsModal({ isOpen, onClose }: SettingsModalProps) {
   }
 
   const handleSaveCustomBaseUrl = () => {
-    const effectiveBaseUrl = customBaseUrlInput || settings?.api_base_url || ''
-    if (effectiveBaseUrl.trim() && !updateSettings.isPending) {
-      updateSettings.mutate({ api_base_url: effectiveBaseUrl.trim() })
+    if (customBaseUrlInput.trim() && !updateSettings.isPending) {
+      updateSettings.mutate({ api_base_url: customBaseUrlInput.trim() })
       setCustomBaseUrlInput('')
     }
   }
@@ -299,7 +298,7 @@ export function SettingsModal({ isOpen, onClose }: SettingsModalProps) {
                         variant="ghost"
                         size="sm"
                         className="h-auto py-0.5 px-2 text-xs shrink-0"
-                        onClick={() => setCustomBaseUrlInput(settings.api_base_url || ' ')}
+                        onClick={() => setCustomBaseUrlInput(settings.api_base_url || '')}
                       >
                         Change
                       </Button>
@@ -309,7 +308,7 @@ export function SettingsModal({ isOpen, onClose }: SettingsModalProps) {
                     <div className="flex gap-2">
                       <input
                         type="text"
-                        value={customBaseUrlInput.trim()}
+                        value={customBaseUrlInput}
                         onChange={(e) => setCustomBaseUrlInput(e.target.value)}
                         placeholder={currentProvider === 'azure' ? 'https://your-resource.services.ai.azure.com/anthropic' : 'https://api.example.com/v1'}
                         className="flex-1 py-1.5 px-3 text-sm border rounded-md bg-background"

From 8a934c33744b8c56efefbfaad8b8297903ff1ec6 Mon Sep 17 00:00:00 2001
From: Auto <leon.vanzyl@gmail.com>
Date: Wed, 11 Feb 2026 18:48:19 +0200
Subject: [PATCH 11/14] fix: isolate Playwright CLI browser sessions per agent
 in parallel mode

Set unique PLAYWRIGHT_CLI_SESSION environment variable for each spawned
agent subprocess to prevent concurrent agents from sharing a single
browser instance and interfering with each other's navigation.

- _spawn_coding_agent: session named "coding-{feature_id}"
- _spawn_coding_agent_batch: session named "coding-{primary_id}"
- _spawn_testing_agent: session named "testing-{counter}" using an
  incrementing counter (since multiple testing agents can test
  overlapping features, feature ID alone isn't sufficient)

Previously, after migrating from Playwright MCP to CLI, all parallel
agents shared the default browser session, causing them to navigate
away from each other's pages.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 parallel_orchestrator.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/parallel_orchestrator.py b/parallel_orchestrator.py
index fc7fe7a..419e093 100644
--- a/parallel_orchestrator.py
+++ b/parallel_orchestrator.py
@@ -194,6 +194,7 @@ class ParallelOrchestrator:
         # Legacy alias for backward compatibility
         self.running_agents = self.running_coding_agents
         self.abort_events: dict[int, threading.Event] = {}
+        self._testing_session_counter = 0
         self.is_running = False
 
         # Track feature failures to prevent infinite retry loops
@@ -846,7 +847,7 @@ class ParallelOrchestrator:
                 "encoding": "utf-8",
                 "errors": "replace",
                 "cwd": str(self.project_dir),  # Run from project dir so CLI creates .claude/ in project
-                "env": {**os.environ, "PYTHONUNBUFFERED": "1", "NODE_COMPILE_CACHE": ""},
+                "env": {**os.environ, "PYTHONUNBUFFERED": "1", "NODE_COMPILE_CACHE": "", "PLAYWRIGHT_CLI_SESSION": f"coding-{feature_id}"},
             }
             if sys.platform == "win32":
                 popen_kwargs["creationflags"] = subprocess.CREATE_NO_WINDOW
@@ -909,7 +910,7 @@ class ParallelOrchestrator:
                 "encoding": "utf-8",
                 "errors": "replace",
                 "cwd": str(self.project_dir),  # Run from project dir so CLI creates .claude/ in project
-                "env": {**os.environ, "PYTHONUNBUFFERED": "1", "NODE_COMPILE_CACHE": ""},
+                "env": {**os.environ, "PYTHONUNBUFFERED": "1", "NODE_COMPILE_CACHE": "", "PLAYWRIGHT_CLI_SESSION": f"coding-{primary_id}"},
             }
             if sys.platform == "win32":
                 popen_kwargs["creationflags"] = subprocess.CREATE_NO_WINDOW
@@ -1013,8 +1014,9 @@ class ParallelOrchestrator:
                     "encoding": "utf-8",
                     "errors": "replace",
                     "cwd": str(self.project_dir),  # Run from project dir so CLI creates .claude/ in project
-                    "env": {**os.environ, "PYTHONUNBUFFERED": "1", "NODE_COMPILE_CACHE": ""},
+                    "env": {**os.environ, "PYTHONUNBUFFERED": "1", "NODE_COMPILE_CACHE": "", "PLAYWRIGHT_CLI_SESSION": f"testing-{self._testing_session_counter}"},
                 }
+                self._testing_session_counter += 1
                 if sys.platform == "win32":
                     popen_kwargs["creationflags"] = subprocess.CREATE_NO_WINDOW
 

From 4e84de38390aafc9c7545518fde520385b6d5324 Mon Sep 17 00:00:00 2001
From: Auto <leon.vanzyl@gmail.com>
Date: Wed, 11 Feb 2026 18:48:21 +0200
Subject: [PATCH 12/14] 0.1.12

---
 package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/package.json b/package.json
index 109bca4..e66499b 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "autoforge-ai",
-  "version": "0.1.11",
+  "version": "0.1.12",
   "description": "Autonomous coding agent with web UI - build complete apps with AI",
   "license": "AGPL-3.0",
   "bin": {

From 76dd4b8d80b5546c8721357b506c3a3748319afc Mon Sep 17 00:00:00 2001
From: Auto <leon.vanzyl@gmail.com>
Date: Wed, 11 Feb 2026 18:48:44 +0200
Subject: [PATCH 13/14] version patch

---
 ui/package-lock.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ui/package-lock.json b/ui/package-lock.json
index a8296ee..508a7fc 100644
--- a/ui/package-lock.json
+++ b/ui/package-lock.json
@@ -56,7 +56,7 @@
     },
     "..": {
       "name": "autoforge-ai",
-      "version": "0.1.11",
+      "version": "0.1.12",
       "license": "AGPL-3.0",
       "bin": {
         "autoforge": "bin/autoforge.js"

From f4636fdfd5e33ac954360269b249f1f9cb561d4e Mon Sep 17 00:00:00 2001
From: Auto <leon.vanzyl@gmail.com>
Date: Thu, 12 Feb 2026 07:28:37 +0200
Subject: [PATCH 14/14] fix: handle pausing/draining states in UI guards and
 process cleanup

Follow-up fixes after merging PR #183 (graceful pause/drain mode):

- process_manager: _stream_output finally block now transitions from
  pausing/paused_graceful to crashed/stopped (not just running), and
  cleans up the drain signal file on process exit
- App.tsx: block Reset button and R shortcut during pausing/paused_graceful
- AgentThought/ProgressDashboard: keep thought bubble visible while pausing
- OrchestratorAvatar: add draining/paused cases to animation, glow, and
  description switch statements
- AgentMissionControl: show Draining/Paused badge text for new states
- registry.py: remove redundant type annotation to fix mypy no-redef
- process_manager.py: add type:ignore for SQLAlchemy Column assignment
- websocket.py: reclassify test-pass lines as 'testing' not 'success'
- review-pr.md: add post-review recommended action guidance

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .claude/commands/review-pr.md             | 19 ++++++++++++++++++-
 registry.py                               |  2 +-
 server/services/process_manager.py        | 12 +++++++++---
 server/websocket.py                       |  2 +-
 ui/src/App.tsx                            |  6 +++---
 ui/src/components/AgentMissionControl.tsx | 10 +++++++---
 ui/src/components/AgentThought.tsx        |  2 +-
 ui/src/components/OrchestratorAvatar.tsx  | 12 ++++++++++++
 ui/src/components/ProgressDashboard.tsx   |  2 +-
 9 files changed, 53 insertions(+), 14 deletions(-)

diff --git a/.claude/commands/review-pr.md b/.claude/commands/review-pr.md
index 027c807..d176111 100644
--- a/.claude/commands/review-pr.md
+++ b/.claude/commands/review-pr.md
@@ -72,4 +72,21 @@ Pull request(s): $ARGUMENTS
      - What this PR is actually about (one sentence)
      - The key concerns, if any (or "no significant concerns")
      - **Verdict: MERGE** / **MERGE (with minor follow-up)** / **DON'T MERGE** with a one-line reason
-   - This section should be scannable in under 10 seconds
\ No newline at end of file
+   - This section should be scannable in under 10 seconds
+
+10. **Post-Review Action**
+    - Immediately after the TLDR, provide a `## Recommended Action` section
+    - Based on the verdict, recommend one of the following actions:
+
+    **If verdict is MERGE (no concerns):**
+    - Recommend merging as-is. No further action needed.
+
+    **If verdict is MERGE (with minor follow-up):**
+    - If the concerns are low-risk and straightforward to fix (e.g., naming tweaks, small refactors, missing type annotations, minor style issues, trivial bug fixes), recommend merging the PR now and offer to immediately address the concerns in a follow-up commit directly on the target branch
+    - List the specific changes you would make in the follow-up
+    - Ask the user: *"Should I merge this PR and push a follow-up commit addressing these concerns?"*
+
+    **If verdict is DON'T MERGE:**
+    - If the blocking concerns are still relatively contained and you are confident you can resolve them quickly (e.g., a small bug fix, a missing validation, a straightforward architectural adjustment), recommend merging the PR and immediately addressing the issues in a follow-up commit — but only if the fixes are low-risk and well-understood
+    - If the issues are too complex, risky, or require author input (e.g., design decisions, major refactors, unclear intent), recommend sending the PR back to the author with specific feedback on what needs to change
+    - Be honest about your confidence level — if you're unsure whether you can address the concerns correctly, say so and defer to the author
\ No newline at end of file
diff --git a/registry.py b/registry.py
index 60aed4e..3940d58 100644
--- a/registry.py
+++ b/registry.py
@@ -743,7 +743,7 @@ def get_effective_sdk_env() -> dict[str, str]:
                 sdk_env[var] = value
         return sdk_env
 
-    sdk_env: dict[str, str] = {}
+    sdk_env = {}
 
     # Explicitly clear credentials that could leak from the server process env.
     # For providers using ANTHROPIC_AUTH_TOKEN (GLM, Custom), clear ANTHROPIC_API_KEY.
diff --git a/server/services/process_manager.py b/server/services/process_manager.py
index c38e6ec..e21ffef 100644
--- a/server/services/process_manager.py
+++ b/server/services/process_manager.py
@@ -277,7 +277,7 @@ class AgentProcessManager:
                 ).all()
                 if stuck:
                     for f in stuck:
-                        f.in_progress = False
+                        f.in_progress = False  # type: ignore[assignment]
                     session.commit()
                     logger.info(
                         "Cleaned up %d stuck feature(s) for %s",
@@ -346,7 +346,7 @@ class AgentProcessManager:
             # Check if process ended
             if self.process and self.process.poll() is not None:
                 exit_code = self.process.returncode
-                if exit_code != 0 and self.status == "running":
+                if exit_code != 0 and self.status in ("running", "pausing", "paused_graceful"):
                     # Check buffered output for auth errors if we haven't detected one yet
                     if not auth_error_detected:
                         combined_output = '\n'.join(output_buffer)
@@ -354,10 +354,16 @@ class AgentProcessManager:
                             for help_line in AUTH_ERROR_HELP.strip().split('\n'):
                                 await self._broadcast_output(help_line)
                     self.status = "crashed"
-                elif self.status == "running":
+                elif self.status in ("running", "pausing", "paused_graceful"):
                     self.status = "stopped"
                 self._cleanup_stale_features()
                 self._remove_lock()
+                # Clean up drain signal file if present
+                try:
+                    from autoforge_paths import get_pause_drain_path
+                    get_pause_drain_path(self.project_dir).unlink(missing_ok=True)
+                except Exception:
+                    pass
 
     async def start(
         self,
diff --git a/server/websocket.py b/server/websocket.py
index ef57bf0..6f61ebc 100644
--- a/server/websocket.py
+++ b/server/websocket.py
@@ -61,7 +61,7 @@ THOUGHT_PATTERNS = [
     (re.compile(r'(?:Testing|Verifying|Running tests|Validating)\s+(.+)', re.I), 'testing'),
     (re.compile(r'(?:Error|Failed|Cannot|Unable to|Exception)\s+(.+)', re.I), 'struggling'),
     # Test results
-    (re.compile(r'(?:PASS|passed|success)', re.I), 'success'),
+    (re.compile(r'(?:PASS|passed|success)', re.I), 'testing'),
     (re.compile(r'(?:FAIL|failed|error)', re.I), 'struggling'),
 ]
 
diff --git a/ui/src/App.tsx b/ui/src/App.tsx
index b6784fc..cb02b2d 100644
--- a/ui/src/App.tsx
+++ b/ui/src/App.tsx
@@ -210,8 +210,8 @@ function App() {
         setShowKeyboardHelp(true)
       }
 
-      // R : Open reset modal (when project selected and agent not running)
-      if ((e.key === 'r' || e.key === 'R') && selectedProject && wsState.agentStatus !== 'running') {
+      // R : Open reset modal (when project selected and agent not running/draining)
+      if ((e.key === 'r' || e.key === 'R') && selectedProject && !['running', 'pausing', 'paused_graceful'].includes(wsState.agentStatus)) {
         e.preventDefault()
         setShowResetModal(true)
       }
@@ -380,7 +380,7 @@ function App() {
                       variant="outline"
                       size="sm"
                       aria-label="Reset Project"
-                      disabled={wsState.agentStatus === 'running'}
+                      disabled={['running', 'pausing', 'paused_graceful'].includes(wsState.agentStatus)}
                     >
                       <RotateCcw size={18} />
                     </Button>
diff --git a/ui/src/components/AgentMissionControl.tsx b/ui/src/components/AgentMissionControl.tsx
index 07fd594..66e608a 100644
--- a/ui/src/components/AgentMissionControl.tsx
+++ b/ui/src/components/AgentMissionControl.tsx
@@ -72,9 +72,13 @@ export function AgentMissionControl({
               ? `${agents.length} ${agents.length === 1 ? 'agent' : 'agents'} active`
               : orchestratorStatus?.state === 'initializing'
                 ? 'Initializing'
-                : orchestratorStatus?.state === 'complete'
-                  ? 'Complete'
-                  : 'Orchestrating'
+                : orchestratorStatus?.state === 'draining'
+                  ? 'Draining'
+                  : orchestratorStatus?.state === 'paused'
+                    ? 'Paused'
+                    : orchestratorStatus?.state === 'complete'
+                      ? 'Complete'
+                      : 'Orchestrating'
             }
           </Badge>
         </div>
diff --git a/ui/src/components/AgentThought.tsx b/ui/src/components/AgentThought.tsx
index df24934..83cab59 100644
--- a/ui/src/components/AgentThought.tsx
+++ b/ui/src/components/AgentThought.tsx
@@ -63,7 +63,7 @@ export function AgentThought({ logs, agentStatus }: AgentThoughtProps) {
   // Determine if component should be visible
   const shouldShow = useMemo(() => {
     if (!thought) return false
-    if (agentStatus === 'running') return true
+    if (agentStatus === 'running' || agentStatus === 'pausing') return true
     if (agentStatus === 'paused') {
       return Date.now() - lastLogTimestamp < IDLE_TIMEOUT
     }
diff --git a/ui/src/components/OrchestratorAvatar.tsx b/ui/src/components/OrchestratorAvatar.tsx
index bbf3dab..2033894 100644
--- a/ui/src/components/OrchestratorAvatar.tsx
+++ b/ui/src/components/OrchestratorAvatar.tsx
@@ -103,6 +103,10 @@ function getStateAnimation(state: OrchestratorState): string {
       return 'animate-working'
     case 'monitoring':
       return 'animate-bounce-gentle'
+    case 'draining':
+      return 'animate-thinking'
+    case 'paused':
+      return ''
     case 'complete':
       return 'animate-celebrate'
     default:
@@ -121,6 +125,10 @@ function getStateGlow(state: OrchestratorState): string {
       return 'shadow-[0_0_16px_rgba(124,58,237,0.6)]'
     case 'monitoring':
       return 'shadow-[0_0_8px_rgba(167,139,250,0.4)]'
+    case 'draining':
+      return 'shadow-[0_0_10px_rgba(251,191,36,0.5)]'
+    case 'paused':
+      return ''
     case 'complete':
       return 'shadow-[0_0_20px_rgba(112,224,0,0.6)]'
     default:
@@ -141,6 +149,10 @@ function getStateDescription(state: OrchestratorState): string {
       return 'spawning agents'
     case 'monitoring':
       return 'monitoring progress'
+    case 'draining':
+      return 'draining active agents'
+    case 'paused':
+      return 'paused'
     case 'complete':
       return 'all features complete'
     default:
diff --git a/ui/src/components/ProgressDashboard.tsx b/ui/src/components/ProgressDashboard.tsx
index 4a1865f..a803bc9 100644
--- a/ui/src/components/ProgressDashboard.tsx
+++ b/ui/src/components/ProgressDashboard.tsx
@@ -55,7 +55,7 @@ export function ProgressDashboard({
 
   const showThought = useMemo(() => {
     if (!thought) return false
-    if (agentStatus === 'running') return true
+    if (agentStatus === 'running' || agentStatus === 'pausing') return true
     if (agentStatus === 'paused') {
       return Date.now() - lastLogTimestamp < IDLE_TIMEOUT
     }