Merge pull request #230 from AutoForgeAI/embed-browser-windows

Embed browser windows
This commit is contained in:
Leon van Zyl
2026-04-01 11:30:07 +02:00
committed by GitHub
14 changed files with 744 additions and 57 deletions

View File

@@ -1,6 +1,6 @@
{
"name": "autoforge-ai",
"version": "0.1.19",
"version": "0.1.20",
"description": "Autonomous coding agent with web UI - build complete apps with AI",
"license": "AGPL-3.0",
"bin": {

View File

@@ -17,11 +17,11 @@ from ..utils.project_helpers import get_project_path as _get_project_path
from ..utils.validation import validate_project_name
def _get_settings_defaults() -> tuple[bool, str, int, bool, int, int]:
def _get_settings_defaults() -> tuple[bool, str, int, int, int]:
"""Get defaults from global settings.
Returns:
Tuple of (yolo_mode, model, testing_agent_ratio, playwright_headless, batch_size, testing_batch_size)
Tuple of (yolo_mode, model, testing_agent_ratio, batch_size, testing_batch_size)
"""
import sys
root = Path(__file__).parent.parent.parent
@@ -40,8 +40,6 @@ def _get_settings_defaults() -> tuple[bool, str, int, bool, int, int]:
except (ValueError, TypeError):
testing_agent_ratio = 1
playwright_headless = (settings.get("playwright_headless") or "true").lower() == "true"
try:
batch_size = int(settings.get("batch_size", "3"))
except (ValueError, TypeError):
@@ -52,7 +50,7 @@ def _get_settings_defaults() -> tuple[bool, str, int, bool, int, int]:
except (ValueError, TypeError):
testing_batch_size = 3
return yolo_mode, model, testing_agent_ratio, playwright_headless, batch_size, testing_batch_size
return yolo_mode, model, testing_agent_ratio, batch_size, testing_batch_size
router = APIRouter(prefix="/api/projects/{project_name}/agent", tags=["agent"])
@@ -101,7 +99,7 @@ async def start_agent(
manager = get_project_manager(project_name)
# Get defaults from global settings if not provided in request
default_yolo, default_model, default_testing_ratio, playwright_headless, default_batch_size, default_testing_batch_size = _get_settings_defaults()
default_yolo, default_model, default_testing_ratio, default_batch_size, default_testing_batch_size = _get_settings_defaults()
yolo_mode = request.yolo_mode if request.yolo_mode is not None else default_yolo
model = request.model if request.model else default_model
@@ -111,12 +109,13 @@ async def start_agent(
batch_size = default_batch_size
testing_batch_size = default_testing_batch_size
# Always run headless - the embedded browser view panel replaces desktop windows
success, message = await manager.start(
yolo_mode=yolo_mode,
model=model,
max_concurrency=max_concurrency,
testing_agent_ratio=testing_agent_ratio,
playwright_headless=playwright_headless,
playwright_headless=True,
batch_size=batch_size,
testing_batch_size=testing_batch_size,
)

View File

@@ -111,7 +111,7 @@ async def get_settings():
glm_mode=glm_mode,
ollama_mode=ollama_mode,
testing_agent_ratio=_parse_int(all_settings.get("testing_agent_ratio"), 1),
playwright_headless=_parse_bool(all_settings.get("playwright_headless"), default=True),
playwright_headless=True, # Always headless - embedded browser view replaces desktop windows
batch_size=_parse_int(all_settings.get("batch_size"), 3),
testing_batch_size=_parse_int(all_settings.get("testing_batch_size"), 3),
api_provider=api_provider,
@@ -133,8 +133,8 @@ async def update_settings(update: SettingsUpdate):
if update.testing_agent_ratio is not None:
set_setting("testing_agent_ratio", str(update.testing_agent_ratio))
if update.playwright_headless is not None:
set_setting("playwright_headless", "true" if update.playwright_headless else "false")
# playwright_headless is no longer user-configurable; always headless
# with embedded browser view panel in the UI
if update.batch_size is not None:
set_setting("batch_size", str(update.batch_size))
@@ -179,7 +179,7 @@ async def update_settings(update: SettingsUpdate):
glm_mode=glm_mode,
ollama_mode=ollama_mode,
testing_agent_ratio=_parse_int(all_settings.get("testing_agent_ratio"), 1),
playwright_headless=_parse_bool(all_settings.get("playwright_headless"), default=True),
playwright_headless=True, # Always headless - embedded browser view replaces desktop windows
batch_size=_parse_int(all_settings.get("batch_size"), 3),
testing_batch_size=_parse_int(all_settings.get("testing_batch_size"), 3),
api_provider=api_provider,

View File

@@ -0,0 +1,280 @@
"""
Browser View Service
====================
Captures periodic screenshots from active playwright-cli browser sessions
and streams them to the UI via WebSocket callbacks.
Each agent gets an isolated browser session (e.g., coding-5, testing-0).
This service polls those sessions with `playwright-cli screenshot` and
delivers the frames to subscribed UI clients.
"""
import asyncio
import base64
import logging
import shutil
import threading
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import Awaitable, Callable
logger = logging.getLogger(__name__)
POLL_INTERVAL = 2.0 # seconds between screenshot captures
BACKOFF_INTERVAL = 10.0 # seconds after repeated failures
MAX_FAILURES_BEFORE_BACKOFF = 10
MAX_FAILURES_BEFORE_STOP = 90 # ~3 minutes at normal rate before giving up
SCREENSHOT_TIMEOUT = 5 # seconds
@dataclass
class SessionInfo:
"""Metadata for an active browser session."""
session_name: str
agent_index: int
agent_type: str # "coding" or "testing"
feature_id: int
feature_name: str
consecutive_failures: int = 0
stopped: bool = False
@dataclass
class ScreenshotData:
"""A captured screenshot ready for delivery."""
session_name: str
agent_index: int
agent_type: str
feature_id: int
feature_name: str
image_base64: str # base64-encoded PNG
timestamp: str
class BrowserViewService:
"""Manages screenshot capture for active agent browser sessions.
Follows the same singleton-per-project pattern as DevServerProcessManager.
"""
def __init__(self, project_name: str, project_dir: Path):
self.project_name = project_name
self.project_dir = project_dir
self._active_sessions: dict[str, SessionInfo] = {}
self._subscribers = 0
self._poll_task: asyncio.Task | None = None
self._screenshot_callbacks: set[Callable[[ScreenshotData], Awaitable[None]]] = set()
self._lock = asyncio.Lock()
self._playwright_cli: str | None = None
def _get_playwright_cli(self) -> str | None:
"""Find playwright-cli executable."""
if self._playwright_cli is not None:
return self._playwright_cli
path = shutil.which("playwright-cli")
if path:
self._playwright_cli = path
else:
logger.warning("playwright-cli not found in PATH; browser view disabled")
return self._playwright_cli
async def register_session(
self,
session_name: str,
agent_index: int,
agent_type: str,
feature_id: int,
feature_name: str,
) -> None:
"""Register an agent's browser session for screenshot capture."""
async with self._lock:
self._active_sessions[session_name] = SessionInfo(
session_name=session_name,
agent_index=agent_index,
agent_type=agent_type,
feature_id=feature_id,
feature_name=feature_name,
)
logger.debug("Registered browser session: %s", session_name)
async def unregister_session(self, session_name: str) -> None:
"""Unregister a browser session when agent completes."""
async with self._lock:
removed = self._active_sessions.pop(session_name, None)
if removed:
logger.debug("Unregistered browser session: %s", session_name)
# Clean up screenshot file
self._cleanup_screenshot_file(session_name)
def add_screenshot_callback(self, callback: Callable[[ScreenshotData], Awaitable[None]]) -> None:
self._screenshot_callbacks.add(callback)
def remove_screenshot_callback(self, callback: Callable[[ScreenshotData], Awaitable[None]]) -> None:
self._screenshot_callbacks.discard(callback)
async def add_subscriber(self) -> None:
"""Called when a UI client wants browser screenshots."""
async with self._lock:
self._subscribers += 1
if self._subscribers == 1:
self._start_polling()
async def remove_subscriber(self) -> None:
"""Called when a UI client stops wanting screenshots."""
async with self._lock:
self._subscribers = max(0, self._subscribers - 1)
if self._subscribers == 0:
self._stop_polling()
async def stop(self) -> None:
"""Clean up all sessions and stop polling."""
async with self._lock:
for session_name in list(self._active_sessions):
self._cleanup_screenshot_file(session_name)
self._active_sessions.clear()
self._stop_polling()
def _start_polling(self) -> None:
"""Start the screenshot polling loop."""
if self._poll_task is not None and not self._poll_task.done():
return
self._poll_task = asyncio.create_task(self._poll_loop())
logger.info("Started browser screenshot polling for %s", self.project_name)
def _stop_polling(self) -> None:
"""Stop the screenshot polling loop."""
if self._poll_task is not None and not self._poll_task.done():
self._poll_task.cancel()
self._poll_task = None
logger.info("Stopped browser screenshot polling for %s", self.project_name)
async def _poll_loop(self) -> None:
"""Main polling loop - capture screenshots for all active sessions."""
try:
while True:
async with self._lock:
sessions = list(self._active_sessions.values())
if sessions and self._screenshot_callbacks:
# Capture screenshots with limited concurrency
sem = asyncio.Semaphore(3)
async def capture_with_sem(session: SessionInfo) -> None:
async with sem:
await self._capture_and_deliver(session)
await asyncio.gather(
*(capture_with_sem(s) for s in sessions if not s.stopped),
return_exceptions=True,
)
await asyncio.sleep(POLL_INTERVAL)
except asyncio.CancelledError:
pass
except Exception:
logger.warning("Browser screenshot polling crashed", exc_info=True)
async def _capture_and_deliver(self, session: SessionInfo) -> None:
"""Capture a screenshot for a session and deliver to callbacks."""
cli = self._get_playwright_cli()
if not cli:
return
# Determine interval based on failure count
if session.consecutive_failures >= MAX_FAILURES_BEFORE_BACKOFF:
# In backoff mode - only capture every BACKOFF_INTERVAL/POLL_INTERVAL polls
# We achieve this by checking a simple modulo on failure count
if session.consecutive_failures % int(BACKOFF_INTERVAL / POLL_INTERVAL) != 0:
return
screenshot_dir = self.project_dir / ".playwright-cli"
screenshot_dir.mkdir(parents=True, exist_ok=True)
screenshot_path = screenshot_dir / f"_view_{session.session_name}.png"
try:
proc = await asyncio.create_subprocess_exec(
cli, "-s", session.session_name, "screenshot",
f"--filename={screenshot_path}",
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
cwd=str(self.project_dir),
)
_, stderr = await asyncio.wait_for(proc.communicate(), timeout=SCREENSHOT_TIMEOUT)
if proc.returncode != 0:
session.consecutive_failures += 1
if session.consecutive_failures >= MAX_FAILURES_BEFORE_STOP:
session.stopped = True
logger.debug(
"Stopped polling session %s after %d failures",
session.session_name, session.consecutive_failures,
)
return
# Read and encode the screenshot
if not screenshot_path.exists():
session.consecutive_failures += 1
return
image_bytes = screenshot_path.read_bytes()
image_base64 = base64.b64encode(image_bytes).decode("ascii")
# Reset failure counter on success
session.consecutive_failures = 0
# Re-enable if previously stopped
session.stopped = False
screenshot = ScreenshotData(
session_name=session.session_name,
agent_index=session.agent_index,
agent_type=session.agent_type,
feature_id=session.feature_id,
feature_name=session.feature_name,
image_base64=image_base64,
timestamp=datetime.now().isoformat(),
)
# Deliver to all callbacks
for callback in list(self._screenshot_callbacks):
try:
await callback(screenshot)
except Exception:
pass # Connection may be closed
except asyncio.TimeoutError:
session.consecutive_failures += 1
except Exception:
session.consecutive_failures += 1
finally:
# Clean up the screenshot file
try:
screenshot_path.unlink(missing_ok=True)
except Exception:
pass
def _cleanup_screenshot_file(self, session_name: str) -> None:
"""Remove a session's screenshot file."""
try:
path = self.project_dir / ".playwright-cli" / f"_view_{session_name}.png"
path.unlink(missing_ok=True)
except Exception:
pass
# ---------------------------------------------------------------------------
# Global instance management (thread-safe)
# ---------------------------------------------------------------------------
_services: dict[tuple[str, str], BrowserViewService] = {}
_services_lock = threading.Lock()
def get_browser_view_service(project_name: str, project_dir: Path) -> BrowserViewService:
"""Get or create a BrowserViewService for a project (thread-safe)."""
with _services_lock:
key = (project_name, str(project_dir.resolve()))
if key not in _services:
_services[key] = BrowserViewService(project_name, project_dir)
return _services[key]

View File

@@ -16,6 +16,7 @@ from typing import Set
from fastapi import WebSocket, WebSocketDisconnect
from .schemas import AGENT_MASCOTS
from .services.browser_view_service import get_browser_view_service
from .services.chat_constants import ROOT_DIR
from .services.dev_server_manager import get_devserver_manager
from .services.process_manager import get_manager
@@ -787,8 +788,39 @@ async def project_websocket(websocket: WebSocket, project_name: str):
# Create orchestrator tracker for observability
orchestrator_tracker = OrchestratorTracker()
# Get browser view service for embedded browser screenshots
browser_view_service = get_browser_view_service(project_name, project_dir)
browser_view_subscribed = False
# Counter to mirror orchestrator's testing session naming (testing-0, testing-1, ...)
testing_session_counter = 0
# Deferred session registration: store metadata at agent start, register on first browser command.
# This avoids premature polling failures when agents spend time reading/planning before opening a browser.
# Key: session_name -> registration kwargs
pending_browser_sessions: dict[str, dict] = {}
# Track which feature IDs map to which session names (for deferred lookup)
feature_to_session: dict[int, str] = {}
async def on_screenshot(screenshot):
"""Handle browser screenshot - send to this WebSocket."""
try:
await websocket.send_json({
"type": "browser_screenshot",
"sessionName": screenshot.session_name,
"agentIndex": screenshot.agent_index,
"agentType": screenshot.agent_type,
"featureId": screenshot.feature_id,
"featureName": screenshot.feature_name,
"imageData": screenshot.image_base64,
"timestamp": screenshot.timestamp,
})
except Exception:
pass # Connection may be closed
browser_view_service.add_screenshot_callback(on_screenshot)
async def on_output(line: str):
"""Handle agent output - broadcast to this WebSocket."""
nonlocal testing_session_counter
try:
# Extract feature ID from line if present
feature_id = None
@@ -817,6 +849,48 @@ async def project_websocket(websocket: WebSocket, project_name: str):
if agent_update:
await websocket.send_json(agent_update)
# Register/unregister browser sessions based on agent lifecycle
update_state = agent_update.get("state")
update_type = agent_update.get("agentType", "coding")
update_feature_id = agent_update.get("featureId", 0)
update_feature_name = agent_update.get("featureName", "")
update_agent_index = agent_update.get("agentIndex", 0)
if update_state == "thinking" and agent_update.get("thought") in ("Starting work...", "Starting batch work..."):
# Agent just started - defer browser session registration until
# we detect an actual playwright-cli open/goto command. This avoids
# polling failures while the agent is still reading code / planning.
if update_type == "coding":
session_name = f"coding-{update_feature_id}"
else:
session_name = f"testing-{testing_session_counter}"
testing_session_counter += 1
pending_browser_sessions[session_name] = dict(
session_name=session_name,
agent_index=update_agent_index,
agent_type=update_type,
feature_id=update_feature_id,
feature_name=update_feature_name,
)
feature_to_session[update_feature_id] = session_name
elif update_state in ("success", "error"):
# Agent completed - unregister browser session
if update_type == "coding":
session_name = f"coding-{update_feature_id}"
await browser_view_service.unregister_session(session_name)
pending_browser_sessions.pop(session_name, None)
feature_to_session.pop(update_feature_id, None)
# Testing sessions are cleaned up on orchestrator stop
# Detect playwright-cli browser commands and activate deferred sessions
if feature_id is not None and "playwright-cli" in line and any(
kw in line for kw in ("open ", "goto ", "open\t", "goto\t")
):
sess_name = feature_to_session.get(feature_id)
if sess_name and sess_name in pending_browser_sessions:
reg = pending_browser_sessions.pop(sess_name)
await browser_view_service.register_session(**reg)
# Also check for orchestrator events and emit orchestrator_update messages
orch_update = await orchestrator_tracker.process_line(line)
if orch_update:
@@ -826,6 +900,7 @@ async def project_websocket(websocket: WebSocket, project_name: str):
async def on_status_change(status: str):
"""Handle status change - broadcast to this WebSocket."""
nonlocal testing_session_counter
try:
await websocket.send_json({
"type": "agent_status",
@@ -835,6 +910,10 @@ async def project_websocket(websocket: WebSocket, project_name: str):
if status in ("stopped", "crashed"):
await agent_tracker.reset()
await orchestrator_tracker.reset()
await browser_view_service.stop()
testing_session_counter = 0
pending_browser_sessions.clear()
feature_to_session.clear()
except Exception:
pass # Connection may be closed
@@ -908,10 +987,23 @@ async def project_websocket(websocket: WebSocket, project_name: str):
data = await websocket.receive_text()
message = json.loads(data)
msg_type = message.get("type")
# Handle ping
if message.get("type") == "ping":
if msg_type == "ping":
await websocket.send_json({"type": "pong"})
# Handle browser view subscribe/unsubscribe
elif msg_type == "browser_view_subscribe":
if not browser_view_subscribed:
browser_view_subscribed = True
await browser_view_service.add_subscriber()
elif msg_type == "browser_view_unsubscribe":
if browser_view_subscribed:
browser_view_subscribed = False
await browser_view_service.remove_subscriber()
except WebSocketDisconnect:
break
except json.JSONDecodeError:
@@ -935,5 +1027,10 @@ async def project_websocket(websocket: WebSocket, project_name: str):
devserver_manager.remove_output_callback(on_dev_output)
devserver_manager.remove_status_callback(on_dev_status_change)
# Unregister browser view callbacks and subscriber
browser_view_service.remove_screenshot_callback(on_screenshot)
if browser_view_subscribed:
await browser_view_service.remove_subscriber()
# Disconnect from manager
await manager.disconnect(websocket, project_name)

2
ui/package-lock.json generated
View File

@@ -56,7 +56,7 @@
},
"..": {
"name": "autoforge-ai",
"version": "0.1.19",
"version": "0.1.20",
"license": "AGPL-3.0",
"bin": {
"autoforge": "bin/autoforge.js"

View File

@@ -436,6 +436,7 @@ function App() {
orchestratorStatus={wsState.orchestratorStatus}
recentActivity={wsState.recentActivity}
getAgentLogs={wsState.getAgentLogs}
browserScreenshots={wsState.browserScreenshots}
/>
@@ -573,6 +574,9 @@ function App() {
projectName={selectedProject}
activeTab={debugActiveTab}
onTabChange={setDebugActiveTab}
browserScreenshots={wsState.browserScreenshots}
onSubscribeBrowserView={wsState.subscribeBrowserView}
onUnsubscribeBrowserView={wsState.unsubscribeBrowserView}
/>
)}

View File

@@ -1,8 +1,9 @@
import { MessageCircle, ScrollText, X, Copy, Check, Code, FlaskConical } from 'lucide-react'
import { MessageCircle, ScrollText, X, Copy, Check, Code, FlaskConical, Maximize2 } from 'lucide-react'
import { useState } from 'react'
import { createPortal } from 'react-dom'
import { AgentAvatar } from './AgentAvatar'
import type { ActiveAgent, AgentLogEntry, AgentType } from '../lib/types'
import type { ActiveAgent, AgentLogEntry, AgentType, BrowserScreenshot } from '../lib/types'
import { AGENT_MASCOTS } from '../lib/types'
import { Card, CardContent } from '@/components/ui/card'
import { Button } from '@/components/ui/button'
import { Badge } from '@/components/ui/badge'
@@ -10,6 +11,7 @@ import { Badge } from '@/components/ui/badge'
interface AgentCardProps {
agent: ActiveAgent
onShowLogs?: (agentIndex: number) => void
browserScreenshot?: BrowserScreenshot
}
// Get a friendly state description
@@ -69,14 +71,56 @@ function getAgentTypeBadge(agentType: AgentType): { label: string; className: st
}
}
export function AgentCard({ agent, onShowLogs }: AgentCardProps) {
export function AgentCard({ agent, onShowLogs, browserScreenshot }: AgentCardProps) {
const isActive = ['thinking', 'working', 'testing'].includes(agent.state)
const hasLogs = agent.logs && agent.logs.length > 0
const typeBadge = getAgentTypeBadge(agent.agentType || 'coding')
const TypeIcon = typeBadge.icon
const [screenshotExpanded, setScreenshotExpanded] = useState(false)
return (
<Card className={`min-w-[180px] max-w-[220px] py-3 ${isActive ? 'animate-pulse' : ''}`}>
<>
{/* Expanded screenshot overlay */}
{screenshotExpanded && browserScreenshot && createPortal(
<div
className="fixed inset-0 z-50 flex items-center justify-center bg-black/70"
onClick={() => setScreenshotExpanded(false)}
>
<div
className="relative max-w-[90vw] max-h-[90vh] bg-card border-2 border-border rounded-lg overflow-hidden"
onClick={(e) => e.stopPropagation()}
>
<div className="flex items-center justify-between px-3 py-2 bg-muted border-b border-border">
<div className="flex items-center gap-2">
<TypeIcon size={14} className={agent.agentType === 'testing' ? 'text-purple-500' : 'text-blue-500'} />
<span className="font-mono text-sm font-bold">
{AGENT_MASCOTS[agent.agentIndex % AGENT_MASCOTS.length]}
</span>
<Badge variant="outline" className="text-[10px] h-4">
{agent.agentType || 'coding'}
</Badge>
<span className="text-xs text-muted-foreground truncate">
{agent.featureName}
</span>
</div>
<button
onClick={() => setScreenshotExpanded(false)}
className="p-1 hover:bg-accent rounded transition-colors cursor-pointer"
>
<X size={16} />
</button>
</div>
<img
src={browserScreenshot.imageDataUrl}
alt={`Browser view - ${agent.featureName}`}
className="max-w-full max-h-[calc(90vh-3rem)] object-contain"
/>
</div>
</div>,
document.body
)}
<Card className={`min-w-[180px] ${browserScreenshot ? 'max-w-[280px]' : 'max-w-[220px]'} py-3 ${isActive ? 'animate-pulse' : ''}`}>
<CardContent className="p-3 space-y-2">
{/* Agent type badge */}
<div className="flex justify-end">
@@ -133,6 +177,29 @@ export function AgentCard({ agent, onShowLogs }: AgentCardProps) {
)}
</div>
{/* Browser screenshot thumbnail with expand */}
{browserScreenshot && (
<div className="pt-1 relative group">
<div
className="cursor-pointer overflow-hidden rounded border border-border/50"
onClick={() => setScreenshotExpanded(true)}
>
<img
src={browserScreenshot.imageDataUrl}
alt="Browser view"
className="w-full h-auto max-h-[120px] object-cover object-top"
/>
</div>
<button
onClick={() => setScreenshotExpanded(true)}
className="absolute top-2.5 right-1.5 p-0.5 bg-black/50 hover:bg-black/70 rounded transition-opacity opacity-0 group-hover:opacity-100 cursor-pointer"
title="Expand screenshot"
>
<Maximize2 size={12} className="text-white" />
</button>
</div>
)}
{/* Thought bubble */}
{agent.thought && (
<div className="pt-2 border-t border-border/50">
@@ -149,6 +216,7 @@ export function AgentCard({ agent, onShowLogs }: AgentCardProps) {
)}
</CardContent>
</Card>
</>
)
}

View File

@@ -3,7 +3,7 @@ import { useState } from 'react'
import { AgentCard, AgentLogModal } from './AgentCard'
import { ActivityFeed } from './ActivityFeed'
import { OrchestratorStatusCard } from './OrchestratorStatusCard'
import type { ActiveAgent, AgentLogEntry, OrchestratorStatus } from '../lib/types'
import type { ActiveAgent, AgentLogEntry, BrowserScreenshot, OrchestratorStatus } from '../lib/types'
import { Card, CardContent } from '@/components/ui/card'
import { Badge } from '@/components/ui/badge'
import { Button } from '@/components/ui/button'
@@ -21,6 +21,7 @@ interface AgentMissionControlProps {
}>
isExpanded?: boolean
getAgentLogs?: (agentIndex: number) => AgentLogEntry[]
browserScreenshots?: Map<string, BrowserScreenshot>
}
export function AgentMissionControl({
@@ -29,6 +30,7 @@ export function AgentMissionControl({
recentActivity,
isExpanded: defaultExpanded = true,
getAgentLogs,
browserScreenshots,
}: AgentMissionControlProps) {
const [isExpanded, setIsExpanded] = useState(defaultExpanded)
const [activityCollapsed, setActivityCollapsed] = useState(() => {
@@ -105,7 +107,12 @@ export function AgentMissionControl({
{/* Agent Cards Row */}
{agents.length > 0 && (
<div className="flex gap-4 overflow-x-auto pb-4">
{agents.map((agent) => (
{agents.map((agent) => {
// Find browser screenshot for this agent by matching agentIndex
const screenshot = browserScreenshots
? Array.from(browserScreenshots.values()).find(s => s.agentIndex === agent.agentIndex)
: undefined
return (
<AgentCard
key={`agent-${agent.agentIndex}`}
agent={agent}
@@ -115,8 +122,10 @@ export function AgentMissionControl({
setSelectedAgentForLogs(agentToShow)
}
}}
browserScreenshot={screenshot}
/>
))}
)
})}
</div>
)}

View File

@@ -0,0 +1,150 @@
/**
* Browser View Panel
*
* Displays live screenshots from each agent's browser session.
* Subscribes to screenshot streaming on mount, unsubscribes on unmount.
*/
import { useEffect, useState } from 'react'
import { Monitor, X, Maximize2, Code, FlaskConical } from 'lucide-react'
import { Badge } from '@/components/ui/badge'
import { AGENT_MASCOTS } from '@/lib/types'
import type { BrowserScreenshot } from '@/lib/types'
interface BrowserViewPanelProps {
screenshots: Map<string, BrowserScreenshot>
onSubscribe: () => void
onUnsubscribe: () => void
}
export function BrowserViewPanel({ screenshots, onSubscribe, onUnsubscribe }: BrowserViewPanelProps) {
const [expandedSession, setExpandedSession] = useState<string | null>(null)
// Subscribe on mount, unsubscribe on unmount
useEffect(() => {
onSubscribe()
return () => onUnsubscribe()
}, [onSubscribe, onUnsubscribe])
const screenshotList = Array.from(screenshots.values())
if (screenshotList.length === 0) {
return (
<div className="flex flex-col items-center justify-center h-full text-muted-foreground gap-2">
<Monitor size={24} />
<span className="text-sm">No active browser sessions</span>
<span className="text-xs">Screenshots will appear when agents open browsers</span>
</div>
)
}
const expanded = expandedSession ? screenshots.get(expandedSession) : null
return (
<div className="h-full overflow-auto p-3">
{/* Expanded overlay */}
{expanded && (
<div
className="fixed inset-0 z-50 flex items-center justify-center bg-black/70"
onClick={() => setExpandedSession(null)}
>
<div
className="relative max-w-[90vw] max-h-[90vh] bg-card border-2 border-border rounded-lg overflow-hidden"
onClick={(e) => e.stopPropagation()}
>
<div className="flex items-center justify-between px-3 py-2 bg-muted border-b border-border">
<div className="flex items-center gap-2">
{expanded.agentType === 'coding' ? (
<Code size={14} className="text-blue-500" />
) : (
<FlaskConical size={14} className="text-purple-500" />
)}
<span className="font-mono text-sm font-bold">
{AGENT_MASCOTS[expanded.agentIndex % AGENT_MASCOTS.length]}
</span>
<Badge variant="outline" className="text-[10px] h-4">
{expanded.agentType}
</Badge>
<span className="text-xs text-muted-foreground truncate">
{expanded.featureName}
</span>
</div>
<button
onClick={() => setExpandedSession(null)}
className="p-1 hover:bg-accent rounded transition-colors cursor-pointer"
>
<X size={16} />
</button>
</div>
<img
src={expanded.imageDataUrl}
alt={`Browser view - ${expanded.featureName}`}
className="max-w-full max-h-[calc(90vh-3rem)] object-contain"
/>
</div>
</div>
)}
{/* Screenshot grid — responsive 1/2/3 columns */}
<div className="grid gap-3 grid-cols-1 md:grid-cols-2 xl:grid-cols-3">
{screenshotList.map((screenshot) => (
<div
key={screenshot.sessionName}
className="border-2 border-border rounded-lg overflow-hidden bg-card hover:border-foreground/30 transition-colors"
>
{/* Card header */}
<div className="flex items-center justify-between px-2.5 py-1.5 bg-muted border-b border-border">
<div className="flex items-center gap-2 min-w-0">
{screenshot.agentType === 'coding' ? (
<Code size={12} className="text-blue-500 shrink-0" />
) : (
<FlaskConical size={12} className="text-purple-500 shrink-0" />
)}
<span className="font-mono text-xs font-bold">
{AGENT_MASCOTS[screenshot.agentIndex % AGENT_MASCOTS.length]}
</span>
<Badge variant="outline" className="text-[9px] h-3.5 px-1">
{screenshot.agentType}
</Badge>
<span className="text-[11px] text-muted-foreground truncate">
{screenshot.featureName}
</span>
</div>
<button
onClick={() => setExpandedSession(screenshot.sessionName)}
className="p-0.5 hover:bg-accent rounded transition-colors cursor-pointer shrink-0"
title="Expand"
>
<Maximize2 size={12} className="text-muted-foreground" />
</button>
</div>
{/* Screenshot image — capped height for compact grid */}
<div
className="cursor-pointer overflow-hidden"
onClick={() => setExpandedSession(screenshot.sessionName)}
>
<img
src={screenshot.imageDataUrl}
alt={`Browser - ${screenshot.featureName}`}
className="w-full h-auto max-h-[280px] object-cover object-top"
/>
</div>
{/* Timestamp footer */}
<div className="px-2.5 py-1 bg-muted border-t border-border">
<span className="text-[10px] text-muted-foreground font-mono">
{new Date(screenshot.timestamp).toLocaleTimeString('en-US', {
hour12: false,
hour: '2-digit',
minute: '2-digit',
second: '2-digit',
})}
</span>
</div>
</div>
))}
</div>
</div>
)
}

View File

@@ -7,11 +7,12 @@
*/
import { useEffect, useRef, useState, useCallback } from 'react'
import { ChevronUp, ChevronDown, Trash2, Terminal as TerminalIcon, GripHorizontal, Cpu, Server } from 'lucide-react'
import { ChevronUp, ChevronDown, Trash2, Terminal as TerminalIcon, GripHorizontal, Cpu, Server, Monitor } from 'lucide-react'
import { Terminal } from './Terminal'
import { TerminalTabs } from './TerminalTabs'
import { BrowserViewPanel } from './BrowserViewPanel'
import { listTerminals, createTerminal, renameTerminal, deleteTerminal } from '@/lib/api'
import type { TerminalInfo } from '@/lib/types'
import type { TerminalInfo, BrowserScreenshot } from '@/lib/types'
import { Button } from '@/components/ui/button'
import { Badge } from '@/components/ui/badge'
@@ -21,7 +22,7 @@ const DEFAULT_HEIGHT = 288
const STORAGE_KEY = 'debug-panel-height'
const TAB_STORAGE_KEY = 'debug-panel-tab'
type TabType = 'agent' | 'devserver' | 'terminal'
type TabType = 'agent' | 'devserver' | 'terminal' | 'browsers'
interface DebugLogViewerProps {
logs: Array<{ line: string; timestamp: string }>
@@ -34,6 +35,9 @@ interface DebugLogViewerProps {
projectName: string
activeTab?: TabType
onTabChange?: (tab: TabType) => void
browserScreenshots?: Map<string, BrowserScreenshot>
onSubscribeBrowserView?: () => void
onUnsubscribeBrowserView?: () => void
}
type LogLevel = 'error' | 'warn' | 'debug' | 'info'
@@ -49,6 +53,9 @@ export function DebugLogViewer({
projectName,
activeTab: controlledActiveTab,
onTabChange,
browserScreenshots,
onSubscribeBrowserView,
onUnsubscribeBrowserView,
}: DebugLogViewerProps) {
const scrollRef = useRef<HTMLDivElement>(null)
const devScrollRef = useRef<HTMLDivElement>(null)
@@ -395,11 +402,28 @@ export function DebugLogViewer({
T
</Badge>
</Button>
<Button
variant={activeTab === 'browsers' ? 'secondary' : 'ghost'}
size="sm"
onClick={(e: React.MouseEvent) => {
e.stopPropagation()
setActiveTab('browsers')
}}
className="h-7 text-xs font-mono gap-1.5"
>
<Monitor size={12} />
Browsers
{browserScreenshots && browserScreenshots.size > 0 && (
<Badge variant="default" className="h-4 px-1.5 text-[10px]">
{browserScreenshots.size}
</Badge>
)}
</Button>
</div>
)}
{/* Log count and status - only for log tabs */}
{isOpen && activeTab !== 'terminal' && (
{isOpen && activeTab !== 'terminal' && activeTab !== 'browsers' && (
<>
{getCurrentLogCount() > 0 && (
<Badge variant="secondary" className="ml-2 font-mono">
@@ -417,7 +441,7 @@ export function DebugLogViewer({
<div className="flex items-center gap-2">
{/* Clear button - only for log tabs */}
{isOpen && activeTab !== 'terminal' && (
{isOpen && activeTab !== 'terminal' && activeTab !== 'browsers' && (
<Button
variant="ghost"
size="icon"
@@ -576,6 +600,15 @@ export function DebugLogViewer({
</div>
</div>
)}
{/* Browsers Tab */}
{activeTab === 'browsers' && browserScreenshots && onSubscribeBrowserView && onUnsubscribeBrowserView && (
<BrowserViewPanel
screenshots={browserScreenshots}
onSubscribe={onSubscribeBrowserView}
onUnsubscribe={onUnsubscribeBrowserView}
/>
)}
</div>
)}
</div>

View File

@@ -406,24 +406,6 @@ export function SettingsModal({ isOpen, onClose }: SettingsModalProps) {
/>
</div>
{/* Headless Browser Toggle */}
<div className="flex items-center justify-between">
<div className="space-y-0.5">
<Label htmlFor="playwright-headless" className="font-medium">
Headless Browser
</Label>
<p className="text-sm text-muted-foreground">
Run browser without visible window (saves CPU)
</p>
</div>
<Switch
id="playwright-headless"
checked={settings.playwright_headless}
onCheckedChange={() => updateSettings.mutate({ playwright_headless: !settings.playwright_headless })}
disabled={isSaving}
/>
</div>
{/* Regression Agents */}
<div className="space-y-2">
<Label className="font-medium">Regression Agents</Label>

View File

@@ -10,6 +10,7 @@ import type {
ActiveAgent,
AgentMascot,
AgentLogEntry,
BrowserScreenshot,
OrchestratorStatus,
OrchestratorEvent,
} from '../lib/types'
@@ -53,6 +54,9 @@ interface WebSocketState {
celebration: CelebrationTrigger | null
// Orchestrator state for Mission Control
orchestratorStatus: OrchestratorStatus | null
// Browser view screenshots (sessionName -> latest screenshot)
browserScreenshots: Map<string, BrowserScreenshot>
browserViewSubscribed: boolean
}
const MAX_LOGS = 100 // Keep last 100 log lines
@@ -74,6 +78,8 @@ export function useProjectWebSocket(projectName: string | null) {
celebrationQueue: [],
celebration: null,
orchestratorStatus: null,
browserScreenshots: new Map(),
browserViewSubscribed: false,
})
const wsRef = useRef<WebSocket | null>(null)
@@ -119,11 +125,12 @@ export function useProjectWebSocket(projectName: string | null) {
setState(prev => ({
...prev,
agentStatus: message.status,
// Clear active agents and orchestrator status when process stops OR crashes to prevent stale UI
// Clear active agents, orchestrator status, and browser screenshots when process stops OR crashes
...((message.status === 'stopped' || message.status === 'crashed') && {
activeAgents: [],
recentActivity: [],
orchestratorStatus: null,
browserScreenshots: new Map(),
}),
}))
break
@@ -328,6 +335,22 @@ export function useProjectWebSocket(projectName: string | null) {
}))
break
case 'browser_screenshot':
setState(prev => {
const newScreenshots = new Map(prev.browserScreenshots)
newScreenshots.set(message.sessionName, {
sessionName: message.sessionName,
agentIndex: message.agentIndex,
agentType: message.agentType,
featureId: message.featureId,
featureName: message.featureName,
imageDataUrl: `data:image/png;base64,${message.imageData}`,
timestamp: message.timestamp,
})
return { ...prev, browserScreenshots: newScreenshots }
})
break
case 'pong':
// Heartbeat response
break
@@ -400,6 +423,8 @@ export function useProjectWebSocket(projectName: string | null) {
celebrationQueue: [],
celebration: null,
orchestratorStatus: null,
browserScreenshots: new Map(),
browserViewSubscribed: false,
})
if (!projectName) {
@@ -473,6 +498,22 @@ export function useProjectWebSocket(projectName: string | null) {
})
}, [])
// Subscribe to browser view screenshots
const subscribeBrowserView = useCallback(() => {
if (wsRef.current?.readyState === WebSocket.OPEN) {
wsRef.current.send(JSON.stringify({ type: 'browser_view_subscribe' }))
setState(prev => ({ ...prev, browserViewSubscribed: true }))
}
}, [])
// Unsubscribe from browser view screenshots
const unsubscribeBrowserView = useCallback(() => {
if (wsRef.current?.readyState === WebSocket.OPEN) {
wsRef.current.send(JSON.stringify({ type: 'browser_view_unsubscribe' }))
setState(prev => ({ ...prev, browserViewSubscribed: false }))
}
}, [])
return {
...state,
clearLogs,
@@ -480,5 +521,7 @@ export function useProjectWebSocket(projectName: string | null) {
clearCelebration,
getAgentLogs,
clearAgentLogs,
subscribeBrowserView,
unsubscribeBrowserView,
}
}

View File

@@ -266,7 +266,7 @@ export interface OrchestratorStatus {
}
// WebSocket message types
export type WSMessageType = 'progress' | 'feature_update' | 'log' | 'agent_status' | 'pong' | 'dev_log' | 'dev_server_status' | 'agent_update' | 'orchestrator_update'
export type WSMessageType = 'progress' | 'feature_update' | 'log' | 'agent_status' | 'pong' | 'dev_log' | 'dev_server_status' | 'agent_update' | 'orchestrator_update' | 'browser_screenshot'
export interface WSProgressMessage {
type: 'progress'
@@ -342,6 +342,28 @@ export interface WSOrchestratorUpdateMessage {
featureName?: string
}
export interface WSBrowserScreenshotMessage {
type: 'browser_screenshot'
sessionName: string
agentIndex: number
agentType: AgentType
featureId: number
featureName: string
imageData: string // base64 PNG
timestamp: string
}
// Browser screenshot stored in UI state
export interface BrowserScreenshot {
sessionName: string
agentIndex: number
agentType: AgentType
featureId: number
featureName: string
imageDataUrl: string // "data:image/png;base64,..."
timestamp: string
}
export type WSMessage =
| WSProgressMessage
| WSFeatureUpdateMessage
@@ -352,6 +374,7 @@ export type WSMessage =
| WSDevLogMessage
| WSDevServerStatusMessage
| WSOrchestratorUpdateMessage
| WSBrowserScreenshotMessage
// ============================================================================
// Spec Chat Types
@@ -636,7 +659,6 @@ export interface SettingsUpdate {
yolo_mode?: boolean
model?: string
testing_agent_ratio?: number
playwright_headless?: boolean
batch_size?: number
testing_batch_size?: number
api_provider?: string