mirror of
https://github.com/leonvanzyl/autocoder.git
synced 2026-04-03 11:13:08 +00:00
embed browser windows
This commit is contained in:
@@ -17,11 +17,11 @@ from ..utils.project_helpers import get_project_path as _get_project_path
|
||||
from ..utils.validation import validate_project_name
|
||||
|
||||
|
||||
def _get_settings_defaults() -> tuple[bool, str, int, bool, int, int]:
|
||||
def _get_settings_defaults() -> tuple[bool, str, int, int, int]:
|
||||
"""Get defaults from global settings.
|
||||
|
||||
Returns:
|
||||
Tuple of (yolo_mode, model, testing_agent_ratio, playwright_headless, batch_size, testing_batch_size)
|
||||
Tuple of (yolo_mode, model, testing_agent_ratio, batch_size, testing_batch_size)
|
||||
"""
|
||||
import sys
|
||||
root = Path(__file__).parent.parent.parent
|
||||
@@ -40,8 +40,6 @@ def _get_settings_defaults() -> tuple[bool, str, int, bool, int, int]:
|
||||
except (ValueError, TypeError):
|
||||
testing_agent_ratio = 1
|
||||
|
||||
playwright_headless = (settings.get("playwright_headless") or "true").lower() == "true"
|
||||
|
||||
try:
|
||||
batch_size = int(settings.get("batch_size", "3"))
|
||||
except (ValueError, TypeError):
|
||||
@@ -52,7 +50,7 @@ def _get_settings_defaults() -> tuple[bool, str, int, bool, int, int]:
|
||||
except (ValueError, TypeError):
|
||||
testing_batch_size = 3
|
||||
|
||||
return yolo_mode, model, testing_agent_ratio, playwright_headless, batch_size, testing_batch_size
|
||||
return yolo_mode, model, testing_agent_ratio, batch_size, testing_batch_size
|
||||
|
||||
|
||||
router = APIRouter(prefix="/api/projects/{project_name}/agent", tags=["agent"])
|
||||
@@ -101,7 +99,7 @@ async def start_agent(
|
||||
manager = get_project_manager(project_name)
|
||||
|
||||
# Get defaults from global settings if not provided in request
|
||||
default_yolo, default_model, default_testing_ratio, playwright_headless, default_batch_size, default_testing_batch_size = _get_settings_defaults()
|
||||
default_yolo, default_model, default_testing_ratio, default_batch_size, default_testing_batch_size = _get_settings_defaults()
|
||||
|
||||
yolo_mode = request.yolo_mode if request.yolo_mode is not None else default_yolo
|
||||
model = request.model if request.model else default_model
|
||||
@@ -111,12 +109,13 @@ async def start_agent(
|
||||
batch_size = default_batch_size
|
||||
testing_batch_size = default_testing_batch_size
|
||||
|
||||
# Always run headless - the embedded browser view panel replaces desktop windows
|
||||
success, message = await manager.start(
|
||||
yolo_mode=yolo_mode,
|
||||
model=model,
|
||||
max_concurrency=max_concurrency,
|
||||
testing_agent_ratio=testing_agent_ratio,
|
||||
playwright_headless=playwright_headless,
|
||||
playwright_headless=True,
|
||||
batch_size=batch_size,
|
||||
testing_batch_size=testing_batch_size,
|
||||
)
|
||||
|
||||
@@ -111,7 +111,7 @@ async def get_settings():
|
||||
glm_mode=glm_mode,
|
||||
ollama_mode=ollama_mode,
|
||||
testing_agent_ratio=_parse_int(all_settings.get("testing_agent_ratio"), 1),
|
||||
playwright_headless=_parse_bool(all_settings.get("playwright_headless"), default=True),
|
||||
playwright_headless=True, # Always headless - embedded browser view replaces desktop windows
|
||||
batch_size=_parse_int(all_settings.get("batch_size"), 3),
|
||||
testing_batch_size=_parse_int(all_settings.get("testing_batch_size"), 3),
|
||||
api_provider=api_provider,
|
||||
@@ -133,8 +133,8 @@ async def update_settings(update: SettingsUpdate):
|
||||
if update.testing_agent_ratio is not None:
|
||||
set_setting("testing_agent_ratio", str(update.testing_agent_ratio))
|
||||
|
||||
if update.playwright_headless is not None:
|
||||
set_setting("playwright_headless", "true" if update.playwright_headless else "false")
|
||||
# playwright_headless is no longer user-configurable; always headless
|
||||
# with embedded browser view panel in the UI
|
||||
|
||||
if update.batch_size is not None:
|
||||
set_setting("batch_size", str(update.batch_size))
|
||||
@@ -179,7 +179,7 @@ async def update_settings(update: SettingsUpdate):
|
||||
glm_mode=glm_mode,
|
||||
ollama_mode=ollama_mode,
|
||||
testing_agent_ratio=_parse_int(all_settings.get("testing_agent_ratio"), 1),
|
||||
playwright_headless=_parse_bool(all_settings.get("playwright_headless"), default=True),
|
||||
playwright_headless=True, # Always headless - embedded browser view replaces desktop windows
|
||||
batch_size=_parse_int(all_settings.get("batch_size"), 3),
|
||||
testing_batch_size=_parse_int(all_settings.get("testing_batch_size"), 3),
|
||||
api_provider=api_provider,
|
||||
|
||||
280
server/services/browser_view_service.py
Normal file
280
server/services/browser_view_service.py
Normal file
@@ -0,0 +1,280 @@
|
||||
"""
|
||||
Browser View Service
|
||||
====================
|
||||
|
||||
Captures periodic screenshots from active playwright-cli browser sessions
|
||||
and streams them to the UI via WebSocket callbacks.
|
||||
|
||||
Each agent gets an isolated browser session (e.g., coding-5, testing-0).
|
||||
This service polls those sessions with `playwright-cli screenshot` and
|
||||
delivers the frames to subscribed UI clients.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
import logging
|
||||
import shutil
|
||||
import threading
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Awaitable, Callable
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
POLL_INTERVAL = 2.0 # seconds between screenshot captures
|
||||
BACKOFF_INTERVAL = 10.0 # seconds after repeated failures
|
||||
MAX_FAILURES_BEFORE_BACKOFF = 10
|
||||
MAX_FAILURES_BEFORE_STOP = 90 # ~3 minutes at normal rate before giving up
|
||||
SCREENSHOT_TIMEOUT = 5 # seconds
|
||||
|
||||
|
||||
@dataclass
|
||||
class SessionInfo:
|
||||
"""Metadata for an active browser session."""
|
||||
session_name: str
|
||||
agent_index: int
|
||||
agent_type: str # "coding" or "testing"
|
||||
feature_id: int
|
||||
feature_name: str
|
||||
consecutive_failures: int = 0
|
||||
stopped: bool = False
|
||||
|
||||
|
||||
@dataclass
|
||||
class ScreenshotData:
|
||||
"""A captured screenshot ready for delivery."""
|
||||
session_name: str
|
||||
agent_index: int
|
||||
agent_type: str
|
||||
feature_id: int
|
||||
feature_name: str
|
||||
image_base64: str # base64-encoded PNG
|
||||
timestamp: str
|
||||
|
||||
|
||||
class BrowserViewService:
|
||||
"""Manages screenshot capture for active agent browser sessions.
|
||||
|
||||
Follows the same singleton-per-project pattern as DevServerProcessManager.
|
||||
"""
|
||||
|
||||
def __init__(self, project_name: str, project_dir: Path):
|
||||
self.project_name = project_name
|
||||
self.project_dir = project_dir
|
||||
self._active_sessions: dict[str, SessionInfo] = {}
|
||||
self._subscribers = 0
|
||||
self._poll_task: asyncio.Task | None = None
|
||||
self._screenshot_callbacks: set[Callable[[ScreenshotData], Awaitable[None]]] = set()
|
||||
self._lock = asyncio.Lock()
|
||||
self._playwright_cli: str | None = None
|
||||
|
||||
def _get_playwright_cli(self) -> str | None:
|
||||
"""Find playwright-cli executable."""
|
||||
if self._playwright_cli is not None:
|
||||
return self._playwright_cli
|
||||
path = shutil.which("playwright-cli")
|
||||
if path:
|
||||
self._playwright_cli = path
|
||||
else:
|
||||
logger.warning("playwright-cli not found in PATH; browser view disabled")
|
||||
return self._playwright_cli
|
||||
|
||||
async def register_session(
|
||||
self,
|
||||
session_name: str,
|
||||
agent_index: int,
|
||||
agent_type: str,
|
||||
feature_id: int,
|
||||
feature_name: str,
|
||||
) -> None:
|
||||
"""Register an agent's browser session for screenshot capture."""
|
||||
async with self._lock:
|
||||
self._active_sessions[session_name] = SessionInfo(
|
||||
session_name=session_name,
|
||||
agent_index=agent_index,
|
||||
agent_type=agent_type,
|
||||
feature_id=feature_id,
|
||||
feature_name=feature_name,
|
||||
)
|
||||
logger.debug("Registered browser session: %s", session_name)
|
||||
|
||||
async def unregister_session(self, session_name: str) -> None:
|
||||
"""Unregister a browser session when agent completes."""
|
||||
async with self._lock:
|
||||
removed = self._active_sessions.pop(session_name, None)
|
||||
if removed:
|
||||
logger.debug("Unregistered browser session: %s", session_name)
|
||||
# Clean up screenshot file
|
||||
self._cleanup_screenshot_file(session_name)
|
||||
|
||||
def add_screenshot_callback(self, callback: Callable[[ScreenshotData], Awaitable[None]]) -> None:
|
||||
self._screenshot_callbacks.add(callback)
|
||||
|
||||
def remove_screenshot_callback(self, callback: Callable[[ScreenshotData], Awaitable[None]]) -> None:
|
||||
self._screenshot_callbacks.discard(callback)
|
||||
|
||||
async def add_subscriber(self) -> None:
|
||||
"""Called when a UI client wants browser screenshots."""
|
||||
async with self._lock:
|
||||
self._subscribers += 1
|
||||
if self._subscribers == 1:
|
||||
self._start_polling()
|
||||
|
||||
async def remove_subscriber(self) -> None:
|
||||
"""Called when a UI client stops wanting screenshots."""
|
||||
async with self._lock:
|
||||
self._subscribers = max(0, self._subscribers - 1)
|
||||
if self._subscribers == 0:
|
||||
self._stop_polling()
|
||||
|
||||
async def stop(self) -> None:
|
||||
"""Clean up all sessions and stop polling."""
|
||||
async with self._lock:
|
||||
for session_name in list(self._active_sessions):
|
||||
self._cleanup_screenshot_file(session_name)
|
||||
self._active_sessions.clear()
|
||||
self._stop_polling()
|
||||
|
||||
def _start_polling(self) -> None:
|
||||
"""Start the screenshot polling loop."""
|
||||
if self._poll_task is not None and not self._poll_task.done():
|
||||
return
|
||||
self._poll_task = asyncio.create_task(self._poll_loop())
|
||||
logger.info("Started browser screenshot polling for %s", self.project_name)
|
||||
|
||||
def _stop_polling(self) -> None:
|
||||
"""Stop the screenshot polling loop."""
|
||||
if self._poll_task is not None and not self._poll_task.done():
|
||||
self._poll_task.cancel()
|
||||
self._poll_task = None
|
||||
logger.info("Stopped browser screenshot polling for %s", self.project_name)
|
||||
|
||||
async def _poll_loop(self) -> None:
|
||||
"""Main polling loop - capture screenshots for all active sessions."""
|
||||
try:
|
||||
while True:
|
||||
async with self._lock:
|
||||
sessions = list(self._active_sessions.values())
|
||||
|
||||
if sessions and self._screenshot_callbacks:
|
||||
# Capture screenshots with limited concurrency
|
||||
sem = asyncio.Semaphore(3)
|
||||
|
||||
async def capture_with_sem(session: SessionInfo) -> None:
|
||||
async with sem:
|
||||
await self._capture_and_deliver(session)
|
||||
|
||||
await asyncio.gather(
|
||||
*(capture_with_sem(s) for s in sessions if not s.stopped),
|
||||
return_exceptions=True,
|
||||
)
|
||||
|
||||
await asyncio.sleep(POLL_INTERVAL)
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
except Exception:
|
||||
logger.warning("Browser screenshot polling crashed", exc_info=True)
|
||||
|
||||
async def _capture_and_deliver(self, session: SessionInfo) -> None:
|
||||
"""Capture a screenshot for a session and deliver to callbacks."""
|
||||
cli = self._get_playwright_cli()
|
||||
if not cli:
|
||||
return
|
||||
|
||||
# Determine interval based on failure count
|
||||
if session.consecutive_failures >= MAX_FAILURES_BEFORE_BACKOFF:
|
||||
# In backoff mode - only capture every BACKOFF_INTERVAL/POLL_INTERVAL polls
|
||||
# We achieve this by checking a simple modulo on failure count
|
||||
if session.consecutive_failures % int(BACKOFF_INTERVAL / POLL_INTERVAL) != 0:
|
||||
return
|
||||
|
||||
screenshot_dir = self.project_dir / ".playwright-cli"
|
||||
screenshot_dir.mkdir(parents=True, exist_ok=True)
|
||||
screenshot_path = screenshot_dir / f"_view_{session.session_name}.png"
|
||||
|
||||
try:
|
||||
proc = await asyncio.create_subprocess_exec(
|
||||
cli, "-s", session.session_name, "screenshot",
|
||||
f"--filename={screenshot_path}",
|
||||
stdout=asyncio.subprocess.PIPE,
|
||||
stderr=asyncio.subprocess.PIPE,
|
||||
cwd=str(self.project_dir),
|
||||
)
|
||||
_, stderr = await asyncio.wait_for(proc.communicate(), timeout=SCREENSHOT_TIMEOUT)
|
||||
|
||||
if proc.returncode != 0:
|
||||
session.consecutive_failures += 1
|
||||
if session.consecutive_failures >= MAX_FAILURES_BEFORE_STOP:
|
||||
session.stopped = True
|
||||
logger.debug(
|
||||
"Stopped polling session %s after %d failures",
|
||||
session.session_name, session.consecutive_failures,
|
||||
)
|
||||
return
|
||||
|
||||
# Read and encode the screenshot
|
||||
if not screenshot_path.exists():
|
||||
session.consecutive_failures += 1
|
||||
return
|
||||
|
||||
image_bytes = screenshot_path.read_bytes()
|
||||
image_base64 = base64.b64encode(image_bytes).decode("ascii")
|
||||
|
||||
# Reset failure counter on success
|
||||
session.consecutive_failures = 0
|
||||
# Re-enable if previously stopped
|
||||
session.stopped = False
|
||||
|
||||
screenshot = ScreenshotData(
|
||||
session_name=session.session_name,
|
||||
agent_index=session.agent_index,
|
||||
agent_type=session.agent_type,
|
||||
feature_id=session.feature_id,
|
||||
feature_name=session.feature_name,
|
||||
image_base64=image_base64,
|
||||
timestamp=datetime.now().isoformat(),
|
||||
)
|
||||
|
||||
# Deliver to all callbacks
|
||||
for callback in list(self._screenshot_callbacks):
|
||||
try:
|
||||
await callback(screenshot)
|
||||
except Exception:
|
||||
pass # Connection may be closed
|
||||
|
||||
except asyncio.TimeoutError:
|
||||
session.consecutive_failures += 1
|
||||
except Exception:
|
||||
session.consecutive_failures += 1
|
||||
finally:
|
||||
# Clean up the screenshot file
|
||||
try:
|
||||
screenshot_path.unlink(missing_ok=True)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _cleanup_screenshot_file(self, session_name: str) -> None:
|
||||
"""Remove a session's screenshot file."""
|
||||
try:
|
||||
path = self.project_dir / ".playwright-cli" / f"_view_{session_name}.png"
|
||||
path.unlink(missing_ok=True)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Global instance management (thread-safe)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_services: dict[tuple[str, str], BrowserViewService] = {}
|
||||
_services_lock = threading.Lock()
|
||||
|
||||
|
||||
def get_browser_view_service(project_name: str, project_dir: Path) -> BrowserViewService:
|
||||
"""Get or create a BrowserViewService for a project (thread-safe)."""
|
||||
with _services_lock:
|
||||
key = (project_name, str(project_dir.resolve()))
|
||||
if key not in _services:
|
||||
_services[key] = BrowserViewService(project_name, project_dir)
|
||||
return _services[key]
|
||||
@@ -16,6 +16,7 @@ from typing import Set
|
||||
from fastapi import WebSocket, WebSocketDisconnect
|
||||
|
||||
from .schemas import AGENT_MASCOTS
|
||||
from .services.browser_view_service import get_browser_view_service
|
||||
from .services.chat_constants import ROOT_DIR
|
||||
from .services.dev_server_manager import get_devserver_manager
|
||||
from .services.process_manager import get_manager
|
||||
@@ -787,8 +788,39 @@ async def project_websocket(websocket: WebSocket, project_name: str):
|
||||
# Create orchestrator tracker for observability
|
||||
orchestrator_tracker = OrchestratorTracker()
|
||||
|
||||
# Get browser view service for embedded browser screenshots
|
||||
browser_view_service = get_browser_view_service(project_name, project_dir)
|
||||
browser_view_subscribed = False
|
||||
# Counter to mirror orchestrator's testing session naming (testing-0, testing-1, ...)
|
||||
testing_session_counter = 0
|
||||
# Deferred session registration: store metadata at agent start, register on first browser command.
|
||||
# This avoids premature polling failures when agents spend time reading/planning before opening a browser.
|
||||
# Key: session_name -> registration kwargs
|
||||
pending_browser_sessions: dict[str, dict] = {}
|
||||
# Track which feature IDs map to which session names (for deferred lookup)
|
||||
feature_to_session: dict[int, str] = {}
|
||||
|
||||
async def on_screenshot(screenshot):
|
||||
"""Handle browser screenshot - send to this WebSocket."""
|
||||
try:
|
||||
await websocket.send_json({
|
||||
"type": "browser_screenshot",
|
||||
"sessionName": screenshot.session_name,
|
||||
"agentIndex": screenshot.agent_index,
|
||||
"agentType": screenshot.agent_type,
|
||||
"featureId": screenshot.feature_id,
|
||||
"featureName": screenshot.feature_name,
|
||||
"imageData": screenshot.image_base64,
|
||||
"timestamp": screenshot.timestamp,
|
||||
})
|
||||
except Exception:
|
||||
pass # Connection may be closed
|
||||
|
||||
browser_view_service.add_screenshot_callback(on_screenshot)
|
||||
|
||||
async def on_output(line: str):
|
||||
"""Handle agent output - broadcast to this WebSocket."""
|
||||
nonlocal testing_session_counter
|
||||
try:
|
||||
# Extract feature ID from line if present
|
||||
feature_id = None
|
||||
@@ -817,6 +849,48 @@ async def project_websocket(websocket: WebSocket, project_name: str):
|
||||
if agent_update:
|
||||
await websocket.send_json(agent_update)
|
||||
|
||||
# Register/unregister browser sessions based on agent lifecycle
|
||||
update_state = agent_update.get("state")
|
||||
update_type = agent_update.get("agentType", "coding")
|
||||
update_feature_id = agent_update.get("featureId", 0)
|
||||
update_feature_name = agent_update.get("featureName", "")
|
||||
update_agent_index = agent_update.get("agentIndex", 0)
|
||||
|
||||
if update_state == "thinking" and agent_update.get("thought") in ("Starting work...", "Starting batch work..."):
|
||||
# Agent just started - defer browser session registration until
|
||||
# we detect an actual playwright-cli open/goto command. This avoids
|
||||
# polling failures while the agent is still reading code / planning.
|
||||
if update_type == "coding":
|
||||
session_name = f"coding-{update_feature_id}"
|
||||
else:
|
||||
session_name = f"testing-{testing_session_counter}"
|
||||
testing_session_counter += 1
|
||||
pending_browser_sessions[session_name] = dict(
|
||||
session_name=session_name,
|
||||
agent_index=update_agent_index,
|
||||
agent_type=update_type,
|
||||
feature_id=update_feature_id,
|
||||
feature_name=update_feature_name,
|
||||
)
|
||||
feature_to_session[update_feature_id] = session_name
|
||||
elif update_state in ("success", "error"):
|
||||
# Agent completed - unregister browser session
|
||||
if update_type == "coding":
|
||||
session_name = f"coding-{update_feature_id}"
|
||||
await browser_view_service.unregister_session(session_name)
|
||||
pending_browser_sessions.pop(session_name, None)
|
||||
feature_to_session.pop(update_feature_id, None)
|
||||
# Testing sessions are cleaned up on orchestrator stop
|
||||
|
||||
# Detect playwright-cli browser commands and activate deferred sessions
|
||||
if feature_id is not None and "playwright-cli" in line and any(
|
||||
kw in line for kw in ("open ", "goto ", "open\t", "goto\t")
|
||||
):
|
||||
sess_name = feature_to_session.get(feature_id)
|
||||
if sess_name and sess_name in pending_browser_sessions:
|
||||
reg = pending_browser_sessions.pop(sess_name)
|
||||
await browser_view_service.register_session(**reg)
|
||||
|
||||
# Also check for orchestrator events and emit orchestrator_update messages
|
||||
orch_update = await orchestrator_tracker.process_line(line)
|
||||
if orch_update:
|
||||
@@ -826,6 +900,7 @@ async def project_websocket(websocket: WebSocket, project_name: str):
|
||||
|
||||
async def on_status_change(status: str):
|
||||
"""Handle status change - broadcast to this WebSocket."""
|
||||
nonlocal testing_session_counter
|
||||
try:
|
||||
await websocket.send_json({
|
||||
"type": "agent_status",
|
||||
@@ -835,6 +910,10 @@ async def project_websocket(websocket: WebSocket, project_name: str):
|
||||
if status in ("stopped", "crashed"):
|
||||
await agent_tracker.reset()
|
||||
await orchestrator_tracker.reset()
|
||||
await browser_view_service.stop()
|
||||
testing_session_counter = 0
|
||||
pending_browser_sessions.clear()
|
||||
feature_to_session.clear()
|
||||
except Exception:
|
||||
pass # Connection may be closed
|
||||
|
||||
@@ -908,10 +987,23 @@ async def project_websocket(websocket: WebSocket, project_name: str):
|
||||
data = await websocket.receive_text()
|
||||
message = json.loads(data)
|
||||
|
||||
msg_type = message.get("type")
|
||||
|
||||
# Handle ping
|
||||
if message.get("type") == "ping":
|
||||
if msg_type == "ping":
|
||||
await websocket.send_json({"type": "pong"})
|
||||
|
||||
# Handle browser view subscribe/unsubscribe
|
||||
elif msg_type == "browser_view_subscribe":
|
||||
if not browser_view_subscribed:
|
||||
browser_view_subscribed = True
|
||||
await browser_view_service.add_subscriber()
|
||||
|
||||
elif msg_type == "browser_view_unsubscribe":
|
||||
if browser_view_subscribed:
|
||||
browser_view_subscribed = False
|
||||
await browser_view_service.remove_subscriber()
|
||||
|
||||
except WebSocketDisconnect:
|
||||
break
|
||||
except json.JSONDecodeError:
|
||||
@@ -935,5 +1027,10 @@ async def project_websocket(websocket: WebSocket, project_name: str):
|
||||
devserver_manager.remove_output_callback(on_dev_output)
|
||||
devserver_manager.remove_status_callback(on_dev_status_change)
|
||||
|
||||
# Unregister browser view callbacks and subscriber
|
||||
browser_view_service.remove_screenshot_callback(on_screenshot)
|
||||
if browser_view_subscribed:
|
||||
await browser_view_service.remove_subscriber()
|
||||
|
||||
# Disconnect from manager
|
||||
await manager.disconnect(websocket, project_name)
|
||||
|
||||
Reference in New Issue
Block a user