diff --git a/src/browserServerBackend.ts b/src/browserServerBackend.ts index 75f1fc6..7f5fda5 100644 --- a/src/browserServerBackend.ts +++ b/src/browserServerBackend.ts @@ -57,16 +57,17 @@ export class BrowserServerBackend implements ServerBackend { const context = this._context!; const response = new Response(context, schema.name, parsedArguments); const tool = this._tools.find(tool => tool.schema.name === schema.name)!; - await context.setInputRecorderEnabled(false); + context.setRunningTool(true); try { await tool.handle(context, parsedArguments, response); - } catch (error) { + await response.finish(); + this._sessionLog?.logResponse(response); + } catch (error: any) { response.addError(String(error)); } finally { - await context.setInputRecorderEnabled(true); + context.setRunningTool(false); } - await this._sessionLog?.logResponse(response); - return await response.serialize(); + return response.serialize(); } serverInitialized(version: mcpServer.ClientVersion | undefined) { diff --git a/src/context.ts b/src/context.ts index 46aa5d4..e9de93a 100644 --- a/src/context.ts +++ b/src/context.ts @@ -24,13 +24,14 @@ import type { Tool } from './tools/tool.js'; import type { FullConfig } from './config.js'; import type { BrowserContextFactory } from './browserContextFactory.js'; import type * as actions from './actions.js'; -import type { Action, SessionLog } from './sessionLog.js'; +import type { SessionLog } from './sessionLog.js'; const testDebug = debug('pw:mcp:test'); export class Context { readonly tools: Tool[]; readonly config: FullConfig; + readonly sessionLog: SessionLog | undefined; private _browserContextPromise: Promise<{ browserContext: playwright.BrowserContext, close: () => Promise }> | undefined; private _browserContextFactory: BrowserContextFactory; private _tabs: Tab[] = []; @@ -40,14 +41,13 @@ export class Context { private static _allContexts: Set = new Set(); private _closeBrowserContextPromise: Promise | undefined; - private _inputRecorder: InputRecorder | undefined; - private _sessionLog: SessionLog | undefined; + private _isRunningTool: boolean = false; constructor(tools: Tool[], config: FullConfig, browserContextFactory: BrowserContextFactory, sessionLog: SessionLog | undefined) { this.tools = tools; this.config = config; this._browserContextFactory = browserContextFactory; - this._sessionLog = sessionLog; + this.sessionLog = sessionLog; testDebug('create context'); Context._allContexts.add(this); } @@ -93,29 +93,6 @@ export class Context { return this._currentTab!; } - async listTabsMarkdown(force: boolean = false): Promise { - if (this._tabs.length === 1 && !force) - return []; - - if (!this._tabs.length) { - return [ - '### Open tabs', - 'No open tabs. Use the "browser_navigate" tool to navigate to a page first.', - '', - ]; - } - - const lines: string[] = ['### Open tabs']; - for (let i = 0; i < this._tabs.length; i++) { - const tab = this._tabs[i]; - const title = await tab.title(); - const url = tab.page.url(); - const current = tab === this._currentTab ? ' (current)' : ''; - lines.push(`- ${i}:${current} [${title}] (${url})`); - } - lines.push(''); - return lines; - } async closeTab(index: number | undefined): Promise { const tab = index === undefined ? this._currentTab : this._tabs[index]; @@ -152,8 +129,12 @@ export class Context { this._closeBrowserContextPromise = undefined; } - async setInputRecorderEnabled(enabled: boolean) { - await this._inputRecorder?.setEnabled(enabled); + isRunningTool() { + return this._isRunningTool; + } + + setRunningTool(isRunningTool: boolean) { + this._isRunningTool = isRunningTool; } private async _closeBrowserContextImpl() { @@ -208,8 +189,8 @@ export class Context { const result = await this._browserContextFactory.createContext(this.clientVersion!); const { browserContext } = result; await this._setupRequestInterception(browserContext); - if (this._sessionLog) - this._inputRecorder = await InputRecorder.create(this._sessionLog, browserContext); + if (this.sessionLog) + await InputRecorder.create(this, browserContext); for (const page of browserContext.pages()) this._onPageCreated(page); browserContext.on('page', page => this._onPageCreated(page)); @@ -226,87 +207,54 @@ export class Context { } export class InputRecorder { - private _actions: Action[] = []; - private _enabled = false; - private _sessionLog: SessionLog; + private _context: Context; private _browserContext: playwright.BrowserContext; - private _flushTimer: NodeJS.Timeout | undefined; - private constructor(sessionLog: SessionLog, browserContext: playwright.BrowserContext) { - this._sessionLog = sessionLog; + private constructor(context: Context, browserContext: playwright.BrowserContext) { + this._context = context; this._browserContext = browserContext; } - static async create(sessionLog: SessionLog, browserContext: playwright.BrowserContext) { - const recorder = new InputRecorder(sessionLog, browserContext); + static async create(context: Context, browserContext: playwright.BrowserContext) { + const recorder = new InputRecorder(context, browserContext); await recorder._initialize(); - await recorder.setEnabled(true); return recorder; } private async _initialize() { + const sessionLog = this._context.sessionLog!; await (this._browserContext as any)._enableRecorder({ mode: 'recording', recorderMode: 'api', }, { actionAdded: (page: playwright.Page, data: actions.ActionInContext, code: string) => { - if (!this._enabled) + if (this._context.isRunningTool()) return; const tab = Tab.forPage(page); - this._actions.push({ ...data, tab, code: code.trim(), timestamp: performance.now() }); - this._scheduleFlush(); + if (tab) + sessionLog.logUserAction(data.action, tab, code, false); }, actionUpdated: (page: playwright.Page, data: actions.ActionInContext, code: string) => { - if (!this._enabled) + if (this._context.isRunningTool()) return; const tab = Tab.forPage(page); - this._actions[this._actions.length - 1] = { ...data, tab, code: code.trim(), timestamp: performance.now() }; - this._scheduleFlush(); + if (tab) + sessionLog.logUserAction(data.action, tab, code, true); }, signalAdded: (page: playwright.Page, data: actions.SignalInContext) => { + if (this._context.isRunningTool()) + return; if (data.signal.name !== 'navigation') return; const tab = Tab.forPage(page); - this._actions.push({ - frame: data.frame, - action: { - name: 'navigate', - url: data.signal.url, - signals: [], - }, - startTime: data.timestamp, - endTime: data.timestamp, - tab, - code: `await page.goto('${data.signal.url}');`, - timestamp: performance.now(), - }); - this._scheduleFlush(); + const navigateAction: actions.Action = { + name: 'navigate', + url: data.signal.url, + signals: [], + }; + if (tab) + sessionLog.logUserAction(navigateAction, tab, `await page.goto('${data.signal.url}');`, false); }, }); } - - async setEnabled(enabled: boolean) { - this._enabled = enabled; - if (!enabled) - await this._flush(); - } - - private _clearTimer() { - if (this._flushTimer) { - clearTimeout(this._flushTimer); - this._flushTimer = undefined; - } - } - - private _scheduleFlush() { - this._clearTimer(); - this._flushTimer = setTimeout(() => this._flush(), 1000); - } - - private async _flush() { - this._clearTimer(); - const actions = this._actions; - this._actions = []; - await this._sessionLog.logActions(actions); - } } diff --git a/src/response.ts b/src/response.ts index 7aa4608..6c66d3d 100644 --- a/src/response.ts +++ b/src/response.ts @@ -16,8 +16,7 @@ import { renderModalStates } from './tab.js'; -import type { TabSnapshot } from './tab.js'; -import type { ModalState } from './tools/tool.js'; +import type { Tab, TabSnapshot } from './tab.js'; import type { ImageContent, TextContent } from '@modelcontextprotocol/sdk/types.js'; import type { Context } from './context.js'; @@ -28,7 +27,7 @@ export class Response { private _context: Context; private _includeSnapshot = false; private _includeTabs = false; - private _snapshot: { tabSnapshot?: TabSnapshot, modalState?: ModalState } | undefined; + private _tabSnapshot: TabSnapshot | undefined; readonly toolName: string; readonly toolArgs: Record; @@ -81,17 +80,20 @@ export class Response { this._includeTabs = true; } - async snapshot(): Promise<{ tabSnapshot?: TabSnapshot, modalState?: ModalState }> { - if (this._snapshot) - return this._snapshot; + async finish() { + // All the async snapshotting post-action is happening here. + // Everything below should race against modal states. if (this._includeSnapshot && this._context.currentTab()) - this._snapshot = await this._context.currentTabOrDie().captureSnapshot(); - else - this._snapshot = {}; - return this._snapshot; + this._tabSnapshot = await this._context.currentTabOrDie().captureSnapshot(); + for (const tab of this._context.tabs()) + await tab.updateTitle(); } - async serialize(): Promise<{ content: (TextContent | ImageContent)[], isError?: boolean }> { + tabSnapshot(): TabSnapshot | undefined { + return this._tabSnapshot; + } + + serialize(): { content: (TextContent | ImageContent)[], isError?: boolean } { const response: string[] = []; // Start with command result. @@ -112,16 +114,14 @@ ${this._code.join('\n')} // List browser tabs. if (this._includeSnapshot || this._includeTabs) - response.push(...(await this._context.listTabsMarkdown(this._includeTabs))); + response.push(...renderTabsMarkdown(this._context.tabs(), this._includeTabs)); // Add snapshot if provided. - const snapshot = await this.snapshot(); - if (snapshot?.modalState) { - response.push(...renderModalStates(this._context, [snapshot.modalState])); + if (this._tabSnapshot?.modalStates.length) { + response.push(...renderModalStates(this._context, this._tabSnapshot.modalStates)); response.push(''); - } - if (snapshot?.tabSnapshot) { - response.push(renderTabSnapshot(snapshot.tabSnapshot)); + } else if (this._tabSnapshot) { + response.push(renderTabSnapshot(this._tabSnapshot)); response.push(''); } @@ -172,6 +172,28 @@ function renderTabSnapshot(tabSnapshot: TabSnapshot): string { return lines.join('\n'); } +function renderTabsMarkdown(tabs: Tab[], force: boolean = false): string[] { + if (tabs.length === 1 && !force) + return []; + + if (!tabs.length) { + return [ + '### Open tabs', + 'No open tabs. Use the "browser_navigate" tool to navigate to a page first.', + '', + ]; + } + + const lines: string[] = ['### Open tabs']; + for (let i = 0; i < tabs.length; i++) { + const tab = tabs[i]; + const current = tab.isCurrentTab() ? ' (current)' : ''; + lines.push(`- ${i}:${current} [${tab.lastTitle()}] (${tab.page.url()})`); + } + lines.push(''); + return lines; +} + function trim(text: string, maxLength: number) { if (text.length <= maxLength) return text; diff --git a/src/sessionLog.ts b/src/sessionLog.ts index e4099e6..7d6a365 100644 --- a/src/sessionLog.ts +++ b/src/sessionLog.ts @@ -19,17 +19,32 @@ import path from 'path'; import { outputFile } from './config.js'; import { Response } from './response.js'; + +import { logUnhandledError } from './log.js'; import type { FullConfig } from './config.js'; import type * as actions from './actions.js'; -import type { Tab } from './tab.js'; +import type { Tab, TabSnapshot } from './tab.js'; -export type Action = actions.ActionInContext & { code: string; tab?: Tab | undefined; timestamp: number }; +type LogEntry = { + timestamp: number; + toolCall?: { + toolName: string; + toolArgs: Record; + result: string; + isError?: boolean; + }; + userAction?: actions.Action; + code: string; + tabSnapshot?: TabSnapshot; +}; export class SessionLog { private _folder: string; private _file: string; private _ordinal = 0; - private _lastModified = 0; + private _pendingEntries: LogEntry[] = []; + private _sessionFileQueue = Promise.resolve(); + private _flushEntriesTimeout: NodeJS.Timeout | undefined; constructor(sessionFolder: string) { this._folder = sessionFolder; @@ -44,90 +59,118 @@ export class SessionLog { return new SessionLog(sessionFolder); } - lastModified() { - return this._lastModified; + logResponse(response: Response) { + const entry: LogEntry = { + timestamp: performance.now(), + toolCall: { + toolName: response.toolName, + toolArgs: response.toolArgs, + result: response.result(), + isError: response.isError(), + }, + code: response.code(), + tabSnapshot: response.tabSnapshot(), + }; + this._appendEntry(entry); } - async logResponse(response: Response) { - this._lastModified = performance.now(); - const prefix = `${(++this._ordinal).toString().padStart(3, '0')}`; - const lines: string[] = [ - `### Tool call: ${response.toolName}`, - `- Args`, - '```json', - JSON.stringify(response.toolArgs, null, 2), - '```', - ]; - if (response.result()) { - lines.push( - response.isError() ? `- Error` : `- Result`, - '```', - response.result(), - '```'); + logUserAction(action: actions.Action, tab: Tab, code: string, isUpdate: boolean) { + code = code.trim(); + if (isUpdate) { + const lastEntry = this._pendingEntries[this._pendingEntries.length - 1]; + if (lastEntry.userAction?.name === action.name) { + lastEntry.userAction = action; + lastEntry.code = code; + return; + } } - - if (response.code()) { - lines.push( - `- Code`, - '```js', - response.code(), - '```'); + if (action.name === 'navigate') { + // Already logged at this location. + const lastEntry = this._pendingEntries[this._pendingEntries.length - 1]; + if (lastEntry?.tabSnapshot?.url === action.url) + return; } - - const snapshot = await response.snapshot(); - if (snapshot?.tabSnapshot) { - const fileName = `${prefix}.snapshot.yml`; - await fs.promises.writeFile(path.join(this._folder, fileName), snapshot.tabSnapshot?.ariaSnapshot); - lines.push(`- Snapshot: ${fileName}`); - } - - for (const image of response.images()) { - const fileName = `${prefix}.screenshot.${extension(image.contentType)}`; - await fs.promises.writeFile(path.join(this._folder, fileName), image.data); - lines.push(`- Screenshot: ${fileName}`); - } - - lines.push('', '', ''); - await this._appendLines(lines); + const entry: LogEntry = { + timestamp: performance.now(), + userAction: action, + code, + tabSnapshot: { + url: tab.page.url(), + title: '', + ariaSnapshot: action.ariaSnapshot || '', + modalStates: [], + consoleMessages: [], + downloads: [], + }, + }; + this._appendEntry(entry); } - async logActions(actions: Action[]) { - // Skip recent navigation, it is a side-effect of the previous action or tool use. - if (actions?.[0]?.action?.name === 'navigate' && actions[0].timestamp - this._lastModified < 1000) - return; + private _appendEntry(entry: LogEntry) { + this._pendingEntries.push(entry); + if (this._flushEntriesTimeout) + clearTimeout(this._flushEntriesTimeout); + this._flushEntriesTimeout = setTimeout(() => this._flushEntries(), 1000); + } - this._lastModified = performance.now(); - const lines: string[] = []; - for (const action of actions) { - const prefix = `${(++this._ordinal).toString().padStart(3, '0')}`; - lines.push( - `### User action: ${action.action.name}`, - ); - if (action.code) { + private async _flushEntries() { + clearTimeout(this._flushEntriesTimeout); + const entries = this._pendingEntries; + this._pendingEntries = []; + const lines: string[] = ['']; + + for (const entry of entries) { + const ordinal = (++this._ordinal).toString().padStart(3, '0'); + if (entry.toolCall) { + lines.push( + `### Tool call: ${entry.toolCall.toolName}`, + `- Args`, + '```json', + JSON.stringify(entry.toolCall.toolArgs, null, 2), + '```', + ); + if (entry.toolCall.result) { + lines.push( + entry.toolCall.isError ? `- Error` : `- Result`, + '```', + entry.toolCall.result, + '```', + ); + } + } + + if (entry.userAction) { + const actionData = { ...entry.userAction } as any; + delete actionData.ariaSnapshot; + delete actionData.selector; + delete actionData.signals; + + lines.push( + `### User action: ${entry.userAction.name}`, + `- Args`, + '```json', + JSON.stringify(actionData, null, 2), + '```', + ); + } + + if (entry.code) { lines.push( `- Code`, '```js', - action.code, + entry.code, '```'); } - if (action.action.ariaSnapshot) { - const fileName = `${prefix}.snapshot.yml`; - await fs.promises.writeFile(path.join(this._folder, fileName), action.action.ariaSnapshot); + + if (entry.tabSnapshot) { + const fileName = `${ordinal}.snapshot.yml`; + fs.promises.writeFile(path.join(this._folder, fileName), entry.tabSnapshot.ariaSnapshot).catch(logUnhandledError); lines.push(`- Snapshot: ${fileName}`); } - lines.push('', '', ''); + + lines.push('', ''); } - await this._appendLines(lines); - } - - private async _appendLines(lines: string[]) { - await fs.promises.appendFile(this._file, lines.join('\n')); + this._sessionFileQueue = this._sessionFileQueue.then(() => fs.promises.appendFile(this._file, lines.join('\n'))); } } - -function extension(contentType: string): 'jpg' | 'png' { - if (contentType === 'image/jpeg') - return 'jpg'; - return 'png'; -} diff --git a/src/tab.ts b/src/tab.ts index ca5e0e9..2e14907 100644 --- a/src/tab.ts +++ b/src/tab.ts @@ -48,6 +48,7 @@ export type TabSnapshot = { export class Tab extends EventEmitter { readonly context: Context; readonly page: playwright.Page; + private _lastTitle = 'about:blank'; private _consoleMessages: ConsoleMessage[] = []; private _recentConsoleMessages: ConsoleMessage[] = []; private _requests: Map = new Map(); @@ -137,8 +138,18 @@ export class Tab extends EventEmitter { this._onPageClose(this); } - async title(): Promise { - return await callOnPageNoTrace(this.page, page => page.title()); + async updateTitle() { + await this._raceAgainstModalStates(async () => { + this._lastTitle = await callOnPageNoTrace(this.page, page => page.title()); + }); + } + + lastTitle(): string { + return this._lastTitle; + } + + isCurrentTab(): boolean { + return this === this.context.currentTab(); } async waitForLoadState(state: 'load', options?: { timeout?: number }): Promise { @@ -182,15 +193,15 @@ export class Tab extends EventEmitter { return this._requests; } - async captureSnapshot(): Promise<{ tabSnapshot?: TabSnapshot, modalState?: ModalState }> { + async captureSnapshot(): Promise { let tabSnapshot: TabSnapshot | undefined; - const modalState = await this._raceAgainstModalStates(async () => { + const modalStates = await this._raceAgainstModalStates(async () => { const snapshot = await (this.page as PageEx)._snapshotForAI(); tabSnapshot = { url: this.page.url(), title: await this.page.title(), ariaSnapshot: snapshot, - modalStates: this.modalStates(), + modalStates: [], consoleMessages: [], downloads: this._downloads, }; @@ -200,25 +211,32 @@ export class Tab extends EventEmitter { tabSnapshot.consoleMessages = this._recentConsoleMessages; this._recentConsoleMessages = []; } - return { tabSnapshot, modalState }; + return tabSnapshot ?? { + url: this.page.url(), + title: '', + ariaSnapshot: '', + modalStates, + consoleMessages: [], + downloads: [], + }; } private _javaScriptBlocked(): boolean { return this._modalStates.some(state => state.type === 'dialog'); } - private async _raceAgainstModalStates(action: () => Promise): Promise { + private async _raceAgainstModalStates(action: () => Promise): Promise { if (this.modalStates().length) - return this.modalStates()[0]; + return this.modalStates(); - const promise = new ManualPromise(); - const listener = (modalState: ModalState) => promise.resolve(modalState); + const promise = new ManualPromise(); + const listener = (modalState: ModalState) => promise.resolve([modalState]); this.once(TabEvents.modalState, listener); return await Promise.race([ action().then(() => { this.off(TabEvents.modalState, listener); - return undefined; + return []; }), promise, ]); diff --git a/tests/session-log.spec.ts b/tests/session-log.spec.ts index dccc6a9..2654c2d 100644 --- a/tests/session-log.spec.ts +++ b/tests/session-log.spec.ts @@ -47,8 +47,8 @@ test('session log should record tool calls', async ({ startClient, server }, tes const output = stderr().split('\n').filter(line => line.startsWith('Session: '))[0]; const sessionFolder = output.substring('Session: '.length); - const sessionLog = await fs.promises.readFile(path.join(sessionFolder, 'session.md'), 'utf8'); - expect(sessionLog).toBe(`### Tool call: browser_navigate + await expect.poll(() => readSessionLog(sessionFolder)).toBe(` +### Tool call: browser_navigate - Args \`\`\`json { @@ -76,11 +76,120 @@ await page.getByRole('button', { name: 'Submit' }).click(); \`\`\` - Snapshot: 002.snapshot.yml +`); +}); + +test('session log should record user action', async ({ cdpServer, startClient }, testInfo) => { + const browserContext = await cdpServer.start(); + const { client, stderr } = await startClient({ + args: [ + '--save-session', + '--output-dir', testInfo.outputPath('output'), + `--cdp-endpoint=${cdpServer.endpoint}`, + ], + }); + + // Force browser context creation. + await client.callTool({ + name: 'browser_snapshot', + }); + + const [page] = browserContext.pages(); + await page.setContent(` + + + `); + + await page.getByRole('button', { name: 'Button 1' }).click(); + + const output = stderr().split('\n').filter(line => line.startsWith('Session: '))[0]; + const sessionFolder = output.substring('Session: '.length); + + await expect.poll(() => readSessionLog(sessionFolder)).toBe(` +### Tool call: browser_snapshot +- Args +\`\`\`json +{} +\`\`\` +- Snapshot: 001.snapshot.yml + + +### User action: click +- Args +\`\`\`json +{ + "name": "click", + "ref": "e2", + "button": "left", + "modifiers": 0, + "clickCount": 1 +} +\`\`\` +- Code +\`\`\`js +await page.getByRole('button', { name: 'Button 1' }).click(); +\`\`\` +- Snapshot: 002.snapshot.yml `); }); -test('session log should record tool user actions', async ({ cdpServer, startClient }, testInfo) => { +test('session log should update user action', async ({ cdpServer, startClient }, testInfo) => { + const browserContext = await cdpServer.start(); + const { client, stderr } = await startClient({ + args: [ + '--save-session', + '--output-dir', testInfo.outputPath('output'), + `--cdp-endpoint=${cdpServer.endpoint}`, + ], + }); + + // Force browser context creation. + await client.callTool({ + name: 'browser_snapshot', + }); + + const [page] = browserContext.pages(); + await page.setContent(` + + + `); + + await page.getByRole('button', { name: 'Button 1' }).dblclick(); + + const output = stderr().split('\n').filter(line => line.startsWith('Session: '))[0]; + const sessionFolder = output.substring('Session: '.length); + + await expect.poll(() => readSessionLog(sessionFolder)).toBe(` +### Tool call: browser_snapshot +- Args +\`\`\`json +{} +\`\`\` +- Snapshot: 001.snapshot.yml + + +### User action: click +- Args +\`\`\`json +{ + "name": "click", + "ref": "e2", + "button": "left", + "modifiers": 0, + "clickCount": 2 +} +\`\`\` +- Code +\`\`\`js +await page.getByRole('button', { name: 'Button 1' }).dblclick(); +\`\`\` +- Snapshot: 002.snapshot.yml + +`); +}); + +test('session log should record tool calls and user actions', async ({ cdpServer, startClient }, testInfo) => { const browserContext = await cdpServer.start(); const { client, stderr } = await startClient({ args: [ @@ -117,8 +226,8 @@ test('session log should record tool user actions', async ({ cdpServer, startCli const output = stderr().split('\n').filter(line => line.startsWith('Session: '))[0]; const sessionFolder = output.substring('Session: '.length); - const sessionLog = await fs.promises.readFile(path.join(sessionFolder, 'session.md'), 'utf8'); - expect(sessionLog).toBe(`### Tool call: browser_snapshot + await expect.poll(() => readSessionLog(sessionFolder)).toBe(` +### Tool call: browser_snapshot - Args \`\`\`json {} @@ -127,6 +236,16 @@ test('session log should record tool user actions', async ({ cdpServer, startCli ### User action: click +- Args +\`\`\`json +{ + "name": "click", + "ref": "e2", + "button": "left", + "modifiers": 0, + "clickCount": 1 +} +\`\`\` - Code \`\`\`js await page.getByRole('button', { name: 'Button 1' }).click(); @@ -148,6 +267,9 @@ await page.getByRole('button', { name: 'Button 2' }).click(); \`\`\` - Snapshot: 003.snapshot.yml - `); }); + +async function readSessionLog(sessionFolder: string): Promise { + return await fs.promises.readFile(path.join(sessionFolder, 'session.md'), 'utf8').catch(() => ''); +}