chore: record user actions in the session log (#798)

This commit is contained in:
Pavel Feldman
2025-07-30 18:26:13 -07:00
committed by GitHub
parent 4df162aff5
commit f600234897
13 changed files with 536 additions and 54 deletions

172
src/actions.d.ts vendored Normal file
View File

@@ -0,0 +1,172 @@
/**
* Copyright (c) Microsoft Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/** An `x` / `y` number pair; used below for the optional click position. */
type Point = { x: number; y: number };

/** The closed set of names an action may carry in its `name` field. */
export type ActionName =
  | 'check'
  | 'click'
  | 'closePage'
  | 'fill'
  | 'navigate'
  | 'openPage'
  | 'press'
  | 'select'
  | 'uncheck'
  | 'setInputFiles'
  | 'assertText'
  | 'assertValue'
  | 'assertChecked'
  | 'assertVisible'
  | 'assertSnapshot';

/** Fields common to every action: its name, its signals and an optional aria snapshot. */
export type ActionBase = {
  name: ActionName;
  signals: Array<Signal>;
  ariaSnapshot?: string;
};

/** An action that additionally carries a `selector` and, optionally, a `ref`. */
export type ActionWithSelector = ActionBase & {
  selector: string;
  ref?: string;
};
/** `click`: mouse button, modifiers, click count and an optional position. */
export type ClickAction = ActionWithSelector & {
  name: 'click';
  button: 'left' | 'middle' | 'right';
  modifiers: number;
  clickCount: number;
  position?: Point;
};

/** `check`: carries nothing beyond the selector fields. */
export type CheckAction = ActionWithSelector & {
  name: 'check';
};

/** `uncheck`: carries nothing beyond the selector fields. */
export type UncheckAction = ActionWithSelector & {
  name: 'uncheck';
};

/** `fill`: the text for the selected element. */
export type FillAction = ActionWithSelector & {
  name: 'fill';
  text: string;
};

/** `navigate`: the target URL; has no selector. */
export type NavigateAction = ActionBase & {
  name: 'navigate';
  url: string;
};

/** `openPage`: the URL of the opened page; has no selector. */
export type OpenPageAction = ActionBase & {
  name: 'openPage';
  url: string;
};

/** `closePage`: no extra fields and no selector. */
export type ClosesPageAction = ActionBase & {
  name: 'closePage';
};

/** `press`: the key plus modifiers. */
export type PressAction = ActionWithSelector & {
  name: 'press';
  key: string;
  modifiers: number;
};

/** `select`: the list of options. */
export type SelectAction = ActionWithSelector & {
  name: 'select';
  options: Array<string>;
};

/** `setInputFiles`: the list of files. */
export type SetInputFilesAction = ActionWithSelector & {
  name: 'setInputFiles';
  files: Array<string>;
};

/** `assertText`: the text plus a `substring` flag. */
export type AssertTextAction = ActionWithSelector & {
  name: 'assertText';
  text: string;
  substring: boolean;
};

/** `assertValue`: the value. */
export type AssertValueAction = ActionWithSelector & {
  name: 'assertValue';
  value: string;
};

/** `assertChecked`: the checked state. */
export type AssertCheckedAction = ActionWithSelector & {
  name: 'assertChecked';
  checked: boolean;
};

/** `assertVisible`: carries nothing beyond the selector fields. */
export type AssertVisibleAction = ActionWithSelector & {
  name: 'assertVisible';
};

/** `assertSnapshot`: here `ariaSnapshot` is required rather than optional. */
export type AssertSnapshotAction = ActionWithSelector & {
  name: 'assertSnapshot';
  ariaSnapshot: string;
};
/** Union of every concrete action type. */
export type Action =
  | ClickAction
  | CheckAction
  | ClosesPageAction
  | OpenPageAction
  | UncheckAction
  | FillAction
  | NavigateAction
  | PressAction
  | SelectAction
  | SetInputFilesAction
  | AssertTextAction
  | AssertValueAction
  | AssertCheckedAction
  | AssertVisibleAction
  | AssertSnapshotAction;

/** The subset of actions whose name starts with `assert`. */
export type AssertAction =
  | AssertCheckedAction
  | AssertValueAction
  | AssertTextAction
  | AssertVisibleAction
  | AssertSnapshotAction;

/** The click / check / uncheck / press / select subset of actions. */
export type PerformOnRecordAction =
  | ClickAction
  | CheckAction
  | UncheckAction
  | PressAction
  | SelectAction;
// Signals.

/** Common base of all signals; declares no fields of its own. */
export type BaseSignal = {
};

/** `navigation` signal: the URL. */
export type NavigationSignal = BaseSignal & {
  name: 'navigation';
  url: string;
};

/** `popup` signal: the popup's alias. */
export type PopupSignal = BaseSignal & {
  name: 'popup';
  popupAlias: string;
};

/** `download` signal: the download's alias. */
export type DownloadSignal = BaseSignal & {
  name: 'download';
  downloadAlias: string;
};

/** `dialog` signal: the dialog's alias. */
export type DialogSignal = BaseSignal & {
  name: 'dialog';
  dialogAlias: string;
};

/** Union of every signal type, discriminated by `name`. */
export type Signal =
  | NavigationSignal
  | PopupSignal
  | DownloadSignal
  | DialogSignal;
/** Describes a frame by page guid, page alias and frame path. */
export type FrameDescription = {
  pageGuid: string;
  pageAlias: string;
  framePath: Array<string>;
};

/** An action together with its frame, start time and optional description / end time. */
export type ActionInContext = {
  frame: FrameDescription;
  description?: string;
  action: Action;
  startTime: number;
  endTime?: number;
};

/** A signal together with its frame and a timestamp. */
export type SignalInContext = {
  frame: FrameDescription;
  signal: Signal;
  timestamp: number;
};

View File

@@ -33,16 +33,20 @@ export class BrowserServerBackend implements ServerBackend {
onclose?: () => void;
private _tools: Tool[];
private _context: Context;
private _context: Context | undefined;
private _sessionLog: SessionLog | undefined;
private _config: FullConfig;
private _browserContextFactory: BrowserContextFactory;
constructor(config: FullConfig, browserContextFactory: BrowserContextFactory) {
this._config = config;
this._browserContextFactory = browserContextFactory;
this._tools = filteredTools(config);
this._context = new Context(this._tools, config, browserContextFactory);
}
async initialize() {
this._sessionLog = this._context.config.saveSession ? await SessionLog.create(this._context.config) : undefined;
this._sessionLog = this._config.saveSession ? await SessionLog.create(this._config) : undefined;
this._context = new Context(this._tools, this._config, this._browserContextFactory, this._sessionLog);
}
tools(): mcpServer.ToolSchema<any>[] {
@@ -50,20 +54,27 @@ export class BrowserServerBackend implements ServerBackend {
}
/**
 * Runs the tool named by `schema.name` with `parsedArguments` and returns the
 * serialized response.
 *
 * NOTE(review): this listing comes from a diff and appears to contain both the
 * previous and the updated statements (e.g. `response` is declared twice);
 * the comments below describe the statements that work on the local `context`.
 */
async callTool(schema: mcpServer.ToolSchema<any>, parsedArguments: any) {
const response = new Response(this._context, schema.name, parsedArguments);
// The context is only created in initialize(), so this assumes initialize() has already run.
const context = this._context!;
const response = new Response(context, schema.name, parsedArguments);
const tool = this._tools.find(tool => tool.schema.name === schema.name)!;
await tool.handle(this._context, parsedArguments, response);
if (this._sessionLog)
await this._sessionLog.log(response);
// Pause user-input recording while the tool runs; disabling also flushes
// whatever the recorder has buffered so far.
await context.setInputRecorderEnabled(false);
try {
await tool.handle(context, parsedArguments, response);
} catch (error) {
// A throwing tool does not reject the call; the error text is put on the response instead.
response.addError(String(error));
} finally {
// Always resume recording, even when the tool failed.
await context.setInputRecorderEnabled(true);
}
// No-op when no session log was created in initialize().
await this._sessionLog?.logResponse(response);
return await response.serialize();
}
serverInitialized(version: mcpServer.ClientVersion | undefined) {
this._context.clientVersion = version;
this._context!.clientVersion = version;
}
serverClosed() {
this.onclose?.();
void this._context.dispose().catch(logUnhandledError);
void this._context!.dispose().catch(logUnhandledError);
}
}

View File

@@ -23,6 +23,8 @@ import { Tab } from './tab.js';
import type { Tool } from './tools/tool.js';
import type { FullConfig } from './config.js';
import type { BrowserContextFactory } from './browserContextFactory.js';
import type * as actions from './actions.js';
import type { Action, SessionLog } from './sessionLog.js';
const testDebug = debug('pw:mcp:test');
@@ -33,15 +35,19 @@ export class Context {
private _browserContextFactory: BrowserContextFactory;
private _tabs: Tab[] = [];
private _currentTab: Tab | undefined;
clientVersion: { name: string; version: string; } | undefined;
private static _allContexts: Set<Context> = new Set();
private _closeBrowserContextPromise: Promise<void> | undefined;
private _inputRecorder: InputRecorder | undefined;
private _sessionLog: SessionLog | undefined;
constructor(tools: Tool[], config: FullConfig, browserContextFactory: BrowserContextFactory) {
constructor(tools: Tool[], config: FullConfig, browserContextFactory: BrowserContextFactory, sessionLog: SessionLog | undefined) {
this.tools = tools;
this.config = config;
this._browserContextFactory = browserContextFactory;
this._sessionLog = sessionLog;
testDebug('create context');
Context._allContexts.add(this);
}
@@ -146,6 +152,10 @@ export class Context {
this._closeBrowserContextPromise = undefined;
}
/**
 * Turns recording of user input on or off.
 * Does nothing when no input recorder has been created for this context.
 */
async setInputRecorderEnabled(enabled: boolean) {
  const recorder = this._inputRecorder;
  await recorder?.setEnabled(enabled);
}
private async _closeBrowserContextImpl() {
if (!this._browserContextPromise)
return;
@@ -198,6 +208,8 @@ export class Context {
const result = await this._browserContextFactory.createContext(this.clientVersion!);
const { browserContext } = result;
await this._setupRequestInterception(browserContext);
if (this._sessionLog)
this._inputRecorder = await InputRecorder.create(this._sessionLog, browserContext);
for (const page of browserContext.pages())
this._onPageCreated(page);
browserContext.on('page', page => this._onPageCreated(page));
@@ -212,3 +224,89 @@ export class Context {
return result;
}
}
/**
 * Buffers the actions reported by the browser context's recorder and forwards
 * them to the session log in batches.
 *
 * A batch is flushed one second after the most recent recorded event, or
 * immediately when recording is switched off via `setEnabled(false)`.
 */
export class InputRecorder {
  /** Entries collected since the last flush. */
  private _actions: Action[] = [];
  /** While false, `actionAdded` / `actionUpdated` events are ignored. */
  private _enabled = false;
  private _sessionLog: SessionLog;
  private _browserContext: playwright.BrowserContext;
  /** Pending timer for the next flush, if one is scheduled. */
  private _flushTimer: NodeJS.Timeout | undefined;

  private constructor(sessionLog: SessionLog, browserContext: playwright.BrowserContext) {
    this._sessionLog = sessionLog;
    this._browserContext = browserContext;
  }

  /**
   * Creates a recorder for `browserContext`, registers its callbacks and
   * enables recording.
   *
   * @param sessionLog Log that receives the flushed batches.
   * @param browserContext Browser context whose recorder is enabled.
   * @returns The initialized, enabled recorder.
   */
  static async create(sessionLog: SessionLog, browserContext: playwright.BrowserContext) {
    const recorder = new InputRecorder(sessionLog, browserContext);
    await recorder._initialize();
    await recorder.setEnabled(true);
    return recorder;
  }

  /** Enables the context's recorder and wires its callbacks into the buffer. */
  private async _initialize() {
    // `_enableRecorder` is reached through an `any` cast, i.e. it is not part of
    // the typings this file compiles against.
    await (this._browserContext as any)._enableRecorder({
      mode: 'recording',
      recorderMode: 'api',
    }, {
      actionAdded: (page: playwright.Page, data: actions.ActionInContext, code: string) => {
        if (!this._enabled)
          return;
        this._actions.push(this._toLoggedAction(page, data, code));
        this._scheduleFlush();
      },
      actionUpdated: (page: playwright.Page, data: actions.ActionInContext, code: string) => {
        if (!this._enabled)
          return;
        const updated = this._toLoggedAction(page, data, code);
        // The buffer is empty right after a flush. Assigning to index -1 would
        // only create a stray "-1" property and the update would never be
        // logged, so record it as a fresh entry in that case.
        if (this._actions.length)
          this._actions[this._actions.length - 1] = updated;
        else
          this._actions.push(updated);
        this._scheduleFlush();
      },
      signalAdded: (page: playwright.Page, data: actions.SignalInContext) => {
        // Only navigation signals are turned into log entries. Unlike the action
        // callbacks, this one does not consult `_enabled`.
        if (data.signal.name !== 'navigation')
          return;
        const url = data.signal.url;
        const tab = Tab.forPage(page);
        this._actions.push({
          frame: data.frame,
          action: {
            name: 'navigate',
            url,
            signals: [],
          },
          startTime: data.timestamp,
          endTime: data.timestamp,
          tab,
          code: `await page.goto(${InputRecorder._quote(url)});`,
          timestamp: performance.now(),
        });
        this._scheduleFlush();
      },
    });
  }

  /**
   * Enables or disables recording. Disabling flushes the buffered entries to
   * the session log before returning.
   */
  async setEnabled(enabled: boolean) {
    this._enabled = enabled;
    if (!enabled)
      await this._flush();
  }

  /** Builds a log entry from a recorder callback's arguments. */
  private _toLoggedAction(page: playwright.Page, data: actions.ActionInContext, code: string): Action {
    const tab = Tab.forPage(page);
    return { ...data, tab, code: code.trim(), timestamp: performance.now() };
  }

  /**
   * Wraps `text` in single quotes, escaping backslashes, single quotes and line
   * breaks so the result is a valid JavaScript string literal.
   */
  private static _quote(text: string): string {
    const escaped = text
        .replace(/\\/g, '\\\\')
        .replace(/'/g, '\\\'')
        .replace(/\n/g, '\\n')
        .replace(/\r/g, '\\r');
    return `'${escaped}'`;
  }

  private _clearTimer() {
    if (this._flushTimer) {
      clearTimeout(this._flushTimer);
      this._flushTimer = undefined;
    }
  }

  /** (Re)starts the one-second timer, so a burst of events ends up in one batch. */
  private _scheduleFlush() {
    this._clearTimer();
    // NOTE(review): a rejection from this timer-driven flush is not handled here — confirm that is intended.
    this._flushTimer = setTimeout(() => this._flush(), 1000);
  }

  /** Hands the buffered entries to the session log and starts a new buffer. */
  private async _flush() {
    this._clearTimer();
    // Swap the buffer before awaiting so events arriving during the write go into the next batch.
    const actions = this._actions;
    this._actions = [];
    await this._sessionLog.logActions(actions);
  }
}

View File

@@ -41,10 +41,14 @@ export class Response {
}
addError(error: string) {
this._result.push(`Error: ${error}`);
this._result.push(error);
this._isError = true;
}
/** Returns the error flag of this response; `addError` sets it to true. */
isError() {
return this._isError;
}
result() {
return this._result.join('\n');
}

View File

@@ -20,13 +20,16 @@ import path from 'path';
import { outputFile } from './config.js';
import { Response } from './response.js';
import type { FullConfig } from './config.js';
import type * as actions from './actions.js';
import type { Tab } from './tab.js';
let sessionOrdinal = 0;
export type Action = actions.ActionInContext & { code: string; tab?: Tab | undefined; timestamp: number };
export class SessionLog {
private _folder: string;
private _file: string;
private _ordinal = 0;
private _lastModified = 0;
constructor(sessionFolder: string) {
this._folder = sessionFolder;
@@ -34,18 +37,22 @@ export class SessionLog {
}
static async create(config: FullConfig): Promise<SessionLog> {
const sessionFolder = await outputFile(config, `session-${(++sessionOrdinal).toString().padStart(3, '0')}`);
const sessionFolder = await outputFile(config, `session-${Date.now()}`);
await fs.promises.mkdir(sessionFolder, { recursive: true });
// eslint-disable-next-line no-console
console.error(`Session: ${sessionFolder}`);
return new SessionLog(sessionFolder);
}
async log(response: Response) {
/**
 * Returns the `performance.now()` reading taken when the log was last written
 * by `logResponse` or `logActions`; 0 if nothing has been logged yet.
 */
lastModified() {
return this._lastModified;
}
async logResponse(response: Response) {
this._lastModified = performance.now();
const prefix = `${(++this._ordinal).toString().padStart(3, '0')}`;
const lines: string[] = [
`### Tool: ${response.toolName}`,
``,
`### Tool call: ${response.toolName}`,
`- Args`,
'```json',
JSON.stringify(response.toolArgs, null, 2),
@@ -53,7 +60,7 @@ export class SessionLog {
];
if (response.result()) {
lines.push(
`- Result`,
response.isError() ? `- Error` : `- Result`,
'```',
response.result(),
'```');
@@ -80,7 +87,41 @@ export class SessionLog {
lines.push(`- Screenshot: ${fileName}`);
}
lines.push('', '');
lines.push('', '', '');
await this._appendLines(lines);
}
/**
 * Appends one "User action" entry per recorded action to the session file.
 *
 * Each entry lists the action's code, when present, and references a
 * `<ordinal>.snapshot.yml` file written next to the log when the action
 * carries an aria snapshot.
 *
 * @param actions Recorded actions, in the order they should be logged.
 */
async logActions(actions: Action[]) {
  // A navigation recorded less than a second after the previous log entry is a
  // side-effect of the previous action or tool use, so it is not logged. Only
  // the leading run of such navigations is dropped; actions recorded after them
  // in the same batch are kept instead of being discarded with the navigation.
  const isRecentNavigation = (entry: Action) =>
    entry.action.name === 'navigate' && entry.timestamp - this._lastModified < 1000;
  const firstKept = actions.findIndex(entry => !isRecentNavigation(entry));
  // A batch made up solely of side-effect navigations leaves the log untouched.
  if (actions.length && firstKept === -1)
    return;
  const logged = firstKept > 0 ? actions.slice(firstKept) : actions;

  this._lastModified = performance.now();
  const lines: string[] = [];
  for (const action of logged) {
    // The ordinal is shared with tool-call entries, so snapshot file names stay unique.
    const prefix = `${(++this._ordinal).toString().padStart(3, '0')}`;
    lines.push(
        `### User action: ${action.action.name}`,
    );
    if (action.code) {
      lines.push(
          `- Code`,
          '```js',
          action.code,
          '```');
    }
    if (action.action.ariaSnapshot) {
      const fileName = `${prefix}.snapshot.yml`;
      await fs.promises.writeFile(path.join(this._folder, fileName), action.action.ariaSnapshot);
      lines.push(`- Snapshot: ${fileName}`);
    }
    lines.push('', '', '');
  }
  await this._appendLines(lines);
}
/** Joins `lines` with newlines and appends the result to the session file. */
private async _appendLines(lines: string[]) {
  const text = lines.join('\n');
  await fs.promises.appendFile(this._file, text);
}
}

View File

@@ -69,6 +69,11 @@ export class Tab extends EventEmitter<TabEventsInterface> {
});
page.setDefaultNavigationTimeout(60000);
page.setDefaultTimeout(5000);
(page as any)[tabSymbol] = this;
}
/**
 * Returns the Tab stored on `page` under `tabSymbol`, or `undefined` when the
 * page has no such property.
 */
static forPage(page: playwright.Page): Tab | undefined {
return (page as any)[tabSymbol];
}
modalStates(): ModalState[] {
@@ -308,3 +313,5 @@ function trim(text: string, maxLength: number) {
return text;
return text.slice(0, maxLength) + '...';
}
// Property key under which a Tab stores itself on its page object; read back by Tab.forPage().
const tabSymbol = Symbol('tabSymbol');

View File

@@ -37,7 +37,6 @@ const uploadFile = defineTabTool({
if (!modalState)
throw new Error('No file chooser visible');
response.addCode(`// Select files for upload`);
response.addCode(`await fileChooser.setFiles(${JSON.stringify(params.paths)})`);
tab.clearModalState(modalState);

View File

@@ -60,9 +60,9 @@ export function defineTabTool<Input extends z.Schema>(tool: TabTool<Input>): Too
const tab = context.currentTabOrDie();
const modalStates = tab.modalStates().map(state => state.type);
if (tool.clearsModalState && !modalStates.includes(tool.clearsModalState))
response.addError(`The tool "${tool.schema.name}" can only be used when there is related modal state present.\n` + tab.modalStatesMarkdown().join('\n'));
response.addError(`Error: The tool "${tool.schema.name}" can only be used when there is related modal state present.\n` + tab.modalStatesMarkdown().join('\n'));
else if (!tool.clearsModalState && modalStates.length)
response.addError(`Tool "${tool.schema.name}" does not handle the modal state.\n` + tab.modalStatesMarkdown().join('\n'));
response.addError(`Error: Tool "${tool.schema.name}" does not handle the modal state.\n` + tab.modalStatesMarkdown().join('\n'));
else
return tool.handle(tab, params, response);
},