chore: move state to tab, do not cache snapshot (#730)

This commit is contained in:
Pavel Feldman
2025-07-22 07:53:33 -07:00
committed by GitHub
parent cfcca40b90
commit 468c84eb8f
23 changed files with 291 additions and 355 deletions

View File

@@ -15,7 +15,7 @@
*/
import { z } from 'zod';
import { defineTool } from './tool.js';
import { defineTabTool, defineTool } from './tool.js';
const close = defineTool({
capability: 'core',
@@ -38,7 +38,7 @@ const close = defineTool({
},
});
const resize = defineTool({
const resize = defineTabTool({
capability: 'core',
schema: {
name: 'browser_resize',
@@ -51,9 +51,7 @@ const resize = defineTool({
type: 'readOnly',
},
handle: async (context, params) => {
const tab = context.currentTabOrDie();
handle: async (tab, params) => {
const code = [
`// Resize browser window to ${params.width}x${params.height}`,
`await page.setViewportSize({ width: ${params.width}, height: ${params.height} });`

View File

@@ -15,9 +15,9 @@
*/
import { z } from 'zod';
import { defineTool } from './tool.js';
import { defineTabTool } from './tool.js';
const console = defineTool({
const console = defineTabTool({
capability: 'core',
schema: {
name: 'browser_console_messages',
@@ -26,8 +26,8 @@ const console = defineTool({
inputSchema: z.object({}),
type: 'readOnly',
},
handle: async context => {
const messages = context.currentTabOrDie().consoleMessages();
handle: async tab => {
const messages = tab.consoleMessages();
const log = messages.map(message => message.toString()).join('\n');
return {
code: [`// <internal code to get console messages>`],

View File

@@ -15,9 +15,9 @@
*/
import { z } from 'zod';
import { defineTool } from './tool.js';
import { defineTabTool } from './tool.js';
const handleDialog = defineTool({
const handleDialog = defineTabTool({
capability: 'core',
schema: {
@@ -31,8 +31,8 @@ const handleDialog = defineTool({
type: 'destructive',
},
handle: async (context, params) => {
const dialogState = context.modalStates().find(state => state.type === 'dialog');
handle: async (tab, params) => {
const dialogState = tab.modalStates().find(state => state.type === 'dialog');
if (!dialogState)
throw new Error('No dialog visible');
@@ -41,7 +41,7 @@ const handleDialog = defineTool({
else
await dialogState.dialog.dismiss();
context.clearModalState(dialogState);
tab.clearModalState(dialogState);
const code = [
`// <internal code to handle "${dialogState.dialog.type()}" dialog>`,

View File

@@ -16,7 +16,7 @@
import { z } from 'zod';
import { defineTool } from './tool.js';
import { defineTabTool } from './tool.js';
import * as javascript from '../javascript.js';
import { generateLocator } from './utils.js';
@@ -28,7 +28,7 @@ const evaluateSchema = z.object({
ref: z.string().optional().describe('Exact target element reference from the page snapshot'),
});
const evaluate = defineTool({
const evaluate = defineTabTool({
capability: 'core',
schema: {
name: 'browser_evaluate',
@@ -38,14 +38,12 @@ const evaluate = defineTool({
type: 'destructive',
},
handle: async (context, params) => {
const tab = context.currentTabOrDie();
handle: async (tab, params) => {
const code: string[] = [];
let locator: playwright.Locator | undefined;
if (params.ref && params.element) {
const snapshot = tab.snapshotOrDie();
locator = snapshot.refLocator({ ref: params.ref, element: params.element });
locator = await tab.refLocator({ ref: params.ref, element: params.element });
code.push(`await page.${await generateLocator(locator)}.evaluate(${javascript.quote(params.function)});`);
} else {
code.push(`await page.evaluate(${javascript.quote(params.function)});`);

View File

@@ -15,9 +15,9 @@
*/
import { z } from 'zod';
import { defineTool } from './tool.js';
import { defineTabTool } from './tool.js';
const uploadFile = defineTool({
const uploadFile = defineTabTool({
capability: 'core',
schema: {
@@ -30,8 +30,8 @@ const uploadFile = defineTool({
type: 'destructive',
},
handle: async (context, params) => {
const modalState = context.modalStates().find(state => state.type === 'fileChooser');
handle: async (tab, params) => {
const modalState = tab.modalStates().find(state => state.type === 'fileChooser');
if (!modalState)
throw new Error('No file chooser visible');
@@ -41,7 +41,7 @@ const uploadFile = defineTool({
const action = async () => {
await modalState.fileChooser.setFiles(params.paths);
context.clearModalState(modalState);
tab.clearModalState(modalState);
};
return {

View File

@@ -16,12 +16,12 @@
import { z } from 'zod';
import { defineTool } from './tool.js';
import { defineTabTool } from './tool.js';
import { elementSchema } from './snapshot.js';
import { generateLocator } from './utils.js';
import * as javascript from '../javascript.js';
const pressKey = defineTool({
const pressKey = defineTabTool({
capability: 'core',
schema: {
@@ -34,9 +34,7 @@ const pressKey = defineTool({
type: 'destructive',
},
handle: async (context, params) => {
const tab = context.currentTabOrDie();
handle: async (tab, params) => {
const code = [
`// Press ${params.key}`,
`await page.keyboard.press('${params.key}');`,
@@ -59,7 +57,7 @@ const typeSchema = elementSchema.extend({
slowly: z.boolean().optional().describe('Whether to type one character at a time. Useful for triggering key handlers in the page. By default entire text is filled in at once.'),
});
const type = defineTool({
const type = defineTabTool({
capability: 'core',
schema: {
name: 'browser_type',
@@ -69,9 +67,8 @@ const type = defineTool({
type: 'destructive',
},
handle: async (context, params) => {
const snapshot = context.currentTabOrDie().snapshotOrDie();
const locator = snapshot.refLocator(params);
handle: async (tab, params) => {
const locator = await tab.refLocator(params);
const code: string[] = [];
const steps: (() => Promise<void>)[] = [];

View File

@@ -15,13 +15,13 @@
*/
import { z } from 'zod';
import { defineTool } from './tool.js';
import { defineTabTool } from './tool.js';
const elementSchema = z.object({
element: z.string().describe('Human-readable element description used to obtain permission to interact with the element'),
});
const mouseMove = defineTool({
const mouseMove = defineTabTool({
capability: 'vision',
schema: {
name: 'browser_mouse_move_xy',
@@ -34,8 +34,7 @@ const mouseMove = defineTool({
type: 'readOnly',
},
handle: async (context, params) => {
const tab = context.currentTabOrDie();
handle: async (tab, params) => {
const code = [
`// Move mouse to (${params.x}, ${params.y})`,
`await page.mouse.move(${params.x}, ${params.y});`,
@@ -50,7 +49,7 @@ const mouseMove = defineTool({
},
});
const mouseClick = defineTool({
const mouseClick = defineTabTool({
capability: 'vision',
schema: {
name: 'browser_mouse_click_xy',
@@ -63,8 +62,7 @@ const mouseClick = defineTool({
type: 'destructive',
},
handle: async (context, params) => {
const tab = context.currentTabOrDie();
handle: async (tab, params) => {
const code = [
`// Click mouse at coordinates (${params.x}, ${params.y})`,
`await page.mouse.move(${params.x}, ${params.y});`,
@@ -85,7 +83,7 @@ const mouseClick = defineTool({
},
});
const mouseDrag = defineTool({
const mouseDrag = defineTabTool({
capability: 'vision',
schema: {
name: 'browser_mouse_drag_xy',
@@ -100,9 +98,7 @@ const mouseDrag = defineTool({
type: 'destructive',
},
handle: async (context, params) => {
const tab = context.currentTabOrDie();
handle: async (tab, params) => {
const code = [
`// Drag mouse from (${params.startX}, ${params.startY}) to (${params.endX}, ${params.endY})`,
`await page.mouse.move(${params.startX}, ${params.startY});`,

View File

@@ -15,7 +15,7 @@
*/
import { z } from 'zod';
import { defineTool } from './tool.js';
import { defineTool, defineTabTool } from './tool.js';
const navigate = defineTool({
capability: 'core',
@@ -47,7 +47,7 @@ const navigate = defineTool({
},
});
const goBack = defineTool({
const goBack = defineTabTool({
capability: 'core',
schema: {
name: 'browser_navigate_back',
@@ -57,8 +57,7 @@ const goBack = defineTool({
type: 'readOnly',
},
handle: async context => {
const tab = await context.ensureTab();
handle: async tab => {
await tab.page.goBack();
const code = [
`// Navigate back`,
@@ -73,7 +72,7 @@ const goBack = defineTool({
},
});
const goForward = defineTool({
const goForward = defineTabTool({
capability: 'core',
schema: {
name: 'browser_navigate_forward',
@@ -82,8 +81,7 @@ const goForward = defineTool({
inputSchema: z.object({}),
type: 'readOnly',
},
handle: async context => {
const tab = context.currentTabOrDie();
handle: async tab => {
await tab.page.goForward();
const code = [
`// Navigate forward`,

View File

@@ -15,11 +15,11 @@
*/
import { z } from 'zod';
import { defineTool } from './tool.js';
import { defineTabTool } from './tool.js';
import type * as playwright from 'playwright';
const requests = defineTool({
const requests = defineTabTool({
capability: 'core',
schema: {
@@ -30,8 +30,8 @@ const requests = defineTool({
type: 'readOnly',
},
handle: async context => {
const requests = context.currentTabOrDie().requests();
handle: async tab => {
const requests = tab.requests();
const log = [...requests.entries()].map(([request, response]) => renderRequest(request, response)).join('\n');
return {
code: [`// <internal code to list network requests>`],

View File

@@ -15,7 +15,7 @@
*/
import { z } from 'zod';
import { defineTool } from './tool.js';
import { defineTabTool } from './tool.js';
import * as javascript from '../javascript.js';
import { outputFile } from '../config.js';
@@ -24,7 +24,7 @@ const pdfSchema = z.object({
filename: z.string().optional().describe('File name to save the pdf to. Defaults to `page-{timestamp}.pdf` if not specified.'),
});
const pdf = defineTool({
const pdf = defineTabTool({
capability: 'pdf',
schema: {
@@ -35,9 +35,8 @@ const pdf = defineTool({
type: 'readOnly',
},
handle: async (context, params) => {
const tab = context.currentTabOrDie();
const fileName = await outputFile(context.config, params.filename ?? `page-${new Date().toISOString()}.pdf`);
handle: async (tab, params) => {
const fileName = await outputFile(tab.context.config, params.filename ?? `page-${new Date().toISOString()}.pdf`);
const code = [
`// Save page as ${fileName}`,

View File

@@ -16,7 +16,7 @@
import { z } from 'zod';
import { defineTool } from './tool.js';
import { defineTabTool } from './tool.js';
import * as javascript from '../javascript.js';
import { outputFile } from '../config.js';
import { generateLocator } from './utils.js';
@@ -41,7 +41,7 @@ const screenshotSchema = z.object({
path: ['fullPage']
});
const screenshot = defineTool({
const screenshot = defineTabTool({
capability: 'core',
schema: {
name: 'browser_take_screenshot',
@@ -51,10 +51,9 @@ const screenshot = defineTool({
type: 'readOnly',
},
handle: async (context, params) => {
const tab = context.currentTabOrDie();
handle: async (tab, params) => {
const fileType = params.raw ? 'png' : 'jpeg';
const fileName = await outputFile(context.config, params.filename ?? `page-${new Date().toISOString()}.${fileType}`);
const fileName = await outputFile(tab.context.config, params.filename ?? `page-${new Date().toISOString()}.${fileType}`);
const options: playwright.PageScreenshotOptions = {
type: fileType,
quality: fileType === 'png' ? undefined : 50,
@@ -70,14 +69,14 @@ const screenshot = defineTool({
];
// Only get snapshot when element screenshot is needed
const locator = params.ref ? tab.snapshotOrDie().refLocator({ element: params.element || '', ref: params.ref }) : null;
const locator = params.ref ? await tab.refLocator({ element: params.element || '', ref: params.ref }) : null;
if (locator)
code.push(`await page.${await generateLocator(locator)}.screenshot(${javascript.formatObject(options)});`);
else
code.push(`await page.screenshot(${javascript.formatObject(options)});`);
const includeBase64 = context.clientSupportsImages();
const includeBase64 = tab.context.config.imageResponses !== 'omit';
const action = async () => {
const screenshot = locator ? await locator.screenshot(options) : await tab.page.screenshot(options);
return {

View File

@@ -16,7 +16,7 @@
import { z } from 'zod';
import { defineTool } from './tool.js';
import { defineTabTool, defineTool } from './tool.js';
import * as javascript from '../javascript.js';
import { generateLocator } from './utils.js';
@@ -51,7 +51,7 @@ const clickSchema = elementSchema.extend({
button: z.enum(['left', 'right', 'middle']).optional().describe('Button to click, defaults to left'),
});
const click = defineTool({
const click = defineTabTool({
capability: 'core',
schema: {
name: 'browser_click',
@@ -61,9 +61,8 @@ const click = defineTool({
type: 'destructive',
},
handle: async (context, params) => {
const tab = context.currentTabOrDie();
const locator = tab.snapshotOrDie().refLocator(params);
handle: async (tab, params) => {
const locator = await tab.refLocator(params);
const button = params.button;
const buttonAttr = button ? `{ button: '${button}' }` : '';
@@ -85,7 +84,7 @@ const click = defineTool({
},
});
const drag = defineTool({
const drag = defineTabTool({
capability: 'core',
schema: {
name: 'browser_drag',
@@ -100,10 +99,11 @@ const drag = defineTool({
type: 'destructive',
},
handle: async (context, params) => {
const snapshot = context.currentTabOrDie().snapshotOrDie();
const startLocator = snapshot.refLocator({ ref: params.startRef, element: params.startElement });
const endLocator = snapshot.refLocator({ ref: params.endRef, element: params.endElement });
handle: async (tab, params) => {
const [startLocator, endLocator] = await tab.refLocators([
{ ref: params.startRef, element: params.startElement },
{ ref: params.endRef, element: params.endElement },
]);
const code = [
`// Drag ${params.startElement} to ${params.endElement}`,
@@ -119,7 +119,7 @@ const drag = defineTool({
},
});
const hover = defineTool({
const hover = defineTabTool({
capability: 'core',
schema: {
name: 'browser_hover',
@@ -129,9 +129,8 @@ const hover = defineTool({
type: 'readOnly',
},
handle: async (context, params) => {
const snapshot = context.currentTabOrDie().snapshotOrDie();
const locator = snapshot.refLocator(params);
handle: async (tab, params) => {
const locator = await tab.refLocator(params);
const code = [
`// Hover over ${params.element}`,
@@ -151,7 +150,7 @@ const selectOptionSchema = elementSchema.extend({
values: z.array(z.string()).describe('Array of values to select in the dropdown. This can be a single value or multiple values.'),
});
const selectOption = defineTool({
const selectOption = defineTabTool({
capability: 'core',
schema: {
name: 'browser_select_option',
@@ -161,9 +160,8 @@ const selectOption = defineTool({
type: 'destructive',
},
handle: async (context, params) => {
const snapshot = context.currentTabOrDie().snapshotOrDie();
const locator = snapshot.refLocator(params);
handle: async (tab, params) => {
const locator = await tab.refLocator(params);
const code = [
`// Select options [${params.values.join(', ')}] in ${params.element}`,

View File

@@ -37,7 +37,7 @@ const listTabs = defineTool({
resultOverride: {
content: [{
type: 'text',
text: await context.listTabsMarkdown(),
text: (await context.listTabsMarkdown()).join('\n'),
}],
},
};
@@ -85,9 +85,9 @@ const newTab = defineTool({
},
handle: async (context, params) => {
await context.newTab();
const tab = await context.newTab();
if (params.url)
await context.currentTabOrDie().navigate(params.url);
await tab.navigate(params.url);
const code = [
`// <internal code to open a new tab>`,

View File

@@ -19,6 +19,7 @@ import type { z } from 'zod';
import type { Context } from '../context.js';
import type * as playwright from 'playwright';
import type { ToolCapability } from '../../config.js';
import type { Tab } from '../tab.js';
export type ToolSchema<Input extends InputType> = {
name: string;
@@ -64,3 +65,25 @@ export type Tool<Input extends InputType = InputType> = {
/**
 * Declares a tool whose handler works at the context level.
 *
 * This is an identity helper: the definition is returned unchanged. Routing a
 * tool literal through this call types it as `Tool<Input>`.
 *
 * @param tool - The tool definition.
 * @returns The same `tool` object that was passed in.
 */
export function defineTool<Input extends InputType>(tool: Tool<Input>): Tool<Input> {
return tool;
}
/**
 * Definition of a tool whose handler operates on a single `Tab` instead of the
 * whole `Context`. `defineTabTool` turns a `TabTool` into a regular `Tool`.
 */
export type TabTool<Input extends InputType = InputType> = {
// Capability group the tool is registered under.
capability: ToolCapability;
// Tool schema; `schema.name` is used in the modal-state error messages raised by `defineTabTool`.
schema: ToolSchema<Input>;
// Modal state type this tool is meant to resolve. When set, `defineTabTool`
// only lets the tool run while a modal state of this type is present on the
// tab; when omitted, the tool is rejected while any modal state is present.
clearsModalState?: ModalState['type'];
// Tool implementation; receives the tab resolved by `defineTabTool` and the tool params.
handle: (tab: Tab, params: z.output<Input>) => Promise<ToolResult>;
};
/**
 * Wraps a tab-scoped tool definition into a context-scoped `Tool`.
 *
 * The returned tool keeps every field of `tool` but replaces `handle` with a
 * wrapper that:
 *  1. resolves the current tab via `context.currentTabOrDie()`;
 *  2. checks the tab's modal states against `tool.clearsModalState`;
 *  3. forwards to `tool.handle(tab, params)` when the check passes.
 *
 * @param tool - The tab-scoped tool definition.
 * @returns A `Tool` whose handler accepts a context and delegates to the tab handler.
 * @throws Error when `tool.clearsModalState` is set but no modal state of that
 *   type is present on the tab, or when it is not set and the tab has any
 *   modal state. The message includes the tab's modal-state markdown.
 */
export function defineTabTool<Input extends InputType>(tool: TabTool<Input>): Tool<Input> {
  return {
    ...tool,
    handle: async (context, params) => {
      const tab = context.currentTabOrDie();
      const activeTypes = tab.modalStates().map(state => state.type);
      const expectedType = tool.clearsModalState;
      // Rendered lazily: only needed when one of the guards below fails.
      const describeModalStates = () => tab.modalStatesMarkdown().join('\n');

      if (expectedType) {
        // A modal-clearing tool needs its modal state to actually be there.
        if (!activeTypes.includes(expectedType))
          throw new Error(`The tool "${tool.schema.name}" can only be used when there is related modal state present.\n` + describeModalStates());
      } else if (activeTypes.length) {
        // Any other tool must not run while a modal state is pending.
        throw new Error(`Tool "${tool.schema.name}" does not handle the modal state.\n` + describeModalStates());
      }

      return tool.handle(tab, params);
    },
  };
}

View File

@@ -18,10 +18,9 @@
import { asLocator } from 'playwright-core/lib/utils';
import type * as playwright from 'playwright';
import type { Context } from '../context.js';
import type { Tab } from '../tab.js';
export async function waitForCompletion<R>(context: Context, tab: Tab, callback: () => Promise<R>): Promise<R> {
export async function waitForCompletion<R>(tab: Tab, callback: () => Promise<R>): Promise<R> {
const requests = new Set<playwright.Request>();
let frameNavigated = false;
let waitCallback: () => void = () => {};
@@ -65,7 +64,7 @@ export async function waitForCompletion<R>(context: Context, tab: Tab, callback:
if (!requests.size && !frameNavigated)
waitCallback();
await waitBarrier;
await context.waitForTimeout(1000);
await tab.waitForTimeout(1000);
return result;
} finally {
dispose();