chore: extract page snapshot, prep for multipage (#120)

This commit is contained in:
Pavel Feldman
2025-04-02 11:42:39 -07:00
committed by GitHub
parent 23f392dd91
commit 89627fd23a
7 changed files with 194 additions and 114 deletions

View File

@@ -20,7 +20,7 @@ import path from 'path';
import { z } from 'zod';
import { zodToJsonSchema } from 'zod-to-json-schema';
import { captureAriaSnapshot, runAndWait, sanitizeForFilePath } from './utils';
import { sanitizeForFilePath } from './utils';
import type { ToolFactory, Tool } from './tool';
@@ -28,7 +28,7 @@ const navigateSchema = z.object({
url: z.string().describe('The URL to navigate to'),
});
export const navigate: ToolFactory = snapshot => ({
export const navigate: ToolFactory = captureSnapshot => ({
schema: {
name: 'browser_navigate',
description: 'Navigate to a URL',
@@ -36,18 +36,15 @@ export const navigate: ToolFactory = snapshot => ({
},
handle: async (context, params) => {
const validatedParams = navigateSchema.parse(params);
const page = await context.createPage();
await page.goto(validatedParams.url, { waitUntil: 'domcontentloaded' });
// Cap load event to 5 seconds, the page is operational at this point.
await page.waitForLoadState('load', { timeout: 5000 }).catch(() => {});
if (snapshot)
return captureAriaSnapshot(context);
return {
content: [{
type: 'text',
text: `Navigated to ${validatedParams.url}`,
}],
};
await context.createPage();
return await context.run(async page => {
await page.goto(validatedParams.url, { waitUntil: 'domcontentloaded' });
// Cap load event to 5 seconds, the page is operational at this point.
await page.waitForLoadState('load', { timeout: 5000 }).catch(() => {});
}, {
status: `Navigated to ${validatedParams.url}`,
captureSnapshot,
});
},
});
@@ -60,7 +57,12 @@ export const goBack: ToolFactory = snapshot => ({
inputSchema: zodToJsonSchema(goBackSchema),
},
handle: async context => {
return await runAndWait(context, 'Navigated back', async page => page.goBack(), snapshot);
return await context.runAndWait(async page => {
await page.goBack();
}, {
status: 'Navigated back',
captureSnapshot: snapshot,
});
},
});
@@ -73,7 +75,12 @@ export const goForward: ToolFactory = snapshot => ({
inputSchema: zodToJsonSchema(goForwardSchema),
},
handle: async context => {
return await runAndWait(context, 'Navigated forward', async page => page.goForward(), snapshot);
return await context.runAndWait(async page => {
await page.goForward();
}, {
status: 'Navigated forward',
captureSnapshot: snapshot,
});
},
});
@@ -103,7 +110,7 @@ const pressKeySchema = z.object({
key: z.string().describe('Name of the key to press or a character to generate, such as `ArrowLeft` or `a`'),
});
export const pressKey: Tool = {
export const pressKey: (captureSnapshot: boolean) => Tool = captureSnapshot => ({
schema: {
name: 'browser_press_key',
description: 'Press a key on the keyboard',
@@ -111,11 +118,14 @@ export const pressKey: Tool = {
},
handle: async (context, params) => {
const validatedParams = pressKeySchema.parse(params);
return await runAndWait(context, `Pressed key ${validatedParams.key}`, async page => {
return await context.runAndWait(async page => {
await page.keyboard.press(validatedParams.key);
}, {
status: `Pressed key ${validatedParams.key}`,
captureSnapshot,
});
},
};
});
const pdfSchema = z.object({});
@@ -161,7 +171,7 @@ const chooseFileSchema = z.object({
paths: z.array(z.string()).describe('The absolute paths to the files to upload. Can be a single file or multiple files.'),
});
export const chooseFile: ToolFactory = snapshot => ({
export const chooseFile: ToolFactory = captureSnapshot => ({
schema: {
name: 'browser_choose_file',
description: 'Choose one or multiple files to upload',
@@ -169,9 +179,13 @@ export const chooseFile: ToolFactory = snapshot => ({
},
handle: async (context, params) => {
const validatedParams = chooseFileSchema.parse(params);
return await runAndWait(context, `Chose files ${validatedParams.paths.join(', ')}`, async () => {
return await context.runAndWait(async () => {
await context.submitFileChooser(validatedParams.paths);
}, snapshot);
}, {
status: `Chose files ${validatedParams.paths.join(', ')}`,
captureSnapshot,
noClearFileChooser: true,
});
},
});

View File

@@ -17,8 +17,6 @@
import { z } from 'zod';
import { zodToJsonSchema } from 'zod-to-json-schema';
import { runAndWait } from './utils';
import type { Tool } from './tool';
export const screenshot: Tool = {
@@ -76,11 +74,13 @@ export const click: Tool = {
},
handle: async (context, params) => {
return await runAndWait(context, 'Clicked mouse', async page => {
return await context.runAndWait(async page => {
const validatedParams = clickSchema.parse(params);
await page.mouse.move(validatedParams.x, validatedParams.y);
await page.mouse.down();
await page.mouse.up();
}, {
status: 'Clicked mouse',
});
},
};
@@ -101,11 +101,13 @@ export const drag: Tool = {
handle: async (context, params) => {
const validatedParams = dragSchema.parse(params);
return await runAndWait(context, `Dragged mouse from (${validatedParams.startX}, ${validatedParams.startY}) to (${validatedParams.endX}, ${validatedParams.endY})`, async page => {
return await context.runAndWait(async page => {
await page.mouse.move(validatedParams.startX, validatedParams.startY);
await page.mouse.down();
await page.mouse.move(validatedParams.endX, validatedParams.endY);
await page.mouse.up();
}, {
status: `Dragged mouse from (${validatedParams.startX}, ${validatedParams.startY}) to (${validatedParams.endX}, ${validatedParams.endY})`,
});
},
};
@@ -124,10 +126,12 @@ export const type: Tool = {
handle: async (context, params) => {
const validatedParams = typeSchema.parse(params);
return await runAndWait(context, `Typed text "${validatedParams.text}"`, async page => {
return await context.runAndWait(async page => {
await page.keyboard.type(validatedParams.text);
if (validatedParams.submit)
await page.keyboard.press('Enter');
}, {
status: `Typed text "${validatedParams.text}"`,
});
},
};

View File

@@ -17,8 +17,6 @@
import { z } from 'zod';
import zodToJsonSchema from 'zod-to-json-schema';
import { captureAriaSnapshot, runAndWait } from './utils';
import type * as playwright from 'playwright';
import type { Tool } from './tool';
@@ -30,7 +28,7 @@ export const snapshot: Tool = {
},
handle: async context => {
return await captureAriaSnapshot(context);
return await context.run(async () => {}, { captureSnapshot: true });
},
};
@@ -48,7 +46,12 @@ export const click: Tool = {
handle: async (context, params) => {
const validatedParams = elementSchema.parse(params);
return runAndWait(context, `"${validatedParams.element}" clicked`, () => context.refLocator(validatedParams.ref).click(), true);
return await context.runAndWaitWithSnapshot(async () => {
const locator = context.lastSnapshot().refLocator(validatedParams.ref);
await locator.click();
}, {
status: `Clicked "${validatedParams.element}"`,
});
},
};
@@ -68,11 +71,13 @@ export const drag: Tool = {
handle: async (context, params) => {
const validatedParams = dragSchema.parse(params);
return runAndWait(context, `Dragged "${validatedParams.startElement}" to "${validatedParams.endElement}"`, async () => {
const startLocator = context.refLocator(validatedParams.startRef);
const endLocator = context.refLocator(validatedParams.endRef);
return await context.runAndWaitWithSnapshot(async () => {
const startLocator = context.lastSnapshot().refLocator(validatedParams.startRef);
const endLocator = context.lastSnapshot().refLocator(validatedParams.endRef);
await startLocator.dragTo(endLocator);
}, true);
}, {
status: `Dragged "${validatedParams.startElement}" to "${validatedParams.endElement}"`,
});
},
};
@@ -85,7 +90,12 @@ export const hover: Tool = {
handle: async (context, params) => {
const validatedParams = elementSchema.parse(params);
return runAndWait(context, `Hovered over "${validatedParams.element}"`, () => context.refLocator(validatedParams.ref).hover(), true);
return context.runAndWaitWithSnapshot(async () => {
const locator = context.lastSnapshot().refLocator(validatedParams.ref);
await locator.hover();
}, {
status: `Hovered over "${validatedParams.element}"`,
});
},
};
@@ -103,12 +113,14 @@ export const type: Tool = {
handle: async (context, params) => {
const validatedParams = typeSchema.parse(params);
return await runAndWait(context, `Typed "${validatedParams.text}" into "${validatedParams.element}"`, async () => {
const locator = context.refLocator(validatedParams.ref);
return await context.runAndWaitWithSnapshot(async () => {
const locator = context.lastSnapshot().refLocator(validatedParams.ref);
await locator.fill(validatedParams.text);
if (validatedParams.submit)
await locator.press('Enter');
}, true);
}, {
status: `Typed "${validatedParams.text}" into "${validatedParams.element}"`,
});
},
};
@@ -125,10 +137,12 @@ export const selectOption: Tool = {
handle: async (context, params) => {
const validatedParams = selectOptionSchema.parse(params);
return await runAndWait(context, `Selected option in "${validatedParams.element}"`, async () => {
const locator = context.refLocator(validatedParams.ref);
return await context.runAndWaitWithSnapshot(async () => {
const locator = context.lastSnapshot().refLocator(validatedParams.ref);
await locator.selectOption(validatedParams.values);
}, true);
}, {
status: `Selected option in "${validatedParams.element}"`,
});
},
};

View File

@@ -15,10 +15,8 @@
*/
import type * as playwright from 'playwright';
import type { ToolResult } from './tool';
import type { Context } from '../context';
async function waitForCompletion<R>(page: playwright.Page, callback: () => Promise<R>): Promise<R> {
export async function waitForCompletion<R>(page: playwright.Page, callback: () => Promise<R>): Promise<R> {
const requests = new Set<playwright.Request>();
let frameNavigated = false;
let waitCallback: () => void = () => {};
@@ -71,42 +69,6 @@ async function waitForCompletion<R>(page: playwright.Page, callback: () => Promi
}
}
/**
 * Runs `callback` against the context's existing page, wrapped in
 * `waitForCompletion`, and reports the outcome as a tool result.
 *
 * @param context - Source of the existing page and of the file chooser state.
 * @param status - Text describing the performed action; used as the result text.
 * @param callback - Action to perform on the page.
 * @param snapshot - When true, the result is produced by `captureAriaSnapshot`
 *   (with `status` passed through); otherwise it carries only `status`.
 * @returns The tool result for the performed action.
 */
export async function runAndWait(context: Context, status: string, callback: (page: playwright.Page) => Promise<any>, snapshot: boolean = false): Promise<ToolResult> {
  const page = context.existingPage();
  // Sample the file chooser state *before* the action runs: only a chooser
  // that was already present is cleared afterwards.
  const hadFileChooser = context.hasFileChooser();
  await waitForCompletion(page, () => callback(page));
  if (hadFileChooser)
    context.clearFileChooser();

  if (snapshot)
    return await captureAriaSnapshot(context, status);
  return {
    content: [{ type: 'text', text: status }],
  };
}
/**
 * Builds a text tool result describing the context's existing page: an
 * optional status line, the page URL and title, a file chooser notice when
 * the context reports one, and the all-frames snapshot in a fenced `yaml`
 * block.
 *
 * @param context - Source of the existing page, file chooser state and snapshot.
 * @param status - Optional leading line; omitted when empty.
 * @returns A tool result with a single text item containing the report.
 */
export async function captureAriaSnapshot(context: Context, status: string = ''): Promise<ToolResult> {
  const page = context.existingPage();
  const report: string[] = [];
  if (status)
    report.push(status);

  // The blank entry separates the status line from the page details.
  report.push('');
  report.push(`- Page URL: ${page.url()}`);
  report.push(`- Page Title: ${await page.title()}`);

  if (context.hasFileChooser())
    report.push(`- There is a file chooser visible that requires browser_choose_file to be called`);

  const framesSnapshot = await context.allFramesSnapshot();
  report.push(`- Page Snapshot`, '```yaml', framesSnapshot, '```', '');

  const text = report.join('\n');
  return {
    content: [{ type: 'text', text }],
  };
}
/**
 * Replaces every run of ASCII characters other than letters, digits and `-`
 * with a single `-`. Characters above `\x7F` are left untouched.
 *
 * @param s - Arbitrary text, e.g. a URL or a title.
 * @returns The text with each disallowed run collapsed into one `-`.
 */
export function sanitizeForFilePath(s: string) {
  const disallowedRun = /[\x00-\x2C\x2E-\x2F\x3A-\x40\x5B-\x60\x7B-\x7F]+/g;
  // Splitting on each disallowed run and re-joining with '-' puts exactly one
  // dash where each run used to be.
  const keptParts = s.split(disallowedRun);
  return keptParts.join('-');
}