chore: infer tool params (#241)

Moves the `schema.parse` call to the calling side of the handler, so we
don't have to duplicate it everywhere.
This commit is contained in:
Simon Knott
2025-04-22 13:24:38 +02:00
committed by GitHub
parent 9578a5b2af
commit c80f7cf222
20 changed files with 212 additions and 273 deletions

View File

@@ -18,21 +18,20 @@ import path from 'path';
import os from 'os';
import { z } from 'zod';
import zodToJsonSchema from 'zod-to-json-schema';
import { sanitizeForFilePath } from './utils';
import { generateLocator } from '../context';
import * as javascript from '../javascript';
import type * as playwright from 'playwright';
import type { Tool } from './tool';
import { defineTool } from './tool';
const snapshot: Tool = {
const snapshot = defineTool({
capability: 'core',
schema: {
name: 'browser_snapshot',
description: 'Capture accessibility snapshot of the current page, this is better than screenshot',
inputSchema: zodToJsonSchema(z.object({})),
inputSchema: z.object({}),
},
handle: async context => {
@@ -44,28 +43,27 @@ const snapshot: Tool = {
waitForNetwork: false,
};
},
};
});
// Shared input shape for tools that act on a single page element.
// `element` is a human-readable description of the element, and `ref` is the
// element's reference taken from the page snapshot. Both are required strings.
// Used as-is by the click and hover tools, and extended with extra fields by
// typeSchema and selectOptionSchema.
const elementSchema = z.object({
element: z.string().describe('Human-readable element description used to obtain permission to interact with the element'),
ref: z.string().describe('Exact target element reference from the page snapshot'),
});
const click: Tool = {
const click = defineTool({
capability: 'core',
schema: {
name: 'browser_click',
description: 'Perform click on a web page',
inputSchema: zodToJsonSchema(elementSchema),
inputSchema: elementSchema,
},
handle: async (context, params) => {
const validatedParams = elementSchema.parse(params);
const tab = context.currentTabOrDie();
const locator = tab.snapshotOrDie().refLocator(validatedParams.ref);
const locator = tab.snapshotOrDie().refLocator(params.ref);
const code = [
`// Click ${validatedParams.element}`,
`// Click ${params.element}`,
`await page.${await generateLocator(locator)}.click();`
];
@@ -76,31 +74,28 @@ const click: Tool = {
waitForNetwork: true,
};
},
};
// Input shape for a drag-and-drop between two elements.
// The source and the target are each identified by a human-readable
// description (`startElement` / `endElement`) and a reference from the page
// snapshot (`startRef` / `endRef`). All four fields are required strings.
const dragSchema = z.object({
startElement: z.string().describe('Human-readable source element description used to obtain the permission to interact with the element'),
startRef: z.string().describe('Exact source element reference from the page snapshot'),
endElement: z.string().describe('Human-readable target element description used to obtain the permission to interact with the element'),
endRef: z.string().describe('Exact target element reference from the page snapshot'),
});
const drag: Tool = {
const drag = defineTool({
capability: 'core',
schema: {
name: 'browser_drag',
description: 'Perform drag and drop between two elements',
inputSchema: zodToJsonSchema(dragSchema),
inputSchema: z.object({
startElement: z.string().describe('Human-readable source element description used to obtain the permission to interact with the element'),
startRef: z.string().describe('Exact source element reference from the page snapshot'),
endElement: z.string().describe('Human-readable target element description used to obtain the permission to interact with the element'),
endRef: z.string().describe('Exact target element reference from the page snapshot'),
}),
},
handle: async (context, params) => {
const validatedParams = dragSchema.parse(params);
const snapshot = context.currentTabOrDie().snapshotOrDie();
const startLocator = snapshot.refLocator(validatedParams.startRef);
const endLocator = snapshot.refLocator(validatedParams.endRef);
const startLocator = snapshot.refLocator(params.startRef);
const endLocator = snapshot.refLocator(params.endRef);
const code = [
`// Drag ${validatedParams.startElement} to ${validatedParams.endElement}`,
`// Drag ${params.startElement} to ${params.endElement}`,
`await page.${await generateLocator(startLocator)}.dragTo(page.${await generateLocator(endLocator)});`
];
@@ -111,23 +106,22 @@ const drag: Tool = {
waitForNetwork: true,
};
},
};
});
const hover: Tool = {
const hover = defineTool({
capability: 'core',
schema: {
name: 'browser_hover',
description: 'Hover over element on page',
inputSchema: zodToJsonSchema(elementSchema),
inputSchema: elementSchema,
},
handle: async (context, params) => {
const validatedParams = elementSchema.parse(params);
const snapshot = context.currentTabOrDie().snapshotOrDie();
const locator = snapshot.refLocator(validatedParams.ref);
const locator = snapshot.refLocator(params.ref);
const code = [
`// Hover over ${validatedParams.element}`,
`// Hover over ${params.element}`,
`await page.${await generateLocator(locator)}.hover();`
];
@@ -138,7 +132,7 @@ const hover: Tool = {
waitForNetwork: true,
};
},
};
});
const typeSchema = elementSchema.extend({
text: z.string().describe('Text to type into the element'),
@@ -146,33 +140,32 @@ const typeSchema = elementSchema.extend({
slowly: z.boolean().optional().describe('Whether to type one character at a time. Useful for triggering key handlers in the page. By default entire text is filled in at once.'),
});
const type: Tool = {
const type = defineTool({
capability: 'core',
schema: {
name: 'browser_type',
description: 'Type text into editable element',
inputSchema: zodToJsonSchema(typeSchema),
inputSchema: typeSchema,
},
handle: async (context, params) => {
const validatedParams = typeSchema.parse(params);
const snapshot = context.currentTabOrDie().snapshotOrDie();
const locator = snapshot.refLocator(validatedParams.ref);
const locator = snapshot.refLocator(params.ref);
const code: string[] = [];
const steps: (() => Promise<void>)[] = [];
if (validatedParams.slowly) {
code.push(`// Press "${validatedParams.text}" sequentially into "${validatedParams.element}"`);
code.push(`await page.${await generateLocator(locator)}.pressSequentially(${javascript.quote(validatedParams.text)});`);
steps.push(() => locator.pressSequentially(validatedParams.text));
if (params.slowly) {
code.push(`// Press "${params.text}" sequentially into "${params.element}"`);
code.push(`await page.${await generateLocator(locator)}.pressSequentially(${javascript.quote(params.text)});`);
steps.push(() => locator.pressSequentially(params.text));
} else {
code.push(`// Fill "${validatedParams.text}" into "${validatedParams.element}"`);
code.push(`await page.${await generateLocator(locator)}.fill(${javascript.quote(validatedParams.text)});`);
steps.push(() => locator.fill(validatedParams.text));
code.push(`// Fill "${params.text}" into "${params.element}"`);
code.push(`await page.${await generateLocator(locator)}.fill(${javascript.quote(params.text)});`);
steps.push(() => locator.fill(params.text));
}
if (validatedParams.submit) {
if (params.submit) {
code.push(`// Submit text`);
code.push(`await page.${await generateLocator(locator)}.press('Enter');`);
steps.push(() => locator.press('Enter'));
@@ -185,38 +178,37 @@ const type: Tool = {
waitForNetwork: true,
};
},
};
});
// Input shape for the select-option tool: the `element` / `ref` pair from
// elementSchema plus the option values to select.
// `values` is always an array of strings, so a single selection is passed as
// a one-element array.
const selectOptionSchema = elementSchema.extend({
values: z.array(z.string()).describe('Array of values to select in the dropdown. This can be a single value or multiple values.'),
});
const selectOption: Tool = {
const selectOption = defineTool({
capability: 'core',
schema: {
name: 'browser_select_option',
description: 'Select an option in a dropdown',
inputSchema: zodToJsonSchema(selectOptionSchema),
inputSchema: selectOptionSchema,
},
handle: async (context, params) => {
const validatedParams = selectOptionSchema.parse(params);
const snapshot = context.currentTabOrDie().snapshotOrDie();
const locator = snapshot.refLocator(validatedParams.ref);
const locator = snapshot.refLocator(params.ref);
const code = [
`// Select options [${validatedParams.values.join(', ')}] in ${validatedParams.element}`,
`await page.${await generateLocator(locator)}.selectOption(${javascript.formatObject(validatedParams.values)});`
`// Select options [${params.values.join(', ')}] in ${params.element}`,
`await page.${await generateLocator(locator)}.selectOption(${javascript.formatObject(params.values)});`
];
return {
code,
action: () => locator.selectOption(validatedParams.values).then(() => {}),
action: () => locator.selectOption(params.values).then(() => {}),
captureSnapshot: true,
waitForNetwork: true,
};
},
};
});
const screenshotSchema = z.object({
raw: z.boolean().optional().describe('Whether to return without compression (in PNG format). Default is false, which returns a JPEG image.'),
@@ -229,28 +221,27 @@ const screenshotSchema = z.object({
path: ['ref', 'element']
});
const screenshot: Tool = {
const screenshot = defineTool({
capability: 'core',
schema: {
name: 'browser_take_screenshot',
description: `Take a screenshot of the current page. You can't perform actions based on the screenshot, use browser_snapshot for actions.`,
inputSchema: zodToJsonSchema(screenshotSchema),
inputSchema: screenshotSchema,
},
handle: async (context, params) => {
const validatedParams = screenshotSchema.parse(params);
const tab = context.currentTabOrDie();
const snapshot = tab.snapshotOrDie();
const fileType = validatedParams.raw ? 'png' : 'jpeg';
const fileType = params.raw ? 'png' : 'jpeg';
const fileName = path.join(os.tmpdir(), sanitizeForFilePath(`page-${new Date().toISOString()}`)) + `.${fileType}`;
const options: playwright.PageScreenshotOptions = { type: fileType, quality: fileType === 'png' ? undefined : 50, scale: 'css', path: fileName };
const isElementScreenshot = validatedParams.element && validatedParams.ref;
const isElementScreenshot = params.element && params.ref;
const code = [
`// Screenshot ${isElementScreenshot ? validatedParams.element : 'viewport'} and save it as ${fileName}`,
`// Screenshot ${isElementScreenshot ? params.element : 'viewport'} and save it as ${fileName}`,
];
const locator = validatedParams.ref ? snapshot.refLocator(validatedParams.ref) : null;
const locator = params.ref ? snapshot.refLocator(params.ref) : null;
if (locator)
code.push(`await page.${await generateLocator(locator)}.screenshot(${javascript.formatObject(options)});`);
@@ -275,7 +266,7 @@ const screenshot: Tool = {
waitForNetwork: false,
};
}
};
});
export default [