chore: turn vision into capability (#679)
Fixes https://github.com/microsoft/playwright-mcp/issues/420
This commit is contained in:
@@ -15,7 +15,7 @@
|
||||
*/
|
||||
|
||||
import { z } from 'zod';
|
||||
import { defineTool, type ToolFactory } from './tool.js';
|
||||
import { defineTool } from './tool.js';
|
||||
|
||||
const close = defineTool({
|
||||
capability: 'core',
|
||||
@@ -38,7 +38,7 @@ const close = defineTool({
|
||||
},
|
||||
});
|
||||
|
||||
const resize: ToolFactory = captureSnapshot => defineTool({
|
||||
const resize = defineTool({
|
||||
capability: 'core',
|
||||
schema: {
|
||||
name: 'browser_resize',
|
||||
@@ -66,13 +66,13 @@ const resize: ToolFactory = captureSnapshot => defineTool({
|
||||
return {
|
||||
code,
|
||||
action,
|
||||
captureSnapshot,
|
||||
captureSnapshot: true,
|
||||
waitForNetwork: true
|
||||
};
|
||||
},
|
||||
});
|
||||
|
||||
export default (captureSnapshot: boolean) => [
|
||||
export default [
|
||||
close,
|
||||
resize(captureSnapshot)
|
||||
resize
|
||||
];
|
||||
|
||||
@@ -15,9 +15,9 @@
|
||||
*/
|
||||
|
||||
import { z } from 'zod';
|
||||
import { defineTool, type ToolFactory } from './tool.js';
|
||||
import { defineTool } from './tool.js';
|
||||
|
||||
const handleDialog: ToolFactory = captureSnapshot => defineTool({
|
||||
const handleDialog = defineTool({
|
||||
capability: 'core',
|
||||
|
||||
schema: {
|
||||
@@ -49,7 +49,7 @@ const handleDialog: ToolFactory = captureSnapshot => defineTool({
|
||||
|
||||
return {
|
||||
code,
|
||||
captureSnapshot,
|
||||
captureSnapshot: true,
|
||||
waitForNetwork: false,
|
||||
};
|
||||
},
|
||||
@@ -57,6 +57,6 @@ const handleDialog: ToolFactory = captureSnapshot => defineTool({
|
||||
clearsModalState: 'dialog',
|
||||
});
|
||||
|
||||
export default (captureSnapshot: boolean) => [
|
||||
handleDialog(captureSnapshot),
|
||||
export default [
|
||||
handleDialog,
|
||||
];
|
||||
|
||||
@@ -15,10 +15,10 @@
|
||||
*/
|
||||
|
||||
import { z } from 'zod';
|
||||
import { defineTool, type ToolFactory } from './tool.js';
|
||||
import { defineTool } from './tool.js';
|
||||
|
||||
const uploadFile: ToolFactory = captureSnapshot => defineTool({
|
||||
capability: 'files',
|
||||
const uploadFile = defineTool({
|
||||
capability: 'core',
|
||||
|
||||
schema: {
|
||||
name: 'browser_file_upload',
|
||||
@@ -47,13 +47,13 @@ const uploadFile: ToolFactory = captureSnapshot => defineTool({
|
||||
return {
|
||||
code,
|
||||
action,
|
||||
captureSnapshot,
|
||||
captureSnapshot: true,
|
||||
waitForNetwork: true,
|
||||
};
|
||||
},
|
||||
clearsModalState: 'fileChooser',
|
||||
});
|
||||
|
||||
export default (captureSnapshot: boolean) => [
|
||||
uploadFile(captureSnapshot),
|
||||
export default [
|
||||
uploadFile,
|
||||
];
|
||||
|
||||
@@ -23,7 +23,7 @@ import { defineTool } from './tool.js';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
|
||||
const install = defineTool({
|
||||
capability: 'install',
|
||||
capability: 'core-install',
|
||||
schema: {
|
||||
name: 'browser_install',
|
||||
title: 'Install the browser specified in the config',
|
||||
|
||||
@@ -15,9 +15,13 @@
|
||||
*/
|
||||
|
||||
import { z } from 'zod';
|
||||
import { defineTool, type ToolFactory } from './tool.js';
|
||||
|
||||
const pressKey: ToolFactory = captureSnapshot => defineTool({
|
||||
import { defineTool } from './tool.js';
|
||||
import { elementSchema } from './snapshot.js';
|
||||
import { generateLocator } from './utils.js';
|
||||
import * as javascript from '../javascript.js';
|
||||
|
||||
const pressKey = defineTool({
|
||||
capability: 'core',
|
||||
|
||||
schema: {
|
||||
@@ -43,12 +47,61 @@ const pressKey: ToolFactory = captureSnapshot => defineTool({
|
||||
return {
|
||||
code,
|
||||
action,
|
||||
captureSnapshot,
|
||||
captureSnapshot: true,
|
||||
waitForNetwork: true
|
||||
};
|
||||
},
|
||||
});
|
||||
|
||||
export default (captureSnapshot: boolean) => [
|
||||
pressKey(captureSnapshot),
|
||||
const typeSchema = elementSchema.extend({
|
||||
text: z.string().describe('Text to type into the element'),
|
||||
submit: z.boolean().optional().describe('Whether to submit entered text (press Enter after)'),
|
||||
slowly: z.boolean().optional().describe('Whether to type one character at a time. Useful for triggering key handlers in the page. By default entire text is filled in at once.'),
|
||||
});
|
||||
|
||||
const type = defineTool({
|
||||
capability: 'core',
|
||||
schema: {
|
||||
name: 'browser_type',
|
||||
title: 'Type text',
|
||||
description: 'Type text into editable element',
|
||||
inputSchema: typeSchema,
|
||||
type: 'destructive',
|
||||
},
|
||||
|
||||
handle: async (context, params) => {
|
||||
const snapshot = context.currentTabOrDie().snapshotOrDie();
|
||||
const locator = snapshot.refLocator(params);
|
||||
|
||||
const code: string[] = [];
|
||||
const steps: (() => Promise<void>)[] = [];
|
||||
|
||||
if (params.slowly) {
|
||||
code.push(`// Press "${params.text}" sequentially into "${params.element}"`);
|
||||
code.push(`await page.${await generateLocator(locator)}.pressSequentially(${javascript.quote(params.text)});`);
|
||||
steps.push(() => locator.pressSequentially(params.text));
|
||||
} else {
|
||||
code.push(`// Fill "${params.text}" into "${params.element}"`);
|
||||
code.push(`await page.${await generateLocator(locator)}.fill(${javascript.quote(params.text)});`);
|
||||
steps.push(() => locator.fill(params.text));
|
||||
}
|
||||
|
||||
if (params.submit) {
|
||||
code.push(`// Submit text`);
|
||||
code.push(`await page.${await generateLocator(locator)}.press('Enter');`);
|
||||
steps.push(() => locator.press('Enter'));
|
||||
}
|
||||
|
||||
return {
|
||||
code,
|
||||
action: () => steps.reduce((acc, step) => acc.then(step), Promise.resolve()),
|
||||
captureSnapshot: true,
|
||||
waitForNetwork: true,
|
||||
};
|
||||
},
|
||||
});
|
||||
|
||||
export default [
|
||||
pressKey,
|
||||
type,
|
||||
];
|
||||
|
||||
@@ -17,50 +17,14 @@
|
||||
import { z } from 'zod';
|
||||
import { defineTool } from './tool.js';
|
||||
|
||||
import * as javascript from '../javascript.js';
|
||||
|
||||
const elementSchema = z.object({
|
||||
element: z.string().describe('Human-readable element description used to obtain permission to interact with the element'),
|
||||
});
|
||||
|
||||
const screenshot = defineTool({
|
||||
capability: 'core',
|
||||
const mouseMove = defineTool({
|
||||
capability: 'vision',
|
||||
schema: {
|
||||
name: 'browser_screen_capture',
|
||||
title: 'Take a screenshot',
|
||||
description: 'Take a screenshot of the current page',
|
||||
inputSchema: z.object({}),
|
||||
type: 'readOnly',
|
||||
},
|
||||
|
||||
handle: async context => {
|
||||
const tab = await context.ensureTab();
|
||||
const options = { type: 'jpeg' as 'jpeg', quality: 50, scale: 'css' as 'css' };
|
||||
|
||||
const code = [
|
||||
`// Take a screenshot of the current page`,
|
||||
`await page.screenshot(${javascript.formatObject(options)});`,
|
||||
];
|
||||
|
||||
const action = () => tab.page.screenshot(options).then(buffer => {
|
||||
return {
|
||||
content: [{ type: 'image' as 'image', data: buffer.toString('base64'), mimeType: 'image/jpeg' }],
|
||||
};
|
||||
});
|
||||
|
||||
return {
|
||||
code,
|
||||
action,
|
||||
captureSnapshot: false,
|
||||
waitForNetwork: false
|
||||
};
|
||||
},
|
||||
});
|
||||
|
||||
const moveMouse = defineTool({
|
||||
capability: 'core',
|
||||
schema: {
|
||||
name: 'browser_screen_move_mouse',
|
||||
name: 'browser_mouse_move_xy',
|
||||
title: 'Move mouse',
|
||||
description: 'Move mouse to a given position',
|
||||
inputSchema: elementSchema.extend({
|
||||
@@ -86,12 +50,12 @@ const moveMouse = defineTool({
|
||||
},
|
||||
});
|
||||
|
||||
const click = defineTool({
|
||||
capability: 'core',
|
||||
const mouseClick = defineTool({
|
||||
capability: 'vision',
|
||||
schema: {
|
||||
name: 'browser_screen_click',
|
||||
name: 'browser_mouse_click_xy',
|
||||
title: 'Click',
|
||||
description: 'Click left mouse button',
|
||||
description: 'Click left mouse button at a given position',
|
||||
inputSchema: elementSchema.extend({
|
||||
x: z.number().describe('X coordinate'),
|
||||
y: z.number().describe('Y coordinate'),
|
||||
@@ -121,12 +85,12 @@ const click = defineTool({
|
||||
},
|
||||
});
|
||||
|
||||
const drag = defineTool({
|
||||
capability: 'core',
|
||||
const mouseDrag = defineTool({
|
||||
capability: 'vision',
|
||||
schema: {
|
||||
name: 'browser_screen_drag',
|
||||
name: 'browser_mouse_drag_xy',
|
||||
title: 'Drag mouse',
|
||||
description: 'Drag left mouse button',
|
||||
description: 'Drag left mouse button to a given position',
|
||||
inputSchema: elementSchema.extend({
|
||||
startX: z.number().describe('Start X coordinate'),
|
||||
startY: z.number().describe('Start Y coordinate'),
|
||||
@@ -163,51 +127,8 @@ const drag = defineTool({
|
||||
},
|
||||
});
|
||||
|
||||
const type = defineTool({
|
||||
capability: 'core',
|
||||
schema: {
|
||||
name: 'browser_screen_type',
|
||||
title: 'Type text',
|
||||
description: 'Type text',
|
||||
inputSchema: z.object({
|
||||
text: z.string().describe('Text to type into the element'),
|
||||
submit: z.boolean().optional().describe('Whether to submit entered text (press Enter after)'),
|
||||
}),
|
||||
type: 'destructive',
|
||||
},
|
||||
|
||||
handle: async (context, params) => {
|
||||
const tab = context.currentTabOrDie();
|
||||
|
||||
const code = [
|
||||
`// Type ${params.text}`,
|
||||
`await page.keyboard.type('${params.text}');`,
|
||||
];
|
||||
|
||||
const action = async () => {
|
||||
await tab.page.keyboard.type(params.text);
|
||||
if (params.submit)
|
||||
await tab.page.keyboard.press('Enter');
|
||||
};
|
||||
|
||||
if (params.submit) {
|
||||
code.push(`// Submit text`);
|
||||
code.push(`await page.keyboard.press('Enter');`);
|
||||
}
|
||||
|
||||
return {
|
||||
code,
|
||||
action,
|
||||
captureSnapshot: false,
|
||||
waitForNetwork: true,
|
||||
};
|
||||
},
|
||||
});
|
||||
|
||||
export default [
|
||||
screenshot,
|
||||
moveMouse,
|
||||
click,
|
||||
drag,
|
||||
type,
|
||||
mouseMove,
|
||||
mouseClick,
|
||||
mouseDrag,
|
||||
];
|
||||
@@ -15,9 +15,9 @@
|
||||
*/
|
||||
|
||||
import { z } from 'zod';
|
||||
import { defineTool, type ToolFactory } from './tool.js';
|
||||
import { defineTool } from './tool.js';
|
||||
|
||||
const navigate: ToolFactory = captureSnapshot => defineTool({
|
||||
const navigate = defineTool({
|
||||
capability: 'core',
|
||||
|
||||
schema: {
|
||||
@@ -41,14 +41,14 @@ const navigate: ToolFactory = captureSnapshot => defineTool({
|
||||
|
||||
return {
|
||||
code,
|
||||
captureSnapshot,
|
||||
captureSnapshot: true,
|
||||
waitForNetwork: false,
|
||||
};
|
||||
},
|
||||
});
|
||||
|
||||
const goBack: ToolFactory = captureSnapshot => defineTool({
|
||||
capability: 'history',
|
||||
const goBack = defineTool({
|
||||
capability: 'core',
|
||||
schema: {
|
||||
name: 'browser_navigate_back',
|
||||
title: 'Go back',
|
||||
@@ -67,14 +67,14 @@ const goBack: ToolFactory = captureSnapshot => defineTool({
|
||||
|
||||
return {
|
||||
code,
|
||||
captureSnapshot,
|
||||
captureSnapshot: true,
|
||||
waitForNetwork: false,
|
||||
};
|
||||
},
|
||||
});
|
||||
|
||||
const goForward: ToolFactory = captureSnapshot => defineTool({
|
||||
capability: 'history',
|
||||
const goForward = defineTool({
|
||||
capability: 'core',
|
||||
schema: {
|
||||
name: 'browser_navigate_forward',
|
||||
title: 'Go forward',
|
||||
@@ -91,14 +91,14 @@ const goForward: ToolFactory = captureSnapshot => defineTool({
|
||||
];
|
||||
return {
|
||||
code,
|
||||
captureSnapshot,
|
||||
captureSnapshot: true,
|
||||
waitForNetwork: false,
|
||||
};
|
||||
},
|
||||
});
|
||||
|
||||
export default (captureSnapshot: boolean) => [
|
||||
navigate(captureSnapshot),
|
||||
goBack(captureSnapshot),
|
||||
goForward(captureSnapshot),
|
||||
export default [
|
||||
navigate,
|
||||
goBack,
|
||||
goForward,
|
||||
];
|
||||
|
||||
@@ -79,7 +79,7 @@ const screenshot = defineTool({
|
||||
return {
|
||||
code,
|
||||
action,
|
||||
captureSnapshot: true,
|
||||
captureSnapshot: false,
|
||||
waitForNetwork: false,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -41,7 +41,7 @@ const snapshot = defineTool({
|
||||
},
|
||||
});
|
||||
|
||||
const elementSchema = z.object({
|
||||
export const elementSchema = z.object({
|
||||
element: z.string().describe('Human-readable element description used to obtain permission to interact with the element'),
|
||||
ref: z.string().describe('Exact target element reference from the page snapshot'),
|
||||
});
|
||||
@@ -144,54 +144,6 @@ const hover = defineTool({
|
||||
},
|
||||
});
|
||||
|
||||
const typeSchema = elementSchema.extend({
|
||||
text: z.string().describe('Text to type into the element'),
|
||||
submit: z.boolean().optional().describe('Whether to submit entered text (press Enter after)'),
|
||||
slowly: z.boolean().optional().describe('Whether to type one character at a time. Useful for triggering key handlers in the page. By default entire text is filled in at once.'),
|
||||
});
|
||||
|
||||
const type = defineTool({
|
||||
capability: 'core',
|
||||
schema: {
|
||||
name: 'browser_type',
|
||||
title: 'Type text',
|
||||
description: 'Type text into editable element',
|
||||
inputSchema: typeSchema,
|
||||
type: 'destructive',
|
||||
},
|
||||
|
||||
handle: async (context, params) => {
|
||||
const snapshot = context.currentTabOrDie().snapshotOrDie();
|
||||
const locator = snapshot.refLocator(params);
|
||||
|
||||
const code: string[] = [];
|
||||
const steps: (() => Promise<void>)[] = [];
|
||||
|
||||
if (params.slowly) {
|
||||
code.push(`// Press "${params.text}" sequentially into "${params.element}"`);
|
||||
code.push(`await page.${await generateLocator(locator)}.pressSequentially(${javascript.quote(params.text)});`);
|
||||
steps.push(() => locator.pressSequentially(params.text));
|
||||
} else {
|
||||
code.push(`// Fill "${params.text}" into "${params.element}"`);
|
||||
code.push(`await page.${await generateLocator(locator)}.fill(${javascript.quote(params.text)});`);
|
||||
steps.push(() => locator.fill(params.text));
|
||||
}
|
||||
|
||||
if (params.submit) {
|
||||
code.push(`// Submit text`);
|
||||
code.push(`await page.${await generateLocator(locator)}.press('Enter');`);
|
||||
steps.push(() => locator.press('Enter'));
|
||||
}
|
||||
|
||||
return {
|
||||
code,
|
||||
action: () => steps.reduce((acc, step) => acc.then(step), Promise.resolve()),
|
||||
captureSnapshot: true,
|
||||
waitForNetwork: true,
|
||||
};
|
||||
},
|
||||
});
|
||||
|
||||
const selectOptionSchema = elementSchema.extend({
|
||||
values: z.array(z.string()).describe('Array of values to select in the dropdown. This can be a single value or multiple values.'),
|
||||
});
|
||||
@@ -229,6 +181,5 @@ export default [
|
||||
click,
|
||||
drag,
|
||||
hover,
|
||||
type,
|
||||
selectOption,
|
||||
];
|
||||
|
||||
@@ -15,10 +15,10 @@
|
||||
*/
|
||||
|
||||
import { z } from 'zod';
|
||||
import { defineTool, type ToolFactory } from './tool.js';
|
||||
import { defineTool } from './tool.js';
|
||||
|
||||
const listTabs = defineTool({
|
||||
capability: 'tabs',
|
||||
capability: 'core-tabs',
|
||||
|
||||
schema: {
|
||||
name: 'browser_tab_list',
|
||||
@@ -44,8 +44,8 @@ const listTabs = defineTool({
|
||||
},
|
||||
});
|
||||
|
||||
const selectTab: ToolFactory = captureSnapshot => defineTool({
|
||||
capability: 'tabs',
|
||||
const selectTab = defineTool({
|
||||
capability: 'core-tabs',
|
||||
|
||||
schema: {
|
||||
name: 'browser_tab_select',
|
||||
@@ -65,14 +65,14 @@ const selectTab: ToolFactory = captureSnapshot => defineTool({
|
||||
|
||||
return {
|
||||
code,
|
||||
captureSnapshot,
|
||||
captureSnapshot: true,
|
||||
waitForNetwork: false
|
||||
};
|
||||
},
|
||||
});
|
||||
|
||||
const newTab: ToolFactory = captureSnapshot => defineTool({
|
||||
capability: 'tabs',
|
||||
const newTab = defineTool({
|
||||
capability: 'core-tabs',
|
||||
|
||||
schema: {
|
||||
name: 'browser_tab_new',
|
||||
@@ -94,14 +94,14 @@ const newTab: ToolFactory = captureSnapshot => defineTool({
|
||||
];
|
||||
return {
|
||||
code,
|
||||
captureSnapshot,
|
||||
captureSnapshot: true,
|
||||
waitForNetwork: false
|
||||
};
|
||||
},
|
||||
});
|
||||
|
||||
const closeTab: ToolFactory = captureSnapshot => defineTool({
|
||||
capability: 'tabs',
|
||||
const closeTab = defineTool({
|
||||
capability: 'core-tabs',
|
||||
|
||||
schema: {
|
||||
name: 'browser_tab_close',
|
||||
@@ -120,15 +120,15 @@ const closeTab: ToolFactory = captureSnapshot => defineTool({
|
||||
];
|
||||
return {
|
||||
code,
|
||||
captureSnapshot,
|
||||
captureSnapshot: true,
|
||||
waitForNetwork: false
|
||||
};
|
||||
},
|
||||
});
|
||||
|
||||
export default (captureSnapshot: boolean) => [
|
||||
export default [
|
||||
listTabs,
|
||||
newTab(captureSnapshot),
|
||||
selectTab(captureSnapshot),
|
||||
closeTab(captureSnapshot),
|
||||
newTab,
|
||||
selectTab,
|
||||
closeTab,
|
||||
];
|
||||
|
||||
@@ -61,8 +61,6 @@ export type Tool<Input extends InputType = InputType> = {
|
||||
handle: (context: Context, params: z.output<Input>) => Promise<ToolResult>;
|
||||
};
|
||||
|
||||
export type ToolFactory = (snapshot: boolean) => Tool<any>;
|
||||
|
||||
export function defineTool<Input extends InputType>(tool: Tool<Input>): Tool<Input> {
|
||||
return tool;
|
||||
}
|
||||
|
||||
@@ -15,10 +15,10 @@
|
||||
*/
|
||||
|
||||
import { z } from 'zod';
|
||||
import { defineTool, type ToolFactory } from './tool.js';
|
||||
import { defineTool } from './tool.js';
|
||||
|
||||
const wait: ToolFactory = captureSnapshot => defineTool({
|
||||
capability: 'wait',
|
||||
const wait = defineTool({
|
||||
capability: 'core',
|
||||
|
||||
schema: {
|
||||
name: 'browser_wait_for',
|
||||
@@ -40,7 +40,7 @@ const wait: ToolFactory = captureSnapshot => defineTool({
|
||||
|
||||
if (params.time) {
|
||||
code.push(`await new Promise(f => setTimeout(f, ${params.time!} * 1000));`);
|
||||
await new Promise(f => setTimeout(f, Math.min(10000, params.time! * 1000)));
|
||||
await new Promise(f => setTimeout(f, Math.min(30000, params.time! * 1000)));
|
||||
}
|
||||
|
||||
const tab = context.currentTabOrDie();
|
||||
@@ -59,12 +59,12 @@ const wait: ToolFactory = captureSnapshot => defineTool({
|
||||
|
||||
return {
|
||||
code,
|
||||
captureSnapshot,
|
||||
captureSnapshot: true,
|
||||
waitForNetwork: false,
|
||||
};
|
||||
},
|
||||
});
|
||||
|
||||
export default (captureSnapshot: boolean) => [
|
||||
wait(captureSnapshot),
|
||||
export default [
|
||||
wait,
|
||||
];
|
||||
|
||||
Reference in New Issue
Block a user