chore: turn vision into capability (#679)

Fixes https://github.com/microsoft/playwright-mcp/issues/420
This commit is contained in:
Pavel Feldman
2025-07-16 16:40:00 -07:00
committed by GitHub
parent 012c906500
commit d61aa16fee
23 changed files with 366 additions and 575 deletions

View File

@@ -34,7 +34,6 @@ test('test snapshot tool list', async ({ client }) => {
'browser_navigate_forward',
'browser_navigate',
'browser_network_requests',
'browser_pdf_save',
'browser_press_key',
'browser_resize',
'browser_snapshot',
@@ -47,45 +46,33 @@ test('test snapshot tool list', async ({ client }) => {
]));
});
test('test vision tool list', async ({ visionClient }) => {
const { tools: visionTools } = await visionClient.listTools();
expect(new Set(visionTools.map(t => t.name))).toEqual(new Set([
'browser_close',
'browser_console_messages',
'browser_file_upload',
'browser_handle_dialog',
'browser_install',
'browser_navigate_back',
'browser_navigate_forward',
'browser_navigate',
'browser_network_requests',
'browser_pdf_save',
'browser_press_key',
'browser_resize',
'browser_screen_capture',
'browser_screen_click',
'browser_screen_drag',
'browser_screen_move_mouse',
'browser_screen_type',
'browser_tab_close',
'browser_tab_list',
'browser_tab_new',
'browser_tab_select',
'browser_wait_for',
]));
});
test('test capabilities', async ({ startClient }) => {
test('test capabilities (pdf)', async ({ startClient }) => {
const { client } = await startClient({
args: ['--caps="core"'],
args: ['--caps=pdf'],
});
const { tools } = await client.listTools();
const toolNames = tools.map(t => t.name);
expect(toolNames).not.toContain('browser_file_upload');
expect(toolNames).not.toContain('browser_pdf_save');
expect(toolNames).not.toContain('browser_screen_capture');
expect(toolNames).not.toContain('browser_screen_click');
expect(toolNames).not.toContain('browser_screen_drag');
expect(toolNames).not.toContain('browser_screen_move_mouse');
expect(toolNames).not.toContain('browser_screen_type');
expect(toolNames).toContain('browser_pdf_save');
});
test('test capabilities (vision)', async ({ startClient }) => {
const { client } = await startClient({
args: ['--caps=vision'],
});
const { tools } = await client.listTools();
const toolNames = tools.map(t => t.name);
expect(toolNames).toContain('browser_mouse_move_xy');
expect(toolNames).toContain('browser_mouse_click_xy');
expect(toolNames).toContain('browser_mouse_drag_xy');
});
test('support for legacy --vision option', async ({ startClient }) => {
const { client } = await startClient({
args: ['--vision'],
});
const { tools } = await client.listTools();
const toolNames = tools.map(t => t.name);
expect(toolNames).toContain('browser_mouse_move_xy');
expect(toolNames).toContain('browser_mouse_click_xy');
expect(toolNames).toContain('browser_mouse_drag_xy');
});

51
tests/evaluate.spec.ts Normal file
View File

@@ -0,0 +1,51 @@
/**
* Copyright (c) Microsoft Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { test, expect } from './fixtures.js';
test('browser_evaluate', async ({ client, server }) => {
expect(await client.callTool({
name: 'browser_navigate',
arguments: { url: server.HELLO_WORLD },
})).toContainTextContent(`- Page Title: Title`);
const result = await client.callTool({
name: 'browser_evaluate',
arguments: {
function: '() => document.title',
},
});
expect(result).toContainTextContent(`"Title"`);
});
test('browser_evaluate (element)', async ({ client, server }) => {
server.setContent('/', `
<body style="background-color: red">Hello, world!</body>
`, 'text/html');
await client.callTool({
name: 'browser_navigate',
arguments: { url: server.PREFIX },
});
expect(await client.callTool({
name: 'browser_evaluate',
arguments: {
function: 'element => element.style.backgroundColor',
element: 'body',
ref: 'e1',
},
})).toContainTextContent(`- Result: "red"`);
});

View File

@@ -41,7 +41,6 @@ type CDPServer = {
type TestFixtures = {
client: Client;
visionClient: Client;
startClient: (options?: { clientName?: string, args?: string[], config?: Config }) => Promise<{ client: Client, stderr: () => string }>;
wsEndpoint: string;
cdpServer: CDPServer;
@@ -61,11 +60,6 @@ export const test = baseTest.extend<TestFixtures & TestOptions, WorkerFixtures>(
await use(client);
},
visionClient: async ({ startClient }, use) => {
const { client } = await startClient({ args: ['--vision'] });
await use(client);
},
startClient: async ({ mcpHeadless, mcpBrowser, mcpMode }, use, testInfo) => {
const userDataDir = mcpMode !== 'docker' ? testInfo.outputPath('user-data-dir') : undefined;
const configDir = path.dirname(test.info().config.configFile!);

View File

@@ -19,7 +19,7 @@ import fs from 'fs';
import { test, expect } from './fixtures.js';
test('save as pdf unavailable', async ({ startClient, server }) => {
const { client } = await startClient({ args: ['--caps="no-pdf"'] });
const { client } = await startClient();
await client.callTool({
name: 'browser_navigate',
arguments: { url: server.HELLO_WORLD },
@@ -32,7 +32,7 @@ test('save as pdf unavailable', async ({ startClient, server }) => {
test('save as pdf', async ({ startClient, mcpBrowser, server }, testInfo) => {
const { client } = await startClient({
config: { outputDir: testInfo.outputPath('output') },
config: { outputDir: testInfo.outputPath('output'), capabilities: ['pdf'] },
});
test.skip(!!mcpBrowser && !['chromium', 'chrome', 'msedge'].includes(mcpBrowser), 'Save as PDF is only supported in Chromium.');
@@ -52,7 +52,7 @@ test('save as pdf (filename: output.pdf)', async ({ startClient, mcpBrowser, ser
const outputDir = testInfo.outputPath('output');
test.skip(!!mcpBrowser && !['chromium', 'chrome', 'msedge'].includes(mcpBrowser), 'Save as PDF is only supported in Chromium.');
const { client } = await startClient({
config: { outputDir },
config: { outputDir, capabilities: ['pdf'] },
});
expect(await client.callTool({