chore: group tools, prepare for capabilities (#134)

This commit is contained in:
Pavel Feldman
2025-04-04 15:22:00 -07:00
committed by GitHub
parent fc0cccf4a5
commit 707ebbf4d4
14 changed files with 500 additions and 361 deletions

View File

@@ -14,79 +14,16 @@
* limitations under the License.
*/
import os from 'os';
import path from 'path';
import { z } from 'zod';
import { zodToJsonSchema } from 'zod-to-json-schema';
import { sanitizeForFilePath } from './utils';
import type { ToolFactory, Tool } from './tool';
/** Input for `browser_navigate`: a single `url` string. */
const navigateSchema = z.object({
  url: z.string().describe('The URL to navigate to'),
});

/**
 * Factory for the `browser_navigate` tool.
 *
 * The handler validates the raw params, obtains a tab through
 * `context.ensureTab()`, and navigates it to the requested URL.
 * `captureSnapshot` is forwarded as-is in the options given to `run`.
 */
export const navigate: ToolFactory = captureSnapshot => ({
  schema: {
    name: 'browser_navigate',
    description: 'Navigate to a URL',
    inputSchema: zodToJsonSchema(navigateSchema),
  },
  handle: async (context, params) => {
    // Validate first so that invalid input fails before a tab is requested.
    const { url } = navigateSchema.parse(params);
    const targetTab = await context.ensureTab();
    return await targetTab.run(
      async tab => {
        await tab.navigate(url);
      },
      {
        status: `Navigated to ${url}`,
        captureSnapshot,
      },
    );
  },
});
/** `browser_go_back` takes no input. */
const goBackSchema = z.object({});

/**
 * Factory for the `browser_go_back` tool.
 *
 * The handler calls `page.goBack()` on the current tab inside `runAndWait`;
 * `captureSnapshot` is forwarded in the options.
 */
export const goBack: ToolFactory = captureSnapshot => ({
  schema: {
    name: 'browser_go_back',
    description: 'Go back to the previous page',
    inputSchema: zodToJsonSchema(goBackSchema),
  },
  handle: async context => {
    const activeTab = context.currentTab();
    return await activeTab.runAndWait(
      async tab => {
        await tab.page.goBack();
      },
      {
        status: 'Navigated back',
        captureSnapshot,
      },
    );
  },
});
/** `browser_go_forward` takes no input. */
const goForwardSchema = z.object({});

/**
 * Factory for the `browser_go_forward` tool.
 *
 * The handler calls `page.goForward()` on the current tab inside
 * `runAndWait`; `captureSnapshot` is forwarded in the options.
 */
export const goForward: ToolFactory = captureSnapshot => ({
  schema: {
    name: 'browser_go_forward',
    description: 'Go forward to the next page',
    inputSchema: zodToJsonSchema(goForwardSchema),
  },
  handle: async context => {
    const activeTab = context.currentTab();
    return await activeTab.runAndWait(
      async tab => {
        await tab.page.goForward();
      },
      {
        status: 'Navigated forward',
        captureSnapshot,
      },
    );
  },
});
import type { Tool } from './tool';
const waitSchema = z.object({
time: z.number().describe('The time to wait in seconds'),
});
export const wait: Tool = {
const wait: Tool = {
schema: {
name: 'browser_wait',
description: 'Wait for a specified time in seconds',
@@ -104,51 +41,9 @@ export const wait: Tool = {
},
};
/** Input for `browser_press_key`: the key name or character to press. */
const pressKeySchema = z.object({
  key: z.string().describe('Name of the key to press or a character to generate, such as `ArrowLeft` or `a`'),
});

/**
 * Factory for the `browser_press_key` tool.
 *
 * The handler validates the params and presses the key through
 * `page.keyboard.press` on the current tab, inside `runAndWait`.
 */
export const pressKey: (captureSnapshot: boolean) => Tool = captureSnapshot => ({
  schema: {
    name: 'browser_press_key',
    description: 'Press a key on the keyboard',
    inputSchema: zodToJsonSchema(pressKeySchema),
  },
  handle: async (context, params) => {
    const { key } = pressKeySchema.parse(params);
    const activeTab = context.currentTab();
    return await activeTab.runAndWait(
      async tab => {
        await tab.page.keyboard.press(key);
      },
      {
        status: `Pressed key ${key}`,
        captureSnapshot,
      },
    );
  },
});
/** `browser_save_as_pdf` takes no input. */
const pdfSchema = z.object({});

/**
 * `browser_save_as_pdf` tool.
 *
 * Writes the current tab's page as a PDF into the OS temp directory and
 * returns a text message containing the path of the written file.
 */
export const pdf: Tool = {
  schema: {
    name: 'browser_save_as_pdf',
    description: 'Save page as PDF',
    inputSchema: zodToJsonSchema(pdfSchema),
  },
  handle: async context => {
    const tab = context.currentTab();
    // The file name is derived from the current ISO timestamp, passed through
    // sanitizeForFilePath before being joined to the temp directory.
    const baseName = sanitizeForFilePath(`page-${new Date().toISOString()}`);
    const fileName = path.join(os.tmpdir(), baseName) + '.pdf';
    await tab.page.pdf({ path: fileName });
    const message = `Saved as ${fileName}`;
    return {
      content: [{ type: 'text', text: message }],
    };
  },
};
const closeSchema = z.object({});
export const close: Tool = {
const close: Tool = {
schema: {
name: 'browser_close',
description: 'Close the page',
@@ -165,42 +60,7 @@ export const close: Tool = {
},
};
/** Input for `browser_choose_file`: the list of file paths to upload. */
const chooseFileSchema = z.object({
  paths: z.array(z.string()).describe('The absolute paths to the files to upload. Can be a single file or multiple files.'),
});

/**
 * Factory for the `browser_choose_file` tool.
 *
 * The handler validates the params and submits the paths through
 * `tab.submitFileChooser` on the current tab, inside `runAndWait`.
 * The run options set `noClearFileChooser: true`.
 */
export const chooseFile: ToolFactory = captureSnapshot => ({
  schema: {
    name: 'browser_choose_file',
    description: 'Choose one or multiple files to upload',
    inputSchema: zodToJsonSchema(chooseFileSchema),
  },
  handle: async (context, params) => {
    const { paths } = chooseFileSchema.parse(params);
    const tab = context.currentTab();
    return await tab.runAndWait(
      async () => {
        await tab.submitFileChooser(paths);
      },
      {
        status: `Chose files ${paths.join(', ')}`,
        captureSnapshot,
        noClearFileChooser: true,
      },
    );
  },
});
/**
 * `browser_install` tool.
 *
 * Delegates to `context.install()` and reports the value it resolves to
 * (used here as the channel name) in a text message.
 */
export const install: Tool = {
  schema: {
    name: 'browser_install',
    description: 'Install the browser specified in the config. Call this if you get an error about the browser not being installed.',
    inputSchema: zodToJsonSchema(z.object({})),
  },
  handle: async context => {
    const installedChannel = await context.install();
    const message = `Browser ${installedChannel} installed`;
    return {
      content: [{ type: 'text', text: message }],
    };
  },
};
// Tools exported by this module, in registration order.
const commonTools = [close, wait];

export default commonTools;

47
src/tools/fileChooser.ts Normal file
View File

@@ -0,0 +1,47 @@
/**
* Copyright (c) Microsoft Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { z } from 'zod';
import { zodToJsonSchema } from 'zod-to-json-schema';
import type { ToolFactory } from './tool';
/** Input for `browser_choose_file`: the list of file paths to upload. */
const chooseFileSchema = z.object({
  paths: z.array(z.string()).describe('The absolute paths to the files to upload. Can be a single file or multiple files.'),
});

/**
 * Factory for the `browser_choose_file` tool.
 *
 * The handler validates the params and submits the paths through
 * `tab.submitFileChooser` on the current tab, inside `runAndWait`.
 * The run options set `noClearFileChooser: true`.
 */
const chooseFile: ToolFactory = captureSnapshot => ({
  schema: {
    name: 'browser_choose_file',
    description: 'Choose one or multiple files to upload',
    inputSchema: zodToJsonSchema(chooseFileSchema),
  },
  handle: async (context, params) => {
    const { paths } = chooseFileSchema.parse(params);
    const tab = context.currentTab();
    return await tab.runAndWait(
      async () => {
        await tab.submitFileChooser(paths);
      },
      {
        status: `Chose files ${paths.join(', ')}`,
        captureSnapshot,
        noClearFileChooser: true,
      },
    );
  },
});
// Instantiates every file-chooser tool factory with the given snapshot flag.
export default (captureSnapshot: boolean) =>
  [chooseFile].map(createTool => createTool(captureSnapshot));

60
src/tools/install.ts Normal file
View File

@@ -0,0 +1,60 @@
/**
* Copyright (c) Microsoft Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { fork } from 'child_process';
import path from 'path';
import { z } from 'zod';
import { zodToJsonSchema } from 'zod-to-json-schema';
import type { Tool } from './tool';
/**
 * `browser_install` tool.
 *
 * Runs the Playwright CLI (`cli.js install <channel>`) in a forked child
 * process and resolves with a text message once the child exits with code 0.
 * The channel is taken from `launchOptions.channel`, then `browserName`,
 * falling back to `'chrome'`.
 *
 * Rejects with an `Error` when the child cannot be started or when it exits
 * with a non-zero code; in the latter case the message carries the child's
 * combined stdout/stderr output.
 */
const install: Tool = {
  schema: {
    name: 'browser_install',
    description: 'Install the browser specified in the config. Call this if you get an error about the browser not being installed.',
    inputSchema: zodToJsonSchema(z.object({})),
  },
  handle: async context => {
    const channel = context.options.launchOptions?.channel ?? context.options.browserName ?? 'chrome';
    // cli.js sits next to playwright's package.json.
    const cli = path.join(require.resolve('playwright/package.json'), '..', 'cli.js');
    const child = fork(cli, ['install', channel], {
      stdio: 'pipe',
    });
    // Collect stdout and stderr together so a failure can report what the CLI printed.
    const output: string[] = [];
    child.stdout?.on('data', data => output.push(data.toString()));
    child.stderr?.on('data', data => output.push(data.toString()));
    await new Promise<void>((resolve, reject) => {
      // An 'error' event with no listener is thrown as an uncaught exception,
      // and 'close' is not guaranteed to follow a failed spawn, so reject here
      // to keep the tool call from crashing the process or hanging.
      child.on('error', error => {
        reject(new Error(`Failed to install browser: ${error.message}`));
      });
      child.on('close', code => {
        if (code === 0)
          resolve();
        else
          reject(new Error(`Failed to install browser: ${output.join('')}`));
      });
    });
    return {
      content: [{
        type: 'text',
        text: `Browser ${channel} installed`,
      }],
    };
  },
};
// Tools exported by this module.
const installTools = [install];

export default installTools;

45
src/tools/keyboard.ts Normal file
View File

@@ -0,0 +1,45 @@
/**
* Copyright (c) Microsoft Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { z } from 'zod';
import zodToJsonSchema from 'zod-to-json-schema';
import type { ToolFactory } from './tool';
/** Input for `browser_press_key`: the key name or character to press. */
const pressKeySchema = z.object({
  key: z.string().describe('Name of the key to press or a character to generate, such as `ArrowLeft` or `a`'),
});

/**
 * Factory for the `browser_press_key` tool.
 *
 * The handler validates the params and presses the key through
 * `page.keyboard.press` on the current tab, inside `runAndWait`.
 */
const pressKey: ToolFactory = captureSnapshot => ({
  schema: {
    name: 'browser_press_key',
    description: 'Press a key on the keyboard',
    inputSchema: zodToJsonSchema(pressKeySchema),
  },
  handle: async (context, params) => {
    const { key } = pressKeySchema.parse(params);
    const activeTab = context.currentTab();
    return await activeTab.runAndWait(
      async tab => {
        await tab.page.keyboard.press(key);
      },
      {
        status: `Pressed key ${key}`,
        captureSnapshot,
      },
    );
  },
});
// Instantiates every keyboard tool factory with the given snapshot flag.
export default (captureSnapshot: boolean) =>
  [pressKey].map(createTool => createTool(captureSnapshot));

84
src/tools/navigate.ts Normal file
View File

@@ -0,0 +1,84 @@
/**
* Copyright (c) Microsoft Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { z } from 'zod';
import { zodToJsonSchema } from 'zod-to-json-schema';
import type { ToolFactory } from './tool';
/** Input for `browser_navigate`: a single `url` string. */
const navigateSchema = z.object({
  url: z.string().describe('The URL to navigate to'),
});

/**
 * Factory for the `browser_navigate` tool.
 *
 * The handler validates the raw params, obtains a tab through
 * `context.ensureTab()`, and navigates it to the requested URL.
 * `captureSnapshot` is forwarded as-is in the options given to `run`.
 */
const navigate: ToolFactory = captureSnapshot => ({
  schema: {
    name: 'browser_navigate',
    description: 'Navigate to a URL',
    inputSchema: zodToJsonSchema(navigateSchema),
  },
  handle: async (context, params) => {
    // Validate first so that invalid input fails before a tab is requested.
    const { url } = navigateSchema.parse(params);
    const targetTab = await context.ensureTab();
    return await targetTab.run(
      async tab => {
        await tab.navigate(url);
      },
      {
        status: `Navigated to ${url}`,
        captureSnapshot,
      },
    );
  },
});
/** `browser_navigate_back` takes no input. */
const goBackSchema = z.object({});

/**
 * Factory for the `browser_navigate_back` tool.
 *
 * The handler calls `page.goBack()` on the current tab inside `runAndWait`;
 * `captureSnapshot` is forwarded in the options.
 */
const goBack: ToolFactory = captureSnapshot => ({
  schema: {
    name: 'browser_navigate_back',
    description: 'Go back to the previous page',
    inputSchema: zodToJsonSchema(goBackSchema),
  },
  handle: async context => {
    const activeTab = context.currentTab();
    return await activeTab.runAndWait(
      async tab => {
        await tab.page.goBack();
      },
      {
        status: 'Navigated back',
        captureSnapshot,
      },
    );
  },
});
/** `browser_navigate_forward` takes no input. */
const goForwardSchema = z.object({});

/**
 * Factory for the `browser_navigate_forward` tool.
 *
 * The handler calls `page.goForward()` on the current tab inside
 * `runAndWait`; `captureSnapshot` is forwarded in the options.
 */
const goForward: ToolFactory = captureSnapshot => ({
  schema: {
    name: 'browser_navigate_forward',
    description: 'Go forward to the next page',
    inputSchema: zodToJsonSchema(goForwardSchema),
  },
  handle: async context => {
    const activeTab = context.currentTab();
    return await activeTab.runAndWait(
      async tab => {
        await tab.page.goForward();
      },
      {
        status: 'Navigated forward',
        captureSnapshot,
      },
    );
  },
});
// Instantiates every navigation tool factory, in order, with the given snapshot flag.
export default (captureSnapshot: boolean) =>
  [navigate, goBack, goForward].map(createTool => createTool(captureSnapshot));

50
src/tools/pdf.ts Normal file
View File

@@ -0,0 +1,50 @@
/**
* Copyright (c) Microsoft Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import os from 'os';
import path from 'path';
import { z } from 'zod';
import { zodToJsonSchema } from 'zod-to-json-schema';
import { sanitizeForFilePath } from './utils';
import type { Tool } from './tool';
/** `browser_pdf_save` takes no input. */
const pdfSchema = z.object({});

/**
 * `browser_pdf_save` tool.
 *
 * Writes the current tab's page as a PDF into the OS temp directory and
 * returns a text message containing the path of the written file.
 */
const pdf: Tool = {
  schema: {
    name: 'browser_pdf_save',
    description: 'Save page as PDF',
    inputSchema: zodToJsonSchema(pdfSchema),
  },
  handle: async context => {
    const tab = context.currentTab();
    // The file name is derived from the current ISO timestamp, passed through
    // sanitizeForFilePath before being joined to the temp directory.
    const baseName = sanitizeForFilePath(`page-${new Date().toISOString()}`);
    const fileName = path.join(os.tmpdir(), baseName) + '.pdf';
    await tab.page.pdf({ path: fileName });
    const message = `Saved as ${fileName}`;
    return {
      content: [{ type: 'text', text: message }],
    };
  },
};
// Tools exported by this module.
const pdfTools = [pdf];

export default pdfTools;

View File

@@ -19,9 +19,9 @@ import { zodToJsonSchema } from 'zod-to-json-schema';
import type { Tool } from './tool';
export const screenshot: Tool = {
const screenshot: Tool = {
schema: {
name: 'browser_screenshot',
name: 'browser_screen_capture',
description: 'Take a screenshot of the current page',
inputSchema: zodToJsonSchema(z.object({})),
},
@@ -44,9 +44,9 @@ const moveMouseSchema = elementSchema.extend({
y: z.number().describe('Y coordinate'),
});
export const moveMouse: Tool = {
const moveMouse: Tool = {
schema: {
name: 'browser_move_mouse',
name: 'browser_screen_move_mouse',
description: 'Move mouse to a given position',
inputSchema: zodToJsonSchema(moveMouseSchema),
},
@@ -66,9 +66,9 @@ const clickSchema = elementSchema.extend({
y: z.number().describe('Y coordinate'),
});
export const click: Tool = {
const click: Tool = {
schema: {
name: 'browser_click',
name: 'browser_screen_click',
description: 'Click left mouse button',
inputSchema: zodToJsonSchema(clickSchema),
},
@@ -92,9 +92,9 @@ const dragSchema = elementSchema.extend({
endY: z.number().describe('End Y coordinate'),
});
export const drag: Tool = {
const drag: Tool = {
schema: {
name: 'browser_drag',
name: 'browser_screen_drag',
description: 'Drag left mouse button',
inputSchema: zodToJsonSchema(dragSchema),
},
@@ -117,9 +117,9 @@ const typeSchema = z.object({
submit: z.boolean().optional().describe('Whether to submit entered text (press Enter after)'),
});
export const type: Tool = {
const type: Tool = {
schema: {
name: 'browser_type',
name: 'browser_screen_type',
description: 'Type text',
inputSchema: zodToJsonSchema(typeSchema),
},
@@ -135,3 +135,11 @@ export const type: Tool = {
});
},
};
// Tools exported by this module, in registration order.
const screenTools = [screenshot, moveMouse, click, drag, type];

export default screenTools;

View File

@@ -20,7 +20,7 @@ import zodToJsonSchema from 'zod-to-json-schema';
import type * as playwright from 'playwright';
import type { Tool } from './tool';
export const snapshot: Tool = {
const snapshot: Tool = {
schema: {
name: 'browser_snapshot',
description: 'Capture accessibility snapshot of the current page, this is better than screenshot',
@@ -37,7 +37,7 @@ const elementSchema = z.object({
ref: z.string().describe('Exact target element reference from the page snapshot'),
});
export const click: Tool = {
const click: Tool = {
schema: {
name: 'browser_click',
description: 'Perform click on a web page',
@@ -62,7 +62,7 @@ const dragSchema = z.object({
endRef: z.string().describe('Exact target element reference from the page snapshot'),
});
export const drag: Tool = {
const drag: Tool = {
schema: {
name: 'browser_drag',
description: 'Perform drag and drop between two elements',
@@ -81,7 +81,7 @@ export const drag: Tool = {
},
};
export const hover: Tool = {
const hover: Tool = {
schema: {
name: 'browser_hover',
description: 'Hover over element on page',
@@ -105,7 +105,7 @@ const typeSchema = elementSchema.extend({
slowly: z.boolean().optional().describe('Whether to type one character at a time. Useful for triggering key handlers in the page. By default entire text is filled in at once.'),
});
export const type: Tool = {
const type: Tool = {
schema: {
name: 'browser_type',
description: 'Type text into editable element',
@@ -132,7 +132,7 @@ const selectOptionSchema = elementSchema.extend({
values: z.array(z.string()).describe('Array of values to select in the dropdown. This can be a single value or multiple values.'),
});
export const selectOption: Tool = {
const selectOption: Tool = {
schema: {
name: 'browser_select_option',
description: 'Select an option in a dropdown',
@@ -154,7 +154,7 @@ const screenshotSchema = z.object({
raw: z.boolean().optional().describe('Whether to return without compression (in PNG format). Default is false, which returns a JPEG image.'),
});
export const screenshot: Tool = {
const screenshot: Tool = {
schema: {
name: 'browser_take_screenshot',
description: `Take a screenshot of the current page. You can't perform actions based on the screenshot, use browser_snapshot for actions.`,
@@ -171,3 +171,13 @@ export const screenshot: Tool = {
};
},
};
// Tools exported by this module, in registration order.
const snapshotTools = [snapshot, click, drag, hover, type, selectOption, screenshot];

export default snapshotTools;

View File

@@ -19,9 +19,9 @@ import { zodToJsonSchema } from 'zod-to-json-schema';
import type { ToolFactory, Tool } from './tool';
export const listTabs: Tool = {
const listTabs: Tool = {
schema: {
name: 'browser_list_tabs',
name: 'browser_tab_list',
description: 'List browser tabs',
inputSchema: zodToJsonSchema(z.object({})),
},
@@ -39,9 +39,9 @@ const selectTabSchema = z.object({
index: z.number().describe('The index of the tab to select'),
});
export const selectTab: ToolFactory = captureSnapshot => ({
const selectTab: ToolFactory = captureSnapshot => ({
schema: {
name: 'browser_select_tab',
name: 'browser_tab_select',
description: 'Select a tab by index',
inputSchema: zodToJsonSchema(selectTabSchema),
},
@@ -57,9 +57,9 @@ const newTabSchema = z.object({
url: z.string().optional().describe('The URL to navigate to in the new tab. If not provided, the new tab will be blank.'),
});
export const newTab: Tool = {
const newTab: Tool = {
schema: {
name: 'browser_new_tab',
name: 'browser_tab_new',
description: 'Open a new tab',
inputSchema: zodToJsonSchema(newTabSchema),
},
@@ -76,9 +76,9 @@ const closeTabSchema = z.object({
index: z.number().optional().describe('The index of the tab to close. Closes current tab if not provided.'),
});
export const closeTab: ToolFactory = captureSnapshot => ({
const closeTab: ToolFactory = captureSnapshot => ({
schema: {
name: 'browser_close_tab',
name: 'browser_tab_close',
description: 'Close a tab',
inputSchema: zodToJsonSchema(closeTabSchema),
},
@@ -96,3 +96,10 @@ export const closeTab: ToolFactory = captureSnapshot => ({
};
},
});
// Builds the tab tool list: listTabs and newTab are used as-is, while
// selectTab and closeTab are factories instantiated with the snapshot flag.
export default (captureSnapshot: boolean) => {
  const selectTabTool = selectTab(captureSnapshot);
  const closeTabTool = closeTab(captureSnapshot);
  return [listTabs, newTab, selectTabTool, closeTabTool];
};