chore: allow multiple tabs (#129)

This commit is contained in:
Pavel Feldman
2025-04-03 19:24:17 -07:00
committed by GitHub
parent b358e47d71
commit e36d4ea695
10 changed files with 354 additions and 92 deletions

View File

@@ -36,11 +36,9 @@ export const navigate: ToolFactory = captureSnapshot => ({
},
handle: async (context, params) => {
const validatedParams = navigateSchema.parse(params);
await context.createPage();
return await context.currentPage().run(async page => {
await page.page.goto(validatedParams.url, { waitUntil: 'domcontentloaded' });
// Cap load event to 5 seconds, the page is operational at this point.
await page.page.waitForLoadState('load', { timeout: 5000 }).catch(() => {});
const currentTab = await context.ensureTab();
return await currentTab.run(async tab => {
await tab.navigate(validatedParams.url);
}, {
status: `Navigated to ${validatedParams.url}`,
captureSnapshot,
@@ -57,8 +55,8 @@ export const goBack: ToolFactory = snapshot => ({
inputSchema: zodToJsonSchema(goBackSchema),
},
handle: async context => {
return await context.currentPage().runAndWait(async page => {
await page.page.goBack();
return await context.currentTab().runAndWait(async tab => {
await tab.page.goBack();
}, {
status: 'Navigated back',
captureSnapshot: snapshot,
@@ -75,8 +73,8 @@ export const goForward: ToolFactory = snapshot => ({
inputSchema: zodToJsonSchema(goForwardSchema),
},
handle: async context => {
return await context.currentPage().runAndWait(async page => {
await page.page.goForward();
return await context.currentTab().runAndWait(async tab => {
await tab.page.goForward();
}, {
status: 'Navigated forward',
captureSnapshot: snapshot,
@@ -118,8 +116,8 @@ export const pressKey: (captureSnapshot: boolean) => Tool = captureSnapshot => (
},
handle: async (context, params) => {
const validatedParams = pressKeySchema.parse(params);
return await context.currentPage().runAndWait(async page => {
await page.page.keyboard.press(validatedParams.key);
return await context.currentTab().runAndWait(async tab => {
await tab.page.keyboard.press(validatedParams.key);
}, {
status: `Pressed key ${validatedParams.key}`,
captureSnapshot,
@@ -136,9 +134,9 @@ export const pdf: Tool = {
inputSchema: zodToJsonSchema(pdfSchema),
},
handle: async context => {
const page = context.currentPage();
const tab = context.currentTab();
const fileName = path.join(os.tmpdir(), sanitizeForFilePath(`page-${new Date().toISOString()}`)) + '.pdf';
await page.page.pdf({ path: fileName });
await tab.page.pdf({ path: fileName });
return {
content: [{
type: 'text',
@@ -179,9 +177,9 @@ export const chooseFile: ToolFactory = captureSnapshot => ({
},
handle: async (context, params) => {
const validatedParams = chooseFileSchema.parse(params);
const page = context.currentPage();
return await page.runAndWait(async () => {
await page.submitFileChooser(validatedParams.paths);
const tab = context.currentTab();
return await tab.runAndWait(async () => {
await tab.submitFileChooser(validatedParams.paths);
}, {
status: `Chose files ${validatedParams.paths.join(', ')}`,
captureSnapshot,

View File

@@ -27,8 +27,8 @@ export const screenshot: Tool = {
},
handle: async context => {
const page = context.currentPage();
const screenshot = await page.page.screenshot({ type: 'jpeg', quality: 50, scale: 'css' });
const tab = context.currentTab();
const screenshot = await tab.page.screenshot({ type: 'jpeg', quality: 50, scale: 'css' });
return {
content: [{ type: 'image', data: screenshot.toString('base64'), mimeType: 'image/jpeg' }],
};
@@ -53,8 +53,8 @@ export const moveMouse: Tool = {
handle: async (context, params) => {
const validatedParams = moveMouseSchema.parse(params);
const page = context.currentPage();
await page.page.mouse.move(validatedParams.x, validatedParams.y);
const tab = context.currentTab();
await tab.page.mouse.move(validatedParams.x, validatedParams.y);
return {
content: [{ type: 'text', text: `Moved mouse to (${validatedParams.x}, ${validatedParams.y})` }],
};
@@ -74,11 +74,11 @@ export const click: Tool = {
},
handle: async (context, params) => {
return await context.currentPage().runAndWait(async page => {
return await context.currentTab().runAndWait(async tab => {
const validatedParams = clickSchema.parse(params);
await page.page.mouse.move(validatedParams.x, validatedParams.y);
await page.page.mouse.down();
await page.page.mouse.up();
await tab.page.mouse.move(validatedParams.x, validatedParams.y);
await tab.page.mouse.down();
await tab.page.mouse.up();
}, {
status: 'Clicked mouse',
});
@@ -101,11 +101,11 @@ export const drag: Tool = {
handle: async (context, params) => {
const validatedParams = dragSchema.parse(params);
return await context.currentPage().runAndWait(async page => {
await page.page.mouse.move(validatedParams.startX, validatedParams.startY);
await page.page.mouse.down();
await page.page.mouse.move(validatedParams.endX, validatedParams.endY);
await page.page.mouse.up();
return await context.currentTab().runAndWait(async tab => {
await tab.page.mouse.move(validatedParams.startX, validatedParams.startY);
await tab.page.mouse.down();
await tab.page.mouse.move(validatedParams.endX, validatedParams.endY);
await tab.page.mouse.up();
}, {
status: `Dragged mouse from (${validatedParams.startX}, ${validatedParams.startY}) to (${validatedParams.endX}, ${validatedParams.endY})`,
});
@@ -126,10 +126,10 @@ export const type: Tool = {
handle: async (context, params) => {
const validatedParams = typeSchema.parse(params);
return await context.currentPage().runAndWait(async page => {
await page.page.keyboard.type(validatedParams.text);
return await context.currentTab().runAndWait(async tab => {
await tab.page.keyboard.type(validatedParams.text);
if (validatedParams.submit)
await page.page.keyboard.press('Enter');
await tab.page.keyboard.press('Enter');
}, {
status: `Typed text "${validatedParams.text}"`,
});

View File

@@ -28,7 +28,7 @@ export const snapshot: Tool = {
},
handle: async context => {
return await context.currentPage().run(async () => {}, { captureSnapshot: true });
return await context.currentTab().run(async () => {}, { captureSnapshot: true });
},
};
@@ -46,8 +46,8 @@ export const click: Tool = {
handle: async (context, params) => {
const validatedParams = elementSchema.parse(params);
return await context.currentPage().runAndWaitWithSnapshot(async page => {
const locator = page.lastSnapshot().refLocator(validatedParams.ref);
return await context.currentTab().runAndWaitWithSnapshot(async tab => {
const locator = tab.lastSnapshot().refLocator(validatedParams.ref);
await locator.click();
}, {
status: `Clicked "${validatedParams.element}"`,
@@ -71,9 +71,9 @@ export const drag: Tool = {
handle: async (context, params) => {
const validatedParams = dragSchema.parse(params);
return await context.currentPage().runAndWaitWithSnapshot(async page => {
const startLocator = page.lastSnapshot().refLocator(validatedParams.startRef);
const endLocator = page.lastSnapshot().refLocator(validatedParams.endRef);
return await context.currentTab().runAndWaitWithSnapshot(async tab => {
const startLocator = tab.lastSnapshot().refLocator(validatedParams.startRef);
const endLocator = tab.lastSnapshot().refLocator(validatedParams.endRef);
await startLocator.dragTo(endLocator);
}, {
status: `Dragged "${validatedParams.startElement}" to "${validatedParams.endElement}"`,
@@ -90,8 +90,8 @@ export const hover: Tool = {
handle: async (context, params) => {
const validatedParams = elementSchema.parse(params);
return await context.currentPage().runAndWaitWithSnapshot(async page => {
const locator = page.lastSnapshot().refLocator(validatedParams.ref);
return await context.currentTab().runAndWaitWithSnapshot(async tab => {
const locator = tab.lastSnapshot().refLocator(validatedParams.ref);
await locator.hover();
}, {
status: `Hovered over "${validatedParams.element}"`,
@@ -114,8 +114,8 @@ export const type: Tool = {
handle: async (context, params) => {
const validatedParams = typeSchema.parse(params);
return await context.currentPage().runAndWaitWithSnapshot(async page => {
const locator = page.lastSnapshot().refLocator(validatedParams.ref);
return await context.currentTab().runAndWaitWithSnapshot(async tab => {
const locator = tab.lastSnapshot().refLocator(validatedParams.ref);
if (validatedParams.slowly)
await locator.pressSequentially(validatedParams.text);
else
@@ -141,8 +141,8 @@ export const selectOption: Tool = {
handle: async (context, params) => {
const validatedParams = selectOptionSchema.parse(params);
return await context.currentPage().runAndWaitWithSnapshot(async page => {
const locator = page.lastSnapshot().refLocator(validatedParams.ref);
return await context.currentTab().runAndWaitWithSnapshot(async tab => {
const locator = tab.lastSnapshot().refLocator(validatedParams.ref);
await locator.selectOption(validatedParams.values);
}, {
status: `Selected option in "${validatedParams.element}"`,
@@ -163,9 +163,9 @@ export const screenshot: Tool = {
handle: async (context, params) => {
const validatedParams = screenshotSchema.parse(params);
const page = context.currentPage();
const tab = context.currentTab();
const options: playwright.PageScreenshotOptions = validatedParams.raw ? { type: 'png', scale: 'css' } : { type: 'jpeg', quality: 50, scale: 'css' };
const screenshot = await page.page.screenshot(options);
const screenshot = await tab.page.screenshot(options);
return {
content: [{ type: 'image', data: screenshot.toString('base64'), mimeType: validatedParams.raw ? 'image/png' : 'image/jpeg' }],
};

98
src/tools/tabs.ts Normal file
View File

@@ -0,0 +1,98 @@
/**
* Copyright (c) Microsoft Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { z } from 'zod';
import { zodToJsonSchema } from 'zod-to-json-schema';
import type { ToolFactory, Tool } from './tool';
/**
 * Tool that reports the browser's tabs as a single text content item.
 *
 * Accepts no input (empty object schema). The returned text is whatever
 * `context.listTabs()` resolves to.
 */
export const listTabs: Tool = {
  schema: {
    name: 'browser_list_tabs',
    description: 'List browser tabs',
    inputSchema: zodToJsonSchema(z.object({})),
  },

  handle: async context => {
    // Resolve the listing first, then wrap it in the tool result envelope.
    const text = await context.listTabs();
    return {
      content: [{ type: 'text', text }],
    };
  },
};
/** Input for `browser_select_tab`: the index of the tab to select. */
const selectTabSchema = z.object({
  index: z.number().describe('The index of the tab to select'),
});

/**
 * Builds the tool that selects a tab by index.
 *
 * @param captureSnapshot Forwarded unchanged to the tab's `run` options.
 * @returns A tool whose handler validates the input, asks the context to
 *   select the tab, and returns the result of an empty `run` on the tab
 *   obtained from `context.ensureTab()`.
 */
export const selectTab: ToolFactory = captureSnapshot => ({
  schema: {
    name: 'browser_select_tab',
    description: 'Select a tab by index',
    inputSchema: zodToJsonSchema(selectTabSchema),
  },

  handle: async (context, params) => {
    // `parse` throws on invalid input, so `index` is a number past this line.
    const { index } = selectTabSchema.parse(params);
    await context.selectTab(index);

    const tab = await context.ensureTab();
    // No-op action: the call exists only to produce the run result.
    const result = await tab.run(async () => {}, { captureSnapshot });
    return result;
  },
});
/** Input for `browser_new_tab`: an optional URL to load in the new tab. */
const newTabSchema = z.object({
  url: z.string().optional().describe('The URL to navigate to in the new tab. If not provided, the new tab will be blank.'),
});

/**
 * Tool that opens a new tab and, when a URL is supplied, navigates it.
 *
 * The handler always finishes with an empty `run` on the current tab with
 * `captureSnapshot: true`.
 */
export const newTab: Tool = {
  schema: {
    name: 'browser_new_tab',
    description: 'Open a new tab',
    inputSchema: zodToJsonSchema(newTabSchema),
  },

  handle: async (context, params) => {
    const { url } = newTabSchema.parse(params);
    await context.newTab();

    // Truthiness check: both a missing and an empty URL skip navigation.
    if (url)
      await context.currentTab().navigate(url);

    // Look the current tab up again after the optional navigation.
    const tab = context.currentTab();
    const result = await tab.run(async () => {}, { captureSnapshot: true });
    return result;
  },
};
/** Input for `browser_close_tab`: an optional index of the tab to close. */
const closeTabSchema = z.object({
  index: z.number().optional().describe('The index of the tab to close. Closes current tab if not provided.'),
});

/**
 * Builds the tool that closes a tab.
 *
 * @param captureSnapshot Forwarded unchanged to the tab's `run` options when
 *   a current tab is still available after closing.
 * @returns A tool whose handler validates the input, asks the context to
 *   close the tab, and then returns either the result of an empty `run` on
 *   the current tab or, when there is none, the text from `context.listTabs()`.
 */
export const closeTab: ToolFactory = captureSnapshot => ({
  schema: {
    name: 'browser_close_tab',
    description: 'Close a tab',
    inputSchema: zodToJsonSchema(closeTabSchema),
  },
  handle: async (context, params) => {
    const validatedParams = closeTabSchema.parse(params);
    await context.closeTab(validatedParams.index);

    // `currentTab()` is used synchronously everywhere else in this file,
    // so it is not awaited here either.
    // NOTE(review): if `currentTab()` throws rather than returning a falsy
    // value when no tab remains, the fallback below is unreachable — confirm
    // against the Context implementation.
    const currentTab = context.currentTab();
    if (currentTab)
      return await currentTab.run(async () => {}, { captureSnapshot });

    // No current tab left: report the tab listing instead.
    return {
      content: [{
        type: 'text',
        text: await context.listTabs(),
      }],
    };
  },
});