Compare commits
24 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9578a5b2af | ||
|
|
cd5aa344f1 | ||
|
|
dc955c73a3 | ||
|
|
d4f8f87b03 | ||
|
|
0c3792d231 | ||
|
|
7695717546 | ||
|
|
6a070a0dd8 | ||
|
|
6481100bdf | ||
|
|
4b261286bf | ||
|
|
7e4a964b0a | ||
|
|
cea347d067 | ||
|
|
6054290d9a | ||
|
|
6d4adfe5c6 | ||
|
|
e7c7709b33 | ||
|
|
5c2e11017d | ||
|
|
e4331313f9 | ||
|
|
bc48600a49 | ||
|
|
0d6bb2f547 | ||
|
|
795a9d578a | ||
|
|
4a19e18999 | ||
|
|
4d59e06184 | ||
|
|
6891a525b3 | ||
|
|
0f7fd1362f | ||
|
|
de08c24b96 |
29
.github/workflows/ci.yml
vendored
29
.github/workflows/ci.yml
vendored
@@ -7,8 +7,30 @@ on:
|
||||
branches: [ main ]
|
||||
|
||||
jobs:
|
||||
build-and-test:
|
||||
lint:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Use Node.js 18
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: '18'
|
||||
cache: 'npm'
|
||||
- name: Install dependencies
|
||||
run: npm ci
|
||||
- run: npm run build
|
||||
- name: Run ESLint
|
||||
run: npm run lint
|
||||
- run: npm run update-readme
|
||||
- name: Ensure no changes
|
||||
run: git diff --exit-code
|
||||
|
||||
test:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [ubuntu-latest, macos-latest, windows-latest]
|
||||
runs-on: ${{ matrix.os }}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
@@ -25,8 +47,9 @@ jobs:
|
||||
- name: Playwright install
|
||||
run: npx playwright install --with-deps
|
||||
|
||||
- name: Run linting
|
||||
run: npm run lint
|
||||
- name: Install MS Edge
|
||||
if: ${{ matrix.os == 'macos-latest' }} # MS Edge is not preinstalled on macOS runners.
|
||||
run: npx playwright install msedge
|
||||
|
||||
- name: Build
|
||||
run: npm run build
|
||||
|
||||
3
.github/workflows/publish.yml
vendored
3
.github/workflows/publish.yml
vendored
@@ -15,9 +15,10 @@ jobs:
|
||||
node-version: 18
|
||||
registry-url: https://registry.npmjs.org/
|
||||
- run: npm ci
|
||||
- run: npx playwright install --with-deps
|
||||
- run: npm run build
|
||||
- run: npm run lint
|
||||
- run: npm run test
|
||||
- run: npm run ctest
|
||||
- run: npm publish --provenance
|
||||
env:
|
||||
NODE_AUTH_TOKEN: ${{secrets.NPM_TOKEN}}
|
||||
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1,3 +1,4 @@
|
||||
lib/
|
||||
node_modules/
|
||||
test-results/
|
||||
.vscode/mcp.json
|
||||
|
||||
22
Dockerfile
Normal file
22
Dockerfile
Normal file
@@ -0,0 +1,22 @@
|
||||
FROM node:22-bookworm-slim
|
||||
|
||||
# Set the working directory
|
||||
WORKDIR /app
|
||||
|
||||
# Copy package.json and package-lock.json at this stage to leverage the build cache
|
||||
COPY package*.json ./
|
||||
|
||||
# Install dependencies
|
||||
RUN npm ci
|
||||
|
||||
# Install chromium and its dependencies, but only for headless mode
|
||||
RUN npx -y playwright install --with-deps --only-shell chromium
|
||||
|
||||
# Copy the rest of the app
|
||||
COPY . .
|
||||
|
||||
# Build the app
|
||||
RUN npm run build
|
||||
|
||||
# Run in headless and only with chromium (other browsers need more dependencies not included in this image)
|
||||
ENTRYPOINT ["node", "cli.js", "--headless", "--browser", "chromium"]
|
||||
164
README.md
164
README.md
@@ -17,6 +17,8 @@ A Model Context Protocol (MCP) server that provides browser automation capabilit
|
||||
|
||||
### Example config
|
||||
|
||||
#### NPX
|
||||
|
||||
```js
|
||||
{
|
||||
"mcpServers": {
|
||||
@@ -30,7 +32,6 @@ A Model Context Protocol (MCP) server that provides browser automation capabilit
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
#### Installation in VS Code
|
||||
|
||||
Install the Playwright MCP server in VS Code using one of these buttons:
|
||||
@@ -128,6 +129,20 @@ And then in MCP client config, set the `url` to the SSE endpoint:
|
||||
}
|
||||
```
|
||||
|
||||
### Docker
|
||||
|
||||
**NOTE:** The Docker implementation only supports headless Chromium at the moment.
|
||||
```js
|
||||
{
|
||||
"mcpServers": {
|
||||
"playwright": {
|
||||
"command": "docker",
|
||||
"args": ["run", "-i", "--rm", "--init", "mcp/playwright"]
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Tool Modes
|
||||
|
||||
The tools are available in two modes:
|
||||
@@ -154,41 +169,71 @@ To use Vision Mode, add the `--vision` flag when starting the server:
|
||||
Vision Mode works best with the computer use models that are able to interact with elements using
|
||||
X Y coordinate space, based on the provided screenshot.
|
||||
|
||||
### Build with Docker
|
||||
|
||||
You can build the Docker image yourself.
|
||||
```
|
||||
docker build -t mcp/playwright .
|
||||
```
|
||||
|
||||
### Programmatic usage with custom transports
|
||||
|
||||
```js
|
||||
import http from 'http';
|
||||
|
||||
import { createServer } from '@playwright/mcp';
|
||||
import { SSEServerTransport } from '@modelcontextprotocol/sdk/server/sse.js';
|
||||
|
||||
// ...
|
||||
http.createServer(async (req, res) => {
|
||||
// ...
|
||||
|
||||
const server = createServer({
|
||||
launchOptions: { headless: true }
|
||||
// Creates a headless Playwright MCP server with SSE transport
|
||||
const mcpServer = await createServer({ headless: true });
|
||||
const transport = new SSEServerTransport('/messages', res);
|
||||
await mcpServer.connect(transport);
|
||||
|
||||
// ...
|
||||
});
|
||||
transport = new SSEServerTransport("/messages", res);
|
||||
server.connect(transport);
|
||||
|
||||
```
|
||||
|
||||
<!--- Generated by update-readme.js -->
|
||||
|
||||
### Snapshot-based Interactions
|
||||
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
- **browser_snapshot**
|
||||
- Description: Capture accessibility snapshot of the current page, this is better than screenshot
|
||||
- Parameters: None
|
||||
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
- **browser_click**
|
||||
- Description: Perform click on a web page
|
||||
- Parameters:
|
||||
- `element` (string): Human-readable element description used to obtain permission to interact with the element
|
||||
- `ref` (string): Exact target element reference from the page snapshot
|
||||
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
- **browser_drag**
|
||||
- Description: Perform drag and drop between two elements
|
||||
- Parameters:
|
||||
- `startElement` (string): Human-readable source element description used to obtain the permission to interact with the element
|
||||
- `startRef` (string): Exact source element reference from the page snapshot
|
||||
- `endElement` (string): Human-readable target element description used to obtain the permission to interact with the element
|
||||
- `endRef` (string): Exact target element reference from the page snapshot
|
||||
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
- **browser_hover**
|
||||
- Description: Hover over element on page
|
||||
- Parameters:
|
||||
- `element` (string): Human-readable element description used to obtain permission to interact with the element
|
||||
- `ref` (string): Exact target element reference from the page snapshot
|
||||
|
||||
- **browser_drag**
|
||||
- Description: Perform drag and drop between two elements
|
||||
- Parameters:
|
||||
- `startElement` (string): Human-readable source element description used to obtain permission to interact with the element
|
||||
- `startRef` (string): Exact source element reference from the page snapshot
|
||||
- `endElement` (string): Human-readable target element description used to obtain permission to interact with the element
|
||||
- `endRef` (string): Exact target element reference from the page snapshot
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
- **browser_type**
|
||||
- Description: Type text into editable element
|
||||
@@ -199,6 +244,8 @@ server.connect(transport);
|
||||
- `submit` (boolean, optional): Whether to submit entered text (press Enter after)
|
||||
- `slowly` (boolean, optional): Whether to type one character at a time. Useful for triggering key handlers in the page. By default entire text is filled in at once.
|
||||
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
- **browser_select_option**
|
||||
- Description: Select an option in a dropdown
|
||||
- Parameters:
|
||||
@@ -206,17 +253,25 @@ server.connect(transport);
|
||||
- `ref` (string): Exact target element reference from the page snapshot
|
||||
- `values` (array): Array of values to select in the dropdown. This can be a single value or multiple values.
|
||||
|
||||
- **browser_snapshot**
|
||||
- Description: Capture accessibility snapshot of the current page, this is better than screenshot
|
||||
- Parameters: None
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
- **browser_take_screenshot**
|
||||
- Description: Take a screenshot of the current page. You can't perform actions based on the screenshot, use browser_snapshot for actions.
|
||||
- Parameters:
|
||||
- `raw` (boolean, optional): Whether to return without compression (in PNG format). Default is false, which returns a JPEG image.
|
||||
- `element` (string, optional): Human-readable element description used to obtain permission to screenshot the element. If not provided, the screenshot will be taken of viewport. If element is provided, ref must be provided too.
|
||||
- `ref` (string, optional): Exact target element reference from the page snapshot. If not provided, the screenshot will be taken of viewport. If ref is provided, element must be provided too.
|
||||
|
||||
### Vision-based Interactions
|
||||
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
- **browser_screen_capture**
|
||||
- Description: Take a screenshot of the current page
|
||||
- Parameters: None
|
||||
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
- **browser_screen_move_mouse**
|
||||
- Description: Move mouse to a given position
|
||||
- Parameters:
|
||||
@@ -224,9 +279,7 @@ server.connect(transport);
|
||||
- `x` (number): X coordinate
|
||||
- `y` (number): Y coordinate
|
||||
|
||||
- **browser_screen_capture**
|
||||
- Description: Take a screenshot of the current page
|
||||
- Parameters: None
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
- **browser_screen_click**
|
||||
- Description: Click left mouse button
|
||||
@@ -235,6 +288,8 @@ server.connect(transport);
|
||||
- `x` (number): X coordinate
|
||||
- `y` (number): Y coordinate
|
||||
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
- **browser_screen_drag**
|
||||
- Description: Drag left mouse button
|
||||
- Parameters:
|
||||
@@ -244,33 +299,38 @@ server.connect(transport);
|
||||
- `endX` (number): End X coordinate
|
||||
- `endY` (number): End Y coordinate
|
||||
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
- **browser_screen_type**
|
||||
- Description: Type text
|
||||
- Parameters:
|
||||
- `text` (string): Text to type
|
||||
- `text` (string): Text to type into the element
|
||||
- `submit` (boolean, optional): Whether to submit entered text (press Enter after)
|
||||
|
||||
- **browser_press_key**
|
||||
- Description: Press a key on the keyboard
|
||||
- Parameters:
|
||||
- `key` (string): Name of the key to press or a character to generate, such as `ArrowLeft` or `a`
|
||||
|
||||
### Tab Management
|
||||
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
- **browser_tab_list**
|
||||
- Description: List browser tabs
|
||||
- Parameters: None
|
||||
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
- **browser_tab_new**
|
||||
- Description: Open a new tab
|
||||
- Parameters:
|
||||
- `url` (string, optional): The URL to navigate to in the new tab. If not provided, the new tab will be blank.
|
||||
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
- **browser_tab_select**
|
||||
- Description: Select a tab by index
|
||||
- Parameters:
|
||||
- `index` (number): The index of the tab to select
|
||||
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
- **browser_tab_close**
|
||||
- Description: Close a tab
|
||||
- Parameters:
|
||||
@@ -278,48 +338,92 @@ server.connect(transport);
|
||||
|
||||
### Navigation
|
||||
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
- **browser_navigate**
|
||||
- Description: Navigate to a URL
|
||||
- Parameters:
|
||||
- `url` (string): The URL to navigate to
|
||||
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
- **browser_navigate_back**
|
||||
- Description: Go back to the previous page
|
||||
- Parameters: None
|
||||
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
- **browser_navigate_forward**
|
||||
- Description: Go forward to the next page
|
||||
- Parameters: None
|
||||
|
||||
### Keyboard
|
||||
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
- **browser_press_key**
|
||||
- Description: Press a key on the keyboard
|
||||
- Parameters:
|
||||
- `key` (string): Name of the key to press or a character to generate, such as `ArrowLeft` or `a`
|
||||
|
||||
### Console
|
||||
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
- **browser_console_messages**
|
||||
- Description: Returns all console messages
|
||||
- Parameters: None
|
||||
|
||||
### Files and Media
|
||||
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
- **browser_file_upload**
|
||||
- Description: Choose one or multiple files to upload
|
||||
- Description: Upload one or multiple files
|
||||
- Parameters:
|
||||
- `paths` (array): The absolute paths to the files to upload. Can be a single file or multiple files.
|
||||
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
- **browser_pdf_save**
|
||||
- Description: Save page as PDF
|
||||
- Parameters: None
|
||||
|
||||
### Utilities
|
||||
|
||||
- **browser_wait**
|
||||
- Description: Wait for a specified time in seconds
|
||||
- Parameters:
|
||||
- `time` (number): The time to wait in seconds (capped at 10 seconds)
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
- **browser_close**
|
||||
- Description: Close the page
|
||||
- Parameters: None
|
||||
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
- **browser_wait**
|
||||
- Description: Wait for a specified time in seconds
|
||||
- Parameters:
|
||||
- `time` (number): The time to wait in seconds
|
||||
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
- **browser_resize**
|
||||
- Description: Resize the browser window
|
||||
- Parameters:
|
||||
- `width` (number): Width of the browser window
|
||||
- `height` (number): Height of the browser window
|
||||
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
- **browser_install**
|
||||
- Description: Install the browser specified in the config. Call this if you get an error about the browser not being installed.
|
||||
- Parameters: None
|
||||
|
||||
<!-- NOTE: This has been generated via update-readme.js -->
|
||||
|
||||
- **browser_handle_dialog**
|
||||
- Description: Handle a dialog
|
||||
- Parameters:
|
||||
- `accept` (boolean): Whether to accept the dialog.
|
||||
- `promptText` (string, optional): The text of the prompt in case of a prompt dialog.
|
||||
|
||||
<!--- End of generated section -->
|
||||
|
||||
47
examples.md
Normal file
47
examples.md
Normal file
@@ -0,0 +1,47 @@
|
||||
# Examples
|
||||
|
||||
Below are some examples of prompts to use with Playwright MCP. Draw inspiration from them to create your own. If you come up with a new prompt that works well, feel free to let us know in an issue or pull request.
|
||||
|
||||
## Write a regression test for a bug report
|
||||
|
||||
We have received a user report about a bug on our website https://debs-obrien.github.io/playwright-movies-app. I'd like you to verify if this bug exists by trying to follow the steps. If you see different behaviour, explain how it's different. If you see the same behaviour, please write me a Playwright regression test that encapsulates the buggy user flow. While writing the test, draw inspiration from the Playwright code you ran. Here's the user report:
|
||||
|
||||
Howdy. I love your movies site, but there's something fishy going on. I'm looking at The Garfield Movie - a spectacular film - but somehow it's showing the wrong length. The movie is 90mins long, I measured myself yesterday! But somehow your site is claiming 101mins. Am I looking at it wrong, or is there a bug?
|
||||
|
||||
**Output:**
|
||||
|
||||
```ts
|
||||
import { test, expect } from '@playwright/test';
|
||||
|
||||
/**
|
||||
* Regression test for bug: The Garfield Movie showing incorrect runtime
|
||||
* User reported actual runtime is 90 minutes, but site shows 101 minutes
|
||||
*/
|
||||
test('verify The Garfield Movie runtime bug', async ({ page }) => {
|
||||
// Navigate to the movies app home page
|
||||
await page.goto('https://debs-obrien.github.io/playwright-movies-app');
|
||||
|
||||
// Wait for page to load completely
|
||||
await expect(page.getByRole('heading', { name: 'Popular', level: 1 })).toBeVisible();
|
||||
|
||||
// Look for The Garfield Movie and click on it
|
||||
const garfieldMovieLink = page.getByRole('link', { name: /poster of The Garfield Movie/i });
|
||||
await expect(garfieldMovieLink).toBeVisible();
|
||||
await garfieldMovieLink.click();
|
||||
|
||||
// Verify we're on the correct movie page
|
||||
await expect(page.getByRole('heading', { name: 'The Garfield Movie', level: 1 })).toBeVisible();
|
||||
|
||||
// Check the runtime information
|
||||
// The bug: Expected runtime should be 90 min, but site shows 101 min
|
||||
const movieInfoText = await page.locator('text=English / 101 min. / 2024').textContent();
|
||||
|
||||
// This test will fail because of the bug (which is what we want to demonstrate)
|
||||
// Once fixed, this assertion should be updated to the correct runtime (90 min)
|
||||
expect(movieInfoText).toContain('90 min');
|
||||
|
||||
// Alternative assertion that verifies the incorrect runtime is still present
|
||||
// Uncomment this and comment the above assertion to verify the bug exists
|
||||
// expect(movieInfoText).toContain('101 min');
|
||||
});
|
||||
```
|
||||
61
index.d.ts
vendored
61
index.d.ts
vendored
@@ -15,33 +15,46 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import type { LaunchOptions } from 'playwright';
|
||||
import type { Server } from '@modelcontextprotocol/sdk/server/index.js';
|
||||
|
||||
type ToolCapability = 'core' | 'tabs' | 'pdf' | 'history' | 'wait' | 'files' | 'install';
|
||||
|
||||
type Options = {
|
||||
/**
|
||||
* Path to the user data directory.
|
||||
*/
|
||||
userDataDir?: string;
|
||||
|
||||
/**
|
||||
* Launch options for the browser.
|
||||
*/
|
||||
launchOptions?: LaunchOptions;
|
||||
|
||||
/**
|
||||
* Use screenshots instead of snapshots. Less accurate, reliable and overall
|
||||
* slower, but contains visual representation of the page.
|
||||
* @default false
|
||||
*/
|
||||
vision?: boolean;
|
||||
|
||||
/**
|
||||
* Capabilities to enable.
|
||||
*/
|
||||
capabilities?: ToolCapability[];
|
||||
/**
|
||||
* The browser to use (e.g., 'chrome', 'chromium', 'firefox', 'webkit', 'msedge').
|
||||
*/
|
||||
browser?: string;
|
||||
/**
|
||||
* Path to a user data directory for browser profile persistence.
|
||||
*/
|
||||
userDataDir?: string;
|
||||
/**
|
||||
* Whether to run the browser in headless mode (default: true).
|
||||
*/
|
||||
headless?: boolean;
|
||||
/**
|
||||
* Path to a custom browser executable.
|
||||
*/
|
||||
executablePath?: string;
|
||||
/**
|
||||
* Chrome DevTools Protocol endpoint to connect to an existing browser instance.
|
||||
*/
|
||||
cdpEndpoint?: string;
|
||||
/**
|
||||
* Enable vision capabilities (e.g., visual automation or OCR).
|
||||
*/
|
||||
vision?: boolean;
|
||||
/**
|
||||
* List of enabled tool capabilities. Possible values:
|
||||
* - 'core': Core browser automation features.
|
||||
* - 'tabs': Tab management features.
|
||||
* - 'pdf': PDF generation and manipulation.
|
||||
* - 'history': Browser history access.
|
||||
* - 'wait': Wait and timing utilities.
|
||||
* - 'files': File upload/download support.
|
||||
* - 'install': Browser installation utilities.
|
||||
*/
|
||||
capabilities?: ToolCapability[];
|
||||
};
|
||||
|
||||
export function createServer(options?: Options): Server;
|
||||
export declare function createServer(options?: Options): Promise<Server>;
|
||||
export {};
|
||||
|
||||
4
package-lock.json
generated
4
package-lock.json
generated
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "@playwright/mcp",
|
||||
"version": "0.0.11",
|
||||
"version": "0.0.14",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "@playwright/mcp",
|
||||
"version": "0.0.11",
|
||||
"version": "0.0.14",
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"@modelcontextprotocol/sdk": "^1.6.1",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@playwright/mcp",
|
||||
"version": "0.0.11",
|
||||
"version": "0.0.14",
|
||||
"description": "Playwright Tools for MCP",
|
||||
"repository": {
|
||||
"type": "git",
|
||||
@@ -17,8 +17,12 @@
|
||||
"scripts": {
|
||||
"build": "tsc",
|
||||
"lint": "eslint .",
|
||||
"update-readme": "node utils/update-readme.js",
|
||||
"watch": "tsc --watch",
|
||||
"test": "playwright test",
|
||||
"ctest": "playwright test --project=chrome",
|
||||
"ftest": "playwright test --project=firefox",
|
||||
"wtest": "playwright test --project=webkit",
|
||||
"clean": "rm -rf lib",
|
||||
"npm-publish": "npm run clean && npm run build && npm run test && npm publish"
|
||||
},
|
||||
|
||||
@@ -16,6 +16,8 @@
|
||||
|
||||
import { defineConfig } from '@playwright/test';
|
||||
|
||||
import type { Project } from '@playwright/test';
|
||||
|
||||
export default defineConfig({
|
||||
testDir: './tests',
|
||||
fullyParallel: true,
|
||||
@@ -23,5 +25,11 @@ export default defineConfig({
|
||||
retries: process.env.CI ? 2 : 0,
|
||||
workers: process.env.CI ? 1 : undefined,
|
||||
reporter: 'list',
|
||||
projects: [{ name: 'default' }],
|
||||
projects: [
|
||||
{ name: 'chrome' },
|
||||
{ name: 'msedge', use: { mcpBrowser: 'msedge' } },
|
||||
{ name: 'chromium', use: { mcpBrowser: 'chromium' } },
|
||||
{ name: 'firefox', use: { mcpBrowser: 'firefox' } },
|
||||
{ name: 'webkit', use: { mcpBrowser: 'webkit' } },
|
||||
].filter(Boolean) as Project[],
|
||||
});
|
||||
|
||||
279
src/context.ts
279
src/context.ts
@@ -18,7 +18,10 @@ import * as playwright from 'playwright';
|
||||
import yaml from 'yaml';
|
||||
|
||||
import { waitForCompletion } from './tools/utils';
|
||||
import { ToolResult } from './tools/tool';
|
||||
import { ManualPromise } from './manualPromise';
|
||||
|
||||
import type { ImageContent, TextContent } from '@modelcontextprotocol/sdk/types';
|
||||
import type { ModalState, Tool, ToolActionResult } from './tools/tool';
|
||||
|
||||
export type ContextOptions = {
|
||||
browserName?: 'chromium' | 'firefox' | 'webkit';
|
||||
@@ -30,29 +33,51 @@ export type ContextOptions = {
|
||||
|
||||
type PageOrFrameLocator = playwright.Page | playwright.FrameLocator;
|
||||
|
||||
type RunOptions = {
|
||||
captureSnapshot?: boolean;
|
||||
waitForCompletion?: boolean;
|
||||
status?: string;
|
||||
noClearFileChooser?: boolean;
|
||||
type PendingAction = {
|
||||
dialogShown: ManualPromise<void>;
|
||||
};
|
||||
|
||||
export class Context {
|
||||
readonly tools: Tool[];
|
||||
readonly options: ContextOptions;
|
||||
private _browser: playwright.Browser | undefined;
|
||||
private _browserContext: playwright.BrowserContext | undefined;
|
||||
private _tabs: Tab[] = [];
|
||||
private _currentTab: Tab | undefined;
|
||||
private _modalStates: (ModalState & { tab: Tab })[] = [];
|
||||
private _pendingAction: PendingAction | undefined;
|
||||
|
||||
constructor(options: ContextOptions) {
|
||||
constructor(tools: Tool[], options: ContextOptions) {
|
||||
this.tools = tools;
|
||||
this.options = options;
|
||||
}
|
||||
|
||||
modalStates(): ModalState[] {
|
||||
return this._modalStates;
|
||||
}
|
||||
|
||||
setModalState(modalState: ModalState, inTab: Tab) {
|
||||
this._modalStates.push({ ...modalState, tab: inTab });
|
||||
}
|
||||
|
||||
clearModalState(modalState: ModalState) {
|
||||
this._modalStates = this._modalStates.filter(state => state !== modalState);
|
||||
}
|
||||
|
||||
modalStatesMarkdown(): string[] {
|
||||
const result: string[] = ['### Modal state'];
|
||||
for (const state of this._modalStates) {
|
||||
const tool = this.tools.find(tool => tool.clearsModalState === state.type);
|
||||
result.push(`- [${state.description}]: can be handled by the "${tool?.schema.name}" tool`);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
tabs(): Tab[] {
|
||||
return this._tabs;
|
||||
}
|
||||
|
||||
currentTab(): Tab {
|
||||
currentTabOrDie(): Tab {
|
||||
if (!this._currentTab)
|
||||
throw new Error('No current snapshot available. Capture a snapshot of navigate to a new location first.');
|
||||
return this._currentTab;
|
||||
@@ -77,10 +102,10 @@ export class Context {
|
||||
return this._currentTab!;
|
||||
}
|
||||
|
||||
async listTabs(): Promise<string> {
|
||||
async listTabsMarkdown(): Promise<string> {
|
||||
if (!this._tabs.length)
|
||||
return 'No tabs open';
|
||||
const lines: string[] = ['Open tabs:'];
|
||||
return '### No tabs open';
|
||||
const lines: string[] = ['### Open tabs'];
|
||||
for (let i = 0; i < this._tabs.length; i++) {
|
||||
const tab = this._tabs[i];
|
||||
const title = await tab.page.title();
|
||||
@@ -92,9 +117,121 @@ export class Context {
|
||||
}
|
||||
|
||||
async closeTab(index: number | undefined) {
|
||||
const tab = index === undefined ? this.currentTab() : this._tabs[index - 1];
|
||||
await tab.page.close();
|
||||
return await this.listTabs();
|
||||
const tab = index === undefined ? this._currentTab : this._tabs[index - 1];
|
||||
await tab?.page.close();
|
||||
return await this.listTabsMarkdown();
|
||||
}
|
||||
|
||||
async run(tool: Tool, params: Record<string, unknown> | undefined) {
|
||||
// Tab management is done outside of the action() call.
|
||||
const toolResult = await tool.handle(this, params);
|
||||
const { code, action, waitForNetwork, captureSnapshot, resultOverride } = toolResult;
|
||||
const racingAction = action ? () => this._raceAgainstModalDialogs(action) : undefined;
|
||||
|
||||
if (resultOverride)
|
||||
return resultOverride;
|
||||
|
||||
if (!this._currentTab) {
|
||||
return {
|
||||
content: [{
|
||||
type: 'text',
|
||||
text: 'No open pages available. Use the "browser_navigate" tool to navigate to a page first.',
|
||||
}],
|
||||
};
|
||||
}
|
||||
|
||||
const tab = this.currentTabOrDie();
|
||||
// TODO: race against modal dialogs to resolve clicks.
|
||||
let actionResult: { content?: (ImageContent | TextContent)[] } | undefined;
|
||||
try {
|
||||
if (waitForNetwork)
|
||||
actionResult = await waitForCompletion(this, tab.page, async () => racingAction?.()) ?? undefined;
|
||||
else
|
||||
actionResult = await racingAction?.() ?? undefined;
|
||||
} finally {
|
||||
if (captureSnapshot && !this._javaScriptBlocked())
|
||||
await tab.captureSnapshot();
|
||||
}
|
||||
|
||||
const result: string[] = [];
|
||||
result.push(`- Ran Playwright code:
|
||||
\`\`\`js
|
||||
${code.join('\n')}
|
||||
\`\`\`
|
||||
`);
|
||||
|
||||
if (this.modalStates().length) {
|
||||
result.push(...this.modalStatesMarkdown());
|
||||
return {
|
||||
content: [{
|
||||
type: 'text',
|
||||
text: result.join('\n'),
|
||||
}],
|
||||
};
|
||||
}
|
||||
|
||||
if (this.tabs().length > 1)
|
||||
result.push(await this.listTabsMarkdown(), '');
|
||||
|
||||
if (this.tabs().length > 1)
|
||||
result.push('### Current tab');
|
||||
|
||||
result.push(
|
||||
`- Page URL: ${tab.page.url()}`,
|
||||
`- Page Title: ${await tab.page.title()}`
|
||||
);
|
||||
|
||||
if (captureSnapshot && tab.hasSnapshot())
|
||||
result.push(tab.snapshotOrDie().text());
|
||||
|
||||
const content = actionResult?.content ?? [];
|
||||
|
||||
return {
|
||||
content: [
|
||||
...content,
|
||||
{
|
||||
type: 'text',
|
||||
text: result.join('\n'),
|
||||
}
|
||||
],
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Waits for `time` milliseconds.
 *
 * When there is a current tab and no 'dialog' modal state is pending, the
 * timer runs inside the page via `page.evaluate`; otherwise it runs as a
 * timer in this process.
 *
 * @param time - Delay in milliseconds.
 */
async waitForTimeout(time: number) {
  if (this._currentTab && !this._javaScriptBlocked()) {
    // Forward `time` into the page. The delay used to be hard-coded to 1000 here,
    // so the argument was silently ignored on this branch.
    await this._currentTab.page.evaluate(ms => new Promise(f => setTimeout(f, ms)), time);
    return;
  }
  // No current tab, or a dialog modal state is pending: wait in this process instead.
  await new Promise(f => setTimeout(f, time));
}
|
||||
|
||||
private async _raceAgainstModalDialogs(action: () => Promise<ToolActionResult>): Promise<ToolActionResult> {
|
||||
this._pendingAction = {
|
||||
dialogShown: new ManualPromise(),
|
||||
};
|
||||
|
||||
let result: ToolActionResult | undefined;
|
||||
try {
|
||||
await Promise.race([
|
||||
action().then(r => result = r),
|
||||
this._pendingAction.dialogShown,
|
||||
]);
|
||||
} finally {
|
||||
this._pendingAction = undefined;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
private _javaScriptBlocked(): boolean {
|
||||
return this._modalStates.some(state => state.type === 'dialog');
|
||||
}
|
||||
|
||||
dialogShown(tab: Tab, dialog: playwright.Dialog) {
|
||||
this.setModalState({
|
||||
type: 'dialog',
|
||||
description: `"${dialog.type()}" dialog with message "${dialog.message()}"`,
|
||||
dialog,
|
||||
}, tab);
|
||||
this._pendingAction?.dialogShown.resolve();
|
||||
}
|
||||
|
||||
private _onPageCreated(page: playwright.Page) {
|
||||
@@ -105,6 +242,7 @@ export class Context {
|
||||
}
|
||||
|
||||
private _onPageClosed(tab: Tab) {
|
||||
this._modalStates = this._modalStates.filter(state => state.tab !== tab);
|
||||
const index = this._tabs.indexOf(tab);
|
||||
if (index === -1)
|
||||
return;
|
||||
@@ -112,18 +250,21 @@ export class Context {
|
||||
|
||||
if (this._currentTab === tab)
|
||||
this._currentTab = this._tabs[Math.min(index, this._tabs.length - 1)];
|
||||
const browser = this._browser;
|
||||
if (this._browserContext && !this._tabs.length) {
|
||||
void this._browserContext.close().then(() => browser?.close()).catch(() => {});
|
||||
this._browser = undefined;
|
||||
this._browserContext = undefined;
|
||||
}
|
||||
if (this._browserContext && !this._tabs.length)
|
||||
void this.close();
|
||||
}
|
||||
|
||||
async close() {
|
||||
if (!this._browserContext)
|
||||
return;
|
||||
await this._browserContext.close();
|
||||
const browserContext = this._browserContext;
|
||||
const browser = this._browser;
|
||||
this._browserContext = undefined;
|
||||
this._browser = undefined;
|
||||
|
||||
await browserContext?.close().then(async () => {
|
||||
await browser?.close();
|
||||
}).catch(() => {});
|
||||
}
|
||||
|
||||
private async _ensureBrowserContext() {
|
||||
@@ -172,11 +313,10 @@ export class Context {
|
||||
}
|
||||
}
|
||||
|
||||
class Tab {
|
||||
export class Tab {
|
||||
readonly context: Context;
|
||||
readonly page: playwright.Page;
|
||||
private _console: playwright.ConsoleMessage[] = [];
|
||||
private _fileChooser: playwright.FileChooser | undefined;
|
||||
private _snapshot: PageSnapshot | undefined;
|
||||
private _onPageClose: (tab: Tab) => void;
|
||||
|
||||
@@ -190,13 +330,19 @@ class Tab {
|
||||
this._console.length = 0;
|
||||
});
|
||||
page.on('close', () => this._onClose());
|
||||
page.on('filechooser', chooser => this._fileChooser = chooser);
|
||||
page.on('filechooser', chooser => {
|
||||
this.context.setModalState({
|
||||
type: 'fileChooser',
|
||||
description: 'File chooser',
|
||||
fileChooser: chooser,
|
||||
}, this);
|
||||
});
|
||||
page.on('dialog', dialog => this.context.dialogShown(this, dialog));
|
||||
page.setDefaultNavigationTimeout(60000);
|
||||
page.setDefaultTimeout(5000);
|
||||
}
|
||||
|
||||
private _onClose() {
|
||||
this._fileChooser = undefined;
|
||||
this._console.length = 0;
|
||||
this._onPageClose(this);
|
||||
}
|
||||
@@ -207,44 +353,11 @@ class Tab {
|
||||
await this.page.waitForLoadState('load', { timeout: 5000 }).catch(() => {});
|
||||
}
|
||||
|
||||
async run(callback: (tab: Tab) => Promise<void>, options?: RunOptions): Promise<ToolResult> {
|
||||
try {
|
||||
if (!options?.noClearFileChooser)
|
||||
this._fileChooser = undefined;
|
||||
if (options?.waitForCompletion)
|
||||
await waitForCompletion(this.page, () => callback(this));
|
||||
else
|
||||
await callback(this);
|
||||
} finally {
|
||||
if (options?.captureSnapshot)
|
||||
this._snapshot = await PageSnapshot.create(this.page);
|
||||
}
|
||||
const tabList = this.context.tabs().length > 1 ? await this.context.listTabs() + '\n\nCurrent tab:' + '\n' : '';
|
||||
const snapshot = this._snapshot?.text({ status: options?.status, hasFileChooser: !!this._fileChooser }) ?? options?.status ?? '';
|
||||
return {
|
||||
content: [{
|
||||
type: 'text',
|
||||
text: tabList + snapshot,
|
||||
}],
|
||||
};
|
||||
hasSnapshot(): boolean {
|
||||
return !!this._snapshot;
|
||||
}
|
||||
|
||||
async runAndWait(callback: (tab: Tab) => Promise<void>, options?: RunOptions): Promise<ToolResult> {
|
||||
return await this.run(callback, {
|
||||
waitForCompletion: true,
|
||||
...options,
|
||||
});
|
||||
}
|
||||
|
||||
async runAndWaitWithSnapshot(callback: (snapshot: PageSnapshot) => Promise<void>, options?: RunOptions): Promise<ToolResult> {
|
||||
return await this.run(tab => callback(tab.lastSnapshot()), {
|
||||
captureSnapshot: true,
|
||||
waitForCompletion: true,
|
||||
...options,
|
||||
});
|
||||
}
|
||||
|
||||
lastSnapshot(): PageSnapshot {
|
||||
snapshotOrDie(): PageSnapshot {
|
||||
if (!this._snapshot)
|
||||
throw new Error('No snapshot available');
|
||||
return this._snapshot;
|
||||
@@ -254,11 +367,8 @@ class Tab {
|
||||
return this._console;
|
||||
}
|
||||
|
||||
async submitFileChooser(paths: string[]) {
|
||||
if (!this._fileChooser)
|
||||
throw new Error('No file chooser visible');
|
||||
await this._fileChooser.setFiles(paths);
|
||||
this._fileChooser = undefined;
|
||||
async captureSnapshot() {
|
||||
this._snapshot = await PageSnapshot.create(this.page);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -275,35 +385,18 @@ class PageSnapshot {
|
||||
return snapshot;
|
||||
}
|
||||
|
||||
text(options?: { status?: string, hasFileChooser?: boolean }): string {
|
||||
const results: string[] = [];
|
||||
if (options?.status) {
|
||||
results.push(options.status);
|
||||
results.push('');
|
||||
}
|
||||
if (options?.hasFileChooser) {
|
||||
results.push('- There is a file chooser visible that requires browser_file_upload to be called');
|
||||
results.push('');
|
||||
}
|
||||
results.push(this._text);
|
||||
return results.join('\n');
|
||||
text(): string {
|
||||
return this._text;
|
||||
}
|
||||
|
||||
private async _build(page: playwright.Page) {
|
||||
const yamlDocument = await this._snapshotFrame(page);
|
||||
const lines = [];
|
||||
lines.push(
|
||||
`- Page URL: ${page.url()}`,
|
||||
`- Page Title: ${await page.title()}`
|
||||
);
|
||||
lines.push(
|
||||
`- Page Snapshot`,
|
||||
'```yaml',
|
||||
yamlDocument.toString().trim(),
|
||||
'```',
|
||||
''
|
||||
);
|
||||
this._text = lines.join('\n');
|
||||
this._text = [
|
||||
`- Page Snapshot`,
|
||||
'```yaml',
|
||||
yamlDocument.toString({ indentSeq: false }).trim(),
|
||||
'```',
|
||||
].join('\n');
|
||||
}
|
||||
|
||||
private async _snapshotFrame(frame: playwright.Page | playwright.FrameLocator) {
|
||||
@@ -359,3 +452,7 @@ class PageSnapshot {
|
||||
return frame.locator(`aria-ref=${ref}`);
|
||||
}
|
||||
}
|
||||
|
||||
export async function generateLocator(locator: playwright.Locator): Promise<string> {
|
||||
return (locator as any)._generateLocatorString();
|
||||
}
|
||||
|
||||
81
src/index.ts
81
src/index.ts
@@ -14,8 +14,14 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import path from 'path';
|
||||
import os from 'os';
|
||||
import fs from 'fs';
|
||||
|
||||
import { createServerWithTools } from './server';
|
||||
import common from './tools/common';
|
||||
import console from './tools/console';
|
||||
import dialogs from './tools/dialogs';
|
||||
import files from './tools/files';
|
||||
import install from './tools/install';
|
||||
import keyboard from './tools/keyboard';
|
||||
@@ -24,15 +30,15 @@ import pdf from './tools/pdf';
|
||||
import snapshot from './tools/snapshot';
|
||||
import tabs from './tools/tabs';
|
||||
import screen from './tools/screen';
|
||||
import { console as consoleResource } from './resources/console';
|
||||
|
||||
import type { Tool, ToolCapability } from './tools/tool';
|
||||
import type { Resource } from './resources/resource';
|
||||
import type { Server } from '@modelcontextprotocol/sdk/server/index.js';
|
||||
import type { LaunchOptions } from 'playwright';
|
||||
|
||||
const snapshotTools: Tool[] = [
|
||||
...common(true),
|
||||
...console,
|
||||
...dialogs(true),
|
||||
...files(true),
|
||||
...install,
|
||||
...keyboard(true),
|
||||
@@ -44,6 +50,8 @@ const snapshotTools: Tool[] = [
|
||||
|
||||
const screenshotTools: Tool[] = [
|
||||
...common(false),
|
||||
...console,
|
||||
...dialogs(false),
|
||||
...files(false),
|
||||
...install,
|
||||
...keyboard(false),
|
||||
@@ -53,14 +61,11 @@ const screenshotTools: Tool[] = [
|
||||
...tabs(false),
|
||||
];
|
||||
|
||||
const resources: Resource[] = [
|
||||
consoleResource,
|
||||
];
|
||||
|
||||
type Options = {
|
||||
browserName?: 'chromium' | 'firefox' | 'webkit';
|
||||
browser?: string;
|
||||
userDataDir?: string;
|
||||
launchOptions?: LaunchOptions;
|
||||
headless?: boolean;
|
||||
executablePath?: string;
|
||||
cdpEndpoint?: string;
|
||||
vision?: boolean;
|
||||
capabilities?: ToolCapability[];
|
||||
@@ -68,17 +73,67 @@ type Options = {
|
||||
|
||||
const packageJSON = require('../package.json');
|
||||
|
||||
export function createServer(options?: Options): Server {
|
||||
export async function createServer(options?: Options): Promise<Server> {
|
||||
let browserName: 'chromium' | 'firefox' | 'webkit';
|
||||
let channel: string | undefined;
|
||||
switch (options?.browser) {
|
||||
case 'chrome':
|
||||
case 'chrome-beta':
|
||||
case 'chrome-canary':
|
||||
case 'chrome-dev':
|
||||
case 'msedge':
|
||||
case 'msedge-beta':
|
||||
case 'msedge-canary':
|
||||
case 'msedge-dev':
|
||||
browserName = 'chromium';
|
||||
channel = options.browser;
|
||||
break;
|
||||
case 'chromium':
|
||||
browserName = 'chromium';
|
||||
break;
|
||||
case 'firefox':
|
||||
browserName = 'firefox';
|
||||
break;
|
||||
case 'webkit':
|
||||
browserName = 'webkit';
|
||||
break;
|
||||
default:
|
||||
browserName = 'chromium';
|
||||
channel = 'chrome';
|
||||
}
|
||||
const userDataDir = options?.userDataDir ?? await createUserDataDir(browserName);
|
||||
|
||||
const launchOptions: LaunchOptions = {
|
||||
headless: !!(options?.headless ?? (os.platform() === 'linux' && !process.env.DISPLAY)),
|
||||
channel,
|
||||
executablePath: options?.executablePath,
|
||||
};
|
||||
|
||||
const allTools = options?.vision ? screenshotTools : snapshotTools;
|
||||
const tools = allTools.filter(tool => !options?.capabilities || tool.capability === 'core' || options.capabilities.includes(tool.capability));
|
||||
return createServerWithTools({
|
||||
name: 'Playwright',
|
||||
version: packageJSON.version,
|
||||
tools,
|
||||
resources,
|
||||
browserName: options?.browserName,
|
||||
userDataDir: options?.userDataDir ?? '',
|
||||
launchOptions: options?.launchOptions,
|
||||
resources: [],
|
||||
browserName,
|
||||
userDataDir,
|
||||
launchOptions,
|
||||
cdpEndpoint: options?.cdpEndpoint,
|
||||
});
|
||||
}
|
||||
|
||||
async function createUserDataDir(browserName: 'chromium' | 'firefox' | 'webkit') {
|
||||
let cacheDirectory: string;
|
||||
if (process.platform === 'linux')
|
||||
cacheDirectory = process.env.XDG_CACHE_HOME || path.join(os.homedir(), '.cache');
|
||||
else if (process.platform === 'darwin')
|
||||
cacheDirectory = path.join(os.homedir(), 'Library', 'Caches');
|
||||
else if (process.platform === 'win32')
|
||||
cacheDirectory = process.env.LOCALAPPDATA || path.join(os.homedir(), 'AppData', 'Local');
|
||||
else
|
||||
throw new Error('Unsupported platform: ' + process.platform);
|
||||
const result = path.join(cacheDirectory, 'ms-playwright', `mcp-${browserName}-profile`);
|
||||
await fs.promises.mkdir(result, { recursive: true });
|
||||
return result;
|
||||
}
|
||||
|
||||
53
src/javascript.ts
Normal file
53
src/javascript.ts
Normal file
@@ -0,0 +1,53 @@
|
||||
/**
|
||||
* Copyright (c) Microsoft Corporation.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// adapted from:
|
||||
// - https://github.com/microsoft/playwright/blob/76ee48dc9d4034536e3ec5b2c7ce8be3b79418a8/packages/playwright-core/src/utils/isomorphic/stringUtils.ts
|
||||
// - https://github.com/microsoft/playwright/blob/76ee48dc9d4034536e3ec5b2c7ce8be3b79418a8/packages/playwright-core/src/server/codegen/javascript.ts
|
||||
|
||||
// NOTE: this function should not be used to escape any selectors.
|
||||
/**
 * Renders `text` as a JavaScript string literal delimited by `char`.
 *
 * The text is JSON-stringified first (escaping backslashes, control characters
 * and double quotes), the surrounding double quotes are dropped and the `\"`
 * escapes are undone; afterwards only the requested delimiter is escaped.
 *
 * @param text Raw string to embed into generated code.
 * @param char Delimiter to use: `'` (default), `"` or a backtick.
 * @returns The delimited, escaped literal.
 * @throws Error with message 'Invalid escape char' for any other delimiter.
 */
export function escapeWithQuotes(text: string, char: string = '\'') {
  const stringified = JSON.stringify(text);
  const escapedText = stringified.substring(1, stringified.length - 1).replace(/\\"/g, '"');
  if (char === '\'')
    return char + escapedText.replace(/[']/g, '\\\'') + char;
  if (char === '"')
    return char + escapedText.replace(/["]/g, '\\"') + char;
  if (char === '`') {
    // A raw backtick would terminate the template literal early and a raw `${`
    // would start an interpolation, so both have to be backslash-escaped.
    return char + escapedText.replace(/`/g, '\\`').replace(/\$\{/g, '\\${') + char;
  }
  throw new Error('Invalid escape char');
}
|
||||
|
||||
/** Formats `text` as a single-quoted JavaScript string literal. */
export function quote(text: string) {
  const singleQuote = '\'';
  return escapeWithQuotes(text, singleQuote);
}
|
||||
|
||||
/**
 * Formats a value as JavaScript source text.
 *
 * - strings are single-quoted via `quote`;
 * - arrays are rendered on one line as `[a, b, c]`;
 * - objects are rendered one property per line with keys sorted and
 *   `undefined`-valued properties omitted; an object with no remaining
 *   properties renders as `{}`;
 * - everything else (numbers, booleans, `null`, `undefined`, ...) goes through
 *   `String(value)`.
 *
 * @param value Value to format.
 * @param indent Prefix placed before each property line of the top-level
 *   object. Nested values are formatted with the default indent.
 * @returns The formatted source text.
 */
export function formatObject(value: any, indent = ' '): string {
  if (typeof value === 'string')
    return quote(value);
  if (Array.isArray(value))
    return `[${value.map(o => formatObject(o)).join(', ')}]`;
  // `typeof null` is 'object'; without the null guard Object.keys(null) throws.
  if (value !== null && typeof value === 'object') {
    const tokens = Object.keys(value)
        .filter(key => value[key] !== undefined)
        .sort()
        .map(key => `${key}: ${formatObject(value[key])}`);
    if (!tokens.length)
      return '{}';
    return `{\n${indent}${tokens.join(`,\n${indent}`)}\n}`;
  }
  return String(value);
}
|
||||
127
src/manualPromise.ts
Normal file
127
src/manualPromise.ts
Normal file
@@ -0,0 +1,127 @@
|
||||
/**
|
||||
* Copyright (c) Microsoft Corporation.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
 * A promise that is settled from the outside through its own `resolve()` /
 * `reject()` methods instead of through an executor callback.
 *
 * `Symbol.species` is overridden to return the plain `Promise` constructor,
 * and `isDone()` reports whether `resolve()` or `reject()` has been called.
 */
export class ManualPromise<T = void> extends Promise<T> {
  private _resolve!: (t: T) => void;
  private _reject!: (e: Error) => void;
  private _isDone: boolean;

  constructor() {
    // `this` cannot be touched before super() returns, so the settle functions
    // handed to the executor are parked in locals and stored afterwards.
    let fulfill: (t: T) => void;
    let fail: (e: Error) => void;
    super((onFulfill, onReject) => {
      fulfill = onFulfill;
      fail = onReject;
    });
    this._resolve = fulfill!;
    this._reject = fail!;
    this._isDone = false;
  }

  /** Returns true once `resolve()` or `reject()` has been called. */
  isDone() {
    return this._isDone;
  }

  /** Marks the promise as done and fulfills it with `t`. */
  resolve(t: T) {
    this._isDone = true;
    this._resolve(t);
  }

  /** Marks the promise as done and rejects it with `e`. */
  reject(e: Error) {
    this._isDone = true;
    this._reject(e);
  }

  static override get [Symbol.species]() {
    return Promise;
  }

  override get [Symbol.toStringTag]() {
    return 'ManualPromise';
  }
}
|
||||
|
||||
/**
 * A scope that promises can be raced against. Once the scope is rejected or
 * closed, every race currently registered with it (and every later one) is
 * settled with the scope's error, or with a default value for `safeRace`.
 */
export class LongStandingScope {
  private _terminateError: Error | undefined;
  private _closeError: Error | undefined;
  private _terminatePromises = new Map<ManualPromise<Error>, string[]>();
  private _isClosed = false;

  /** Marks the scope closed and hands `error` itself to every pending race. */
  reject(error: Error) {
    this._isClosed = true;
    this._terminateError = error;
    this._terminatePromises.forEach((_frames, pending) => pending.resolve(error));
  }

  /**
   * Marks the scope closed and hands every pending race a clone of `error`
   * whose stack is built from the frames captured when that race started.
   */
  close(error: Error) {
    this._isClosed = true;
    this._closeError = error;
    this._terminatePromises.forEach((frames, pending) => pending.resolve(cloneError(error, frames)));
  }

  /** Returns true once `reject()` or `close()` has been called. */
  isClosed() {
    return this._isClosed;
  }

  /** Races `promise` against all of the given scopes at once. */
  static async raceMultiple<T>(scopes: LongStandingScope[], promise: Promise<T>): Promise<T> {
    const contenders = scopes.map(scope => scope.race(promise));
    return Promise.race(contenders);
  }

  /** Races one or several promises against this scope; rejects with the scope's error if it wins. */
  async race<T>(promise: Promise<T> | Promise<T>[]): Promise<T> {
    const contenders = Array.isArray(promise) ? promise : [promise];
    return this._race(contenders, false) as Promise<T>;
  }

  /** Like `race()`, but resolves to `defaultValue` instead of rejecting when the scope wins. */
  async safeRace<T>(promise: Promise<T>, defaultValue?: T): Promise<T> {
    return this._race([promise], true, defaultValue);
  }

  private async _race(promises: Promise<any>[], safe: boolean, defaultValue?: any): Promise<any> {
    const terminated = new ManualPromise<Error>();
    const callSite = captureRawStack();
    // A scope that is already rejected/closed settles the race right away.
    if (this._terminateError)
      terminated.resolve(this._terminateError);
    if (this._closeError)
      terminated.resolve(cloneError(this._closeError, callSite));
    this._terminatePromises.set(terminated, callSite);
    try {
      const onTerminated = terminated.then(error => safe ? defaultValue : Promise.reject(error));
      return await Promise.race([onTerminated, ...promises]);
    } finally {
      // Unregister so later reject()/close() calls skip this finished race.
      this._terminatePromises.delete(terminated);
    }
  }
}
|
||||
|
||||
/**
 * Builds a fresh plain `Error` carrying the name and message of `error`, with a
 * stack made of a `name:message` header line followed by the given `frames`.
 */
function cloneError(error: Error, frames: string[]) {
  const { name, message } = error;
  const header = name + ':' + message;
  const copy = new Error();
  copy.name = name;
  copy.message = message;
  copy.stack = [header, ...frames].join('\n');
  return copy;
}
|
||||
|
||||
/**
 * Captures the current call stack as an array of raw lines, temporarily
 * raising `Error.stackTraceLimit` to 50 while the stack is read.
 */
function captureRawStack(): string[] {
  const previousLimit = Error.stackTraceLimit;
  Error.stackTraceLimit = 50;
  const rawStack = new Error().stack || '';
  // Put the global limit back before handing out the result.
  Error.stackTraceLimit = previousLimit;
  return rawStack.split('\n');
}
|
||||
@@ -15,9 +15,6 @@
|
||||
*/
|
||||
|
||||
import http from 'http';
|
||||
import fs from 'fs';
|
||||
import os from 'os';
|
||||
import path from 'path';
|
||||
|
||||
import { program } from 'commander';
|
||||
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
|
||||
@@ -27,7 +24,6 @@ import { SSEServerTransport } from '@modelcontextprotocol/sdk/server/sse.js';
|
||||
import { createServer } from './index';
|
||||
import { ServerList } from './server';
|
||||
|
||||
import type { LaunchOptions } from 'playwright';
|
||||
import assert from 'assert';
|
||||
import { ToolCapability } from './tools/tool';
|
||||
|
||||
@@ -45,46 +41,11 @@ program
|
||||
.option('--user-data-dir <path>', 'Path to the user data directory')
|
||||
.option('--vision', 'Run server that uses screenshots (Aria snapshots are used by default)')
|
||||
.action(async options => {
|
||||
let browserName: 'chromium' | 'firefox' | 'webkit';
|
||||
let channel: string | undefined;
|
||||
switch (options.browser) {
|
||||
case 'chrome':
|
||||
case 'chrome-beta':
|
||||
case 'chrome-canary':
|
||||
case 'chrome-dev':
|
||||
case 'msedge':
|
||||
case 'msedge-beta':
|
||||
case 'msedge-canary':
|
||||
case 'msedge-dev':
|
||||
browserName = 'chromium';
|
||||
channel = options.browser;
|
||||
break;
|
||||
case 'chromium':
|
||||
browserName = 'chromium';
|
||||
break;
|
||||
case 'firefox':
|
||||
browserName = 'firefox';
|
||||
break;
|
||||
case 'webkit':
|
||||
browserName = 'webkit';
|
||||
break;
|
||||
default:
|
||||
browserName = 'chromium';
|
||||
channel = 'chrome';
|
||||
}
|
||||
|
||||
const launchOptions: LaunchOptions = {
|
||||
headless: options.headless ?? !process.env.DISPLAY,
|
||||
channel,
|
||||
executablePath: options.executablePath,
|
||||
};
|
||||
|
||||
const userDataDir = options.userDataDir ?? await createUserDataDir(browserName);
|
||||
|
||||
const serverList = new ServerList(() => createServer({
|
||||
browserName,
|
||||
userDataDir,
|
||||
launchOptions,
|
||||
browser: options.browser,
|
||||
userDataDir: options.userDataDir,
|
||||
headless: options.headless,
|
||||
executablePath: options.executablePath,
|
||||
vision: !!options.vision,
|
||||
cdpEndpoint: options.cdpEndpoint,
|
||||
capabilities: options.caps?.split(',').map((c: string) => c.trim() as ToolCapability),
|
||||
@@ -113,21 +74,6 @@ function setupExitWatchdog(serverList: ServerList) {
|
||||
|
||||
program.parse(process.argv);
|
||||
|
||||
async function createUserDataDir(browserName: 'chromium' | 'firefox' | 'webkit') {
|
||||
let cacheDirectory: string;
|
||||
if (process.platform === 'linux')
|
||||
cacheDirectory = process.env.XDG_CACHE_HOME || path.join(os.homedir(), '.cache');
|
||||
else if (process.platform === 'darwin')
|
||||
cacheDirectory = path.join(os.homedir(), 'Library', 'Caches');
|
||||
else if (process.platform === 'win32')
|
||||
cacheDirectory = process.env.LOCALAPPDATA || path.join(os.homedir(), 'AppData', 'Local');
|
||||
else
|
||||
throw new Error('Unsupported platform: ' + process.platform);
|
||||
const result = path.join(cacheDirectory, 'ms-playwright', `mcp-${browserName}-profile`);
|
||||
await fs.promises.mkdir(result, { recursive: true });
|
||||
return result;
|
||||
}
|
||||
|
||||
async function startSSEServer(port: number, serverList: ServerList) {
|
||||
const sessions = new Map<string, SSEServerTransport>();
|
||||
const httpServer = http.createServer(async (req, res) => {
|
||||
|
||||
@@ -32,7 +32,7 @@ type Options = ContextOptions & {
|
||||
|
||||
export function createServerWithTools(options: Options): Server {
|
||||
const { name, version, tools, resources } = options;
|
||||
const context = new Context(options);
|
||||
const context = new Context(tools, options);
|
||||
const server = new Server({ name, version }, {
|
||||
capabilities: {
|
||||
tools: {},
|
||||
@@ -57,9 +57,21 @@ export function createServerWithTools(options: Options): Server {
|
||||
};
|
||||
}
|
||||
|
||||
const modalStates = context.modalStates().map(state => state.type);
|
||||
if ((tool.clearsModalState && !modalStates.includes(tool.clearsModalState)) ||
|
||||
(!tool.clearsModalState && modalStates.length)) {
|
||||
const text = [
|
||||
`Tool "${request.params.name}" does not handle the modal state.`,
|
||||
...context.modalStatesMarkdown(),
|
||||
].join('\n');
|
||||
return {
|
||||
content: [{ type: 'text', text }],
|
||||
isError: true,
|
||||
};
|
||||
}
|
||||
|
||||
try {
|
||||
const result = await tool.handle(context, request.params.arguments);
|
||||
return result;
|
||||
return await context.run(tool, request.params.arguments);
|
||||
} catch (error) {
|
||||
return {
|
||||
content: [{ type: 'text', text: String(error) }],
|
||||
@@ -89,14 +101,14 @@ export function createServerWithTools(options: Options): Server {
|
||||
|
||||
export class ServerList {
|
||||
private _servers: Server[] = [];
|
||||
private _serverFactory: () => Server;
|
||||
private _serverFactory: () => Promise<Server>;
|
||||
|
||||
constructor(serverFactory: () => Server) {
|
||||
constructor(serverFactory: () => Promise<Server>) {
|
||||
this._serverFactory = serverFactory;
|
||||
}
|
||||
|
||||
async create() {
|
||||
const server = this._serverFactory();
|
||||
const server = await this._serverFactory();
|
||||
this._servers.push(server);
|
||||
return server;
|
||||
}
|
||||
|
||||
@@ -23,41 +23,43 @@ const waitSchema = z.object({
|
||||
time: z.number().describe('The time to wait in seconds'),
|
||||
});
|
||||
|
||||
const wait: Tool = {
|
||||
const wait: ToolFactory = captureSnapshot => ({
|
||||
capability: 'wait',
|
||||
|
||||
schema: {
|
||||
name: 'browser_wait',
|
||||
description: 'Wait for a specified time in seconds',
|
||||
inputSchema: zodToJsonSchema(waitSchema),
|
||||
},
|
||||
|
||||
handle: async (context, params) => {
|
||||
const validatedParams = waitSchema.parse(params);
|
||||
await new Promise(f => setTimeout(f, Math.min(10000, validatedParams.time * 1000)));
|
||||
return {
|
||||
content: [{
|
||||
type: 'text',
|
||||
text: `Waited for ${validatedParams.time} seconds`,
|
||||
}],
|
||||
code: [`// Waited for ${validatedParams.time} seconds`],
|
||||
captureSnapshot,
|
||||
waitForNetwork: false,
|
||||
};
|
||||
},
|
||||
};
|
||||
});
|
||||
|
||||
const closeSchema = z.object({});
|
||||
|
||||
const close: Tool = {
|
||||
capability: 'core',
|
||||
|
||||
schema: {
|
||||
name: 'browser_close',
|
||||
description: 'Close the page',
|
||||
inputSchema: zodToJsonSchema(closeSchema),
|
||||
},
|
||||
|
||||
handle: async context => {
|
||||
await context.close();
|
||||
return {
|
||||
content: [{
|
||||
type: 'text',
|
||||
text: `Page closed`,
|
||||
}],
|
||||
code: [`// Internal to close the page`],
|
||||
captureSnapshot: false,
|
||||
waitForNetwork: false,
|
||||
};
|
||||
},
|
||||
};
|
||||
@@ -74,22 +76,32 @@ const resize: ToolFactory = captureSnapshot => ({
|
||||
description: 'Resize the browser window',
|
||||
inputSchema: zodToJsonSchema(resizeSchema),
|
||||
},
|
||||
|
||||
handle: async (context, params) => {
|
||||
const validatedParams = resizeSchema.parse(params);
|
||||
|
||||
const tab = context.currentTab();
|
||||
return await tab.run(
|
||||
tab => tab.page.setViewportSize({ width: validatedParams.width, height: validatedParams.height }),
|
||||
{
|
||||
status: `Resized browser window`,
|
||||
captureSnapshot,
|
||||
}
|
||||
);
|
||||
const tab = context.currentTabOrDie();
|
||||
|
||||
const code = [
|
||||
`// Resize browser window to ${validatedParams.width}x${validatedParams.height}`,
|
||||
`await page.setViewportSize({ width: ${validatedParams.width}, height: ${validatedParams.height} });`
|
||||
];
|
||||
|
||||
const action = async () => {
|
||||
await tab.page.setViewportSize({ width: validatedParams.width, height: validatedParams.height });
|
||||
};
|
||||
|
||||
return {
|
||||
code,
|
||||
action,
|
||||
captureSnapshot,
|
||||
waitForNetwork: true
|
||||
};
|
||||
},
|
||||
});
|
||||
|
||||
export default (captureSnapshot: boolean) => [
|
||||
close,
|
||||
wait,
|
||||
wait(captureSnapshot),
|
||||
resize(captureSnapshot)
|
||||
];
|
||||
|
||||
@@ -14,22 +14,36 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import type { Resource } from './resource';
|
||||
import { z } from 'zod';
|
||||
import { zodToJsonSchema } from 'zod-to-json-schema';
|
||||
|
||||
export const console: Resource = {
|
||||
import type { Tool } from './tool';
|
||||
|
||||
const consoleSchema = z.object({});
|
||||
|
||||
const console: Tool = {
|
||||
capability: 'core',
|
||||
schema: {
|
||||
uri: 'browser://console',
|
||||
name: 'Page console',
|
||||
mimeType: 'text/plain',
|
||||
name: 'browser_console_messages',
|
||||
description: 'Returns all console messages',
|
||||
inputSchema: zodToJsonSchema(consoleSchema),
|
||||
},
|
||||
|
||||
read: async (context, uri) => {
|
||||
const messages = await context.currentTab().console();
|
||||
handle: async context => {
|
||||
const messages = await context.currentTabOrDie().console();
|
||||
const log = messages.map(message => `[${message.type().toUpperCase()}] ${message.text()}`).join('\n');
|
||||
return [{
|
||||
uri,
|
||||
mimeType: 'text/plain',
|
||||
text: log
|
||||
}];
|
||||
return {
|
||||
code: [`// <internal code to get console messages>`],
|
||||
action: async () => {
|
||||
return {
|
||||
content: [{ type: 'text', text: log }]
|
||||
};
|
||||
},
|
||||
captureSnapshot: false,
|
||||
waitForNetwork: false,
|
||||
};
|
||||
},
|
||||
};
|
||||
|
||||
export default [
|
||||
console,
|
||||
];
|
||||
65
src/tools/dialogs.ts
Normal file
65
src/tools/dialogs.ts
Normal file
@@ -0,0 +1,65 @@
|
||||
/**
|
||||
* Copyright (c) Microsoft Corporation.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import { z } from 'zod';
|
||||
import { zodToJsonSchema } from 'zod-to-json-schema';
|
||||
|
||||
import type { ToolFactory } from './tool';
|
||||
|
||||
// Input accepted by the browser_handle_dialog tool.
const handleDialogSchema = z.object({
  accept: z.boolean().describe('Whether to accept the dialog.'),
  promptText: z.string().optional().describe('The text of the prompt in case of a prompt dialog.'),
});

/**
 * Tool that accepts or dismisses the dialog recorded in the context's modal
 * states and then clears that modal state.
 */
const handleDialog: ToolFactory = captureSnapshot => ({
  capability: 'core',

  schema: {
    name: 'browser_handle_dialog',
    description: 'Handle a dialog',
    inputSchema: zodToJsonSchema(handleDialogSchema),
  },

  handle: async (context, params) => {
    const validatedParams = handleDialogSchema.parse(params);
    const pending = context.modalStates().find(state => state.type === 'dialog');
    if (!pending)
      throw new Error('No dialog visible');

    const { dialog } = pending;
    await (validatedParams.accept ? dialog.accept(validatedParams.promptText) : dialog.dismiss());

    // Only reached when accept/dismiss did not throw.
    context.clearModalState(pending);

    return {
      code: [`// <internal code to handle "${dialog.type()}" dialog>`],
      captureSnapshot,
      waitForNetwork: false,
    };
  },

  clearsModalState: 'dialog',
});
|
||||
|
||||
// Factory for the dialog tools; `captureSnapshot` is passed through to each tool.
export default (captureSnapshot: boolean) => {
  return [handleDialog(captureSnapshot)];
};
|
||||
@@ -25,22 +25,36 @@ const uploadFileSchema = z.object({
|
||||
|
||||
const uploadFile: ToolFactory = captureSnapshot => ({
|
||||
capability: 'files',
|
||||
|
||||
schema: {
|
||||
name: 'browser_file_upload',
|
||||
description: 'Upload one or multiple files',
|
||||
inputSchema: zodToJsonSchema(uploadFileSchema),
|
||||
},
|
||||
|
||||
handle: async (context, params) => {
|
||||
const validatedParams = uploadFileSchema.parse(params);
|
||||
const tab = context.currentTab();
|
||||
return await tab.runAndWait(async () => {
|
||||
await tab.submitFileChooser(validatedParams.paths);
|
||||
}, {
|
||||
status: `Chose files ${validatedParams.paths.join(', ')}`,
|
||||
const modalState = context.modalStates().find(state => state.type === 'fileChooser');
|
||||
if (!modalState)
|
||||
throw new Error('No file chooser visible');
|
||||
|
||||
const code = [
|
||||
`// <internal code to chose files ${validatedParams.paths.join(', ')}`,
|
||||
];
|
||||
|
||||
const action = async () => {
|
||||
await modalState.fileChooser.setFiles(validatedParams.paths);
|
||||
context.clearModalState(modalState);
|
||||
};
|
||||
|
||||
return {
|
||||
code,
|
||||
action,
|
||||
captureSnapshot,
|
||||
noClearFileChooser: true,
|
||||
});
|
||||
waitForNetwork: true,
|
||||
};
|
||||
},
|
||||
clearsModalState: 'fileChooser',
|
||||
});
|
||||
|
||||
export default (captureSnapshot: boolean) => [
|
||||
|
||||
@@ -48,10 +48,9 @@ const install: Tool = {
|
||||
});
|
||||
});
|
||||
return {
|
||||
content: [{
|
||||
type: 'text',
|
||||
text: `Browser ${channel} installed`,
|
||||
}],
|
||||
code: [`// Browser ${channel} installed`],
|
||||
captureSnapshot: false,
|
||||
waitForNetwork: false,
|
||||
};
|
||||
},
|
||||
};
|
||||
|
||||
@@ -25,19 +25,30 @@ const pressKeySchema = z.object({
|
||||
|
||||
const pressKey: ToolFactory = captureSnapshot => ({
|
||||
capability: 'core',
|
||||
|
||||
schema: {
|
||||
name: 'browser_press_key',
|
||||
description: 'Press a key on the keyboard',
|
||||
inputSchema: zodToJsonSchema(pressKeySchema),
|
||||
},
|
||||
|
||||
handle: async (context, params) => {
|
||||
const validatedParams = pressKeySchema.parse(params);
|
||||
return await context.currentTab().runAndWait(async tab => {
|
||||
await tab.page.keyboard.press(validatedParams.key);
|
||||
}, {
|
||||
status: `Pressed key ${validatedParams.key}`,
|
||||
const tab = context.currentTabOrDie();
|
||||
|
||||
const code = [
|
||||
`// Press ${validatedParams.key}`,
|
||||
`await page.keyboard.press('${validatedParams.key}');`,
|
||||
];
|
||||
|
||||
const action = () => tab.page.keyboard.press(validatedParams.key);
|
||||
|
||||
return {
|
||||
code,
|
||||
action,
|
||||
captureSnapshot,
|
||||
});
|
||||
waitForNetwork: true
|
||||
};
|
||||
},
|
||||
});
|
||||
|
||||
|
||||
@@ -25,45 +25,60 @@ const navigateSchema = z.object({
|
||||
|
||||
const navigate: ToolFactory = captureSnapshot => ({
|
||||
capability: 'core',
|
||||
|
||||
schema: {
|
||||
name: 'browser_navigate',
|
||||
description: 'Navigate to a URL',
|
||||
inputSchema: zodToJsonSchema(navigateSchema),
|
||||
},
|
||||
|
||||
handle: async (context, params) => {
|
||||
const validatedParams = navigateSchema.parse(params);
|
||||
const currentTab = await context.ensureTab();
|
||||
return await currentTab.run(async tab => {
|
||||
await tab.navigate(validatedParams.url);
|
||||
}, {
|
||||
status: `Navigated to ${validatedParams.url}`,
|
||||
const tab = await context.ensureTab();
|
||||
await tab.navigate(validatedParams.url);
|
||||
|
||||
const code = [
|
||||
`// Navigate to ${validatedParams.url}`,
|
||||
`await page.goto('${validatedParams.url}');`,
|
||||
];
|
||||
|
||||
return {
|
||||
code,
|
||||
captureSnapshot,
|
||||
});
|
||||
waitForNetwork: false,
|
||||
};
|
||||
},
|
||||
});
|
||||
|
||||
const goBackSchema = z.object({});
|
||||
|
||||
const goBack: ToolFactory = snapshot => ({
|
||||
const goBack: ToolFactory = captureSnapshot => ({
|
||||
capability: 'history',
|
||||
schema: {
|
||||
name: 'browser_navigate_back',
|
||||
description: 'Go back to the previous page',
|
||||
inputSchema: zodToJsonSchema(goBackSchema),
|
||||
},
|
||||
|
||||
handle: async context => {
|
||||
return await context.currentTab().runAndWait(async tab => {
|
||||
await tab.page.goBack();
|
||||
}, {
|
||||
status: 'Navigated back',
|
||||
captureSnapshot: snapshot,
|
||||
});
|
||||
const tab = await context.ensureTab();
|
||||
await tab.page.goBack();
|
||||
const code = [
|
||||
`// Navigate back`,
|
||||
`await page.goBack();`,
|
||||
];
|
||||
|
||||
return {
|
||||
code,
|
||||
captureSnapshot,
|
||||
waitForNetwork: false,
|
||||
};
|
||||
},
|
||||
});
|
||||
|
||||
const goForwardSchema = z.object({});
|
||||
|
||||
const goForward: ToolFactory = snapshot => ({
|
||||
const goForward: ToolFactory = captureSnapshot => ({
|
||||
capability: 'history',
|
||||
schema: {
|
||||
name: 'browser_navigate_forward',
|
||||
@@ -71,12 +86,17 @@ const goForward: ToolFactory = snapshot => ({
|
||||
inputSchema: zodToJsonSchema(goForwardSchema),
|
||||
},
|
||||
handle: async context => {
|
||||
return await context.currentTab().runAndWait(async tab => {
|
||||
await tab.page.goForward();
|
||||
}, {
|
||||
status: 'Navigated forward',
|
||||
captureSnapshot: snapshot,
|
||||
});
|
||||
const tab = context.currentTabOrDie();
|
||||
await tab.page.goForward();
|
||||
const code = [
|
||||
`// Navigate forward`,
|
||||
`await page.goForward();`,
|
||||
];
|
||||
return {
|
||||
code,
|
||||
captureSnapshot,
|
||||
waitForNetwork: false,
|
||||
};
|
||||
},
|
||||
});
|
||||
|
||||
|
||||
@@ -21,6 +21,7 @@ import { z } from 'zod';
|
||||
import { zodToJsonSchema } from 'zod-to-json-schema';
|
||||
|
||||
import { sanitizeForFilePath } from './utils';
|
||||
import * as javascript from '../javascript';
|
||||
|
||||
import type { Tool } from './tool';
|
||||
|
||||
@@ -28,20 +29,27 @@ const pdfSchema = z.object({});
|
||||
|
||||
const pdf: Tool = {
|
||||
capability: 'pdf',
|
||||
|
||||
schema: {
|
||||
name: 'browser_pdf_save',
|
||||
description: 'Save page as PDF',
|
||||
inputSchema: zodToJsonSchema(pdfSchema),
|
||||
},
|
||||
|
||||
handle: async context => {
|
||||
const tab = context.currentTab();
|
||||
const tab = context.currentTabOrDie();
|
||||
const fileName = path.join(os.tmpdir(), sanitizeForFilePath(`page-${new Date().toISOString()}`)) + '.pdf';
|
||||
await tab.page.pdf({ path: fileName });
|
||||
|
||||
const code = [
|
||||
`// Save page as ${fileName}`,
|
||||
`await page.pdf(${javascript.formatObject({ path: fileName })});`,
|
||||
];
|
||||
|
||||
return {
|
||||
content: [{
|
||||
type: 'text',
|
||||
text: `Saved as ${fileName}`,
|
||||
}],
|
||||
code,
|
||||
action: async () => tab.page.pdf({ path: fileName }).then(() => {}),
|
||||
captureSnapshot: false,
|
||||
waitForNetwork: false,
|
||||
};
|
||||
},
|
||||
};
|
||||
|
||||
@@ -17,6 +17,8 @@
|
||||
import { z } from 'zod';
|
||||
import { zodToJsonSchema } from 'zod-to-json-schema';
|
||||
|
||||
import * as javascript from '../javascript';
|
||||
|
||||
import type { Tool } from './tool';
|
||||
|
||||
const screenshot: Tool = {
|
||||
@@ -29,9 +31,24 @@ const screenshot: Tool = {
|
||||
|
||||
handle: async context => {
|
||||
const tab = await context.ensureTab();
|
||||
const screenshot = await tab.page.screenshot({ type: 'jpeg', quality: 50, scale: 'css' });
|
||||
const options = { type: 'jpeg' as 'jpeg', quality: 50, scale: 'css' as 'css' };
|
||||
|
||||
const code = [
|
||||
`// Take a screenshot of the current page`,
|
||||
`await page.screenshot(${javascript.formatObject(options)});`,
|
||||
];
|
||||
|
||||
const action = () => tab.page.screenshot(options).then(buffer => {
|
||||
return {
|
||||
content: [{ type: 'image' as 'image', data: buffer.toString('base64'), mimeType: 'image/jpeg' }],
|
||||
};
|
||||
});
|
||||
|
||||
return {
|
||||
content: [{ type: 'image', data: screenshot.toString('base64'), mimeType: 'image/jpeg' }],
|
||||
code,
|
||||
action,
|
||||
captureSnapshot: false,
|
||||
waitForNetwork: false
|
||||
};
|
||||
},
|
||||
};
|
||||
@@ -55,10 +72,17 @@ const moveMouse: Tool = {
|
||||
|
||||
handle: async (context, params) => {
|
||||
const validatedParams = moveMouseSchema.parse(params);
|
||||
const tab = context.currentTab();
|
||||
await tab.page.mouse.move(validatedParams.x, validatedParams.y);
|
||||
const tab = context.currentTabOrDie();
|
||||
const code = [
|
||||
`// Move mouse to (${validatedParams.x}, ${validatedParams.y})`,
|
||||
`await page.mouse.move(${validatedParams.x}, ${validatedParams.y});`,
|
||||
];
|
||||
const action = () => tab.page.mouse.move(validatedParams.x, validatedParams.y);
|
||||
return {
|
||||
content: [{ type: 'text', text: `Moved mouse to (${validatedParams.x}, ${validatedParams.y})` }],
|
||||
code,
|
||||
action,
|
||||
captureSnapshot: false,
|
||||
waitForNetwork: false
|
||||
};
|
||||
},
|
||||
};
|
||||
@@ -77,14 +101,25 @@ const click: Tool = {
|
||||
},
|
||||
|
||||
handle: async (context, params) => {
|
||||
return await context.currentTab().runAndWait(async tab => {
|
||||
const validatedParams = clickSchema.parse(params);
|
||||
const validatedParams = clickSchema.parse(params);
|
||||
const tab = context.currentTabOrDie();
|
||||
const code = [
|
||||
`// Click mouse at coordinates (${validatedParams.x}, ${validatedParams.y})`,
|
||||
`await page.mouse.move(${validatedParams.x}, ${validatedParams.y});`,
|
||||
`await page.mouse.down();`,
|
||||
`await page.mouse.up();`,
|
||||
];
|
||||
const action = async () => {
|
||||
await tab.page.mouse.move(validatedParams.x, validatedParams.y);
|
||||
await tab.page.mouse.down();
|
||||
await tab.page.mouse.up();
|
||||
}, {
|
||||
status: 'Clicked mouse',
|
||||
});
|
||||
};
|
||||
return {
|
||||
code,
|
||||
action,
|
||||
captureSnapshot: false,
|
||||
waitForNetwork: true,
|
||||
};
|
||||
},
|
||||
};
|
||||
|
||||
@@ -97,6 +132,7 @@ const dragSchema = elementSchema.extend({
|
||||
|
||||
const drag: Tool = {
|
||||
capability: 'core',
|
||||
|
||||
schema: {
|
||||
name: 'browser_screen_drag',
|
||||
description: 'Drag left mouse button',
|
||||
@@ -105,14 +141,29 @@ const drag: Tool = {
|
||||
|
||||
handle: async (context, params) => {
|
||||
const validatedParams = dragSchema.parse(params);
|
||||
return await context.currentTab().runAndWait(async tab => {
|
||||
const tab = context.currentTabOrDie();
|
||||
|
||||
const code = [
|
||||
`// Drag mouse from (${validatedParams.startX}, ${validatedParams.startY}) to (${validatedParams.endX}, ${validatedParams.endY})`,
|
||||
`await page.mouse.move(${validatedParams.startX}, ${validatedParams.startY});`,
|
||||
`await page.mouse.down();`,
|
||||
`await page.mouse.move(${validatedParams.endX}, ${validatedParams.endY});`,
|
||||
`await page.mouse.up();`,
|
||||
];
|
||||
|
||||
const action = async () => {
|
||||
await tab.page.mouse.move(validatedParams.startX, validatedParams.startY);
|
||||
await tab.page.mouse.down();
|
||||
await tab.page.mouse.move(validatedParams.endX, validatedParams.endY);
|
||||
await tab.page.mouse.up();
|
||||
}, {
|
||||
status: `Dragged mouse from (${validatedParams.startX}, ${validatedParams.startY}) to (${validatedParams.endX}, ${validatedParams.endY})`,
|
||||
});
|
||||
};
|
||||
|
||||
return {
|
||||
code,
|
||||
action,
|
||||
captureSnapshot: false,
|
||||
waitForNetwork: true,
|
||||
};
|
||||
},
|
||||
};
|
||||
|
||||
@@ -123,6 +174,7 @@ const typeSchema = z.object({
|
||||
|
||||
const type: Tool = {
|
||||
capability: 'core',
|
||||
|
||||
schema: {
|
||||
name: 'browser_screen_type',
|
||||
description: 'Type text',
|
||||
@@ -131,13 +183,30 @@ const type: Tool = {
|
||||
|
||||
handle: async (context, params) => {
|
||||
const validatedParams = typeSchema.parse(params);
|
||||
return await context.currentTab().runAndWait(async tab => {
|
||||
const tab = context.currentTabOrDie();
|
||||
|
||||
const code = [
|
||||
`// Type ${validatedParams.text}`,
|
||||
`await page.keyboard.type('${validatedParams.text}');`,
|
||||
];
|
||||
|
||||
const action = async () => {
|
||||
await tab.page.keyboard.type(validatedParams.text);
|
||||
if (validatedParams.submit)
|
||||
await tab.page.keyboard.press('Enter');
|
||||
}, {
|
||||
status: `Typed text "${validatedParams.text}"`,
|
||||
});
|
||||
};
|
||||
|
||||
if (validatedParams.submit) {
|
||||
code.push(`// Submit text`);
|
||||
code.push(`await page.keyboard.press('Enter');`);
|
||||
}
|
||||
|
||||
return {
|
||||
code,
|
||||
action,
|
||||
captureSnapshot: false,
|
||||
waitForNetwork: true,
|
||||
};
|
||||
},
|
||||
};
|
||||
|
||||
|
||||
@@ -14,9 +14,16 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import path from 'path';
|
||||
import os from 'os';
|
||||
|
||||
import { z } from 'zod';
|
||||
import zodToJsonSchema from 'zod-to-json-schema';
|
||||
|
||||
import { sanitizeForFilePath } from './utils';
|
||||
import { generateLocator } from '../context';
|
||||
import * as javascript from '../javascript';
|
||||
|
||||
import type * as playwright from 'playwright';
|
||||
import type { Tool } from './tool';
|
||||
|
||||
@@ -29,8 +36,13 @@ const snapshot: Tool = {
|
||||
},
|
||||
|
||||
handle: async context => {
|
||||
const tab = await context.ensureTab();
|
||||
return await tab.run(async () => {}, { captureSnapshot: true });
|
||||
await context.ensureTab();
|
||||
|
||||
return {
|
||||
code: [`// <internal code to capture accessibility snapshot>`],
|
||||
captureSnapshot: true,
|
||||
waitForNetwork: false,
|
||||
};
|
||||
},
|
||||
};
|
||||
|
||||
@@ -49,12 +61,20 @@ const click: Tool = {
|
||||
|
||||
handle: async (context, params) => {
|
||||
const validatedParams = elementSchema.parse(params);
|
||||
return await context.currentTab().runAndWaitWithSnapshot(async snapshot => {
|
||||
const locator = snapshot.refLocator(validatedParams.ref);
|
||||
await locator.click();
|
||||
}, {
|
||||
status: `Clicked "${validatedParams.element}"`,
|
||||
});
|
||||
const tab = context.currentTabOrDie();
|
||||
const locator = tab.snapshotOrDie().refLocator(validatedParams.ref);
|
||||
|
||||
const code = [
|
||||
`// Click ${validatedParams.element}`,
|
||||
`await page.${await generateLocator(locator)}.click();`
|
||||
];
|
||||
|
||||
return {
|
||||
code,
|
||||
action: () => locator.click(),
|
||||
captureSnapshot: true,
|
||||
waitForNetwork: true,
|
||||
};
|
||||
},
|
||||
};
|
||||
|
||||
@@ -75,13 +95,21 @@ const drag: Tool = {
|
||||
|
||||
handle: async (context, params) => {
|
||||
const validatedParams = dragSchema.parse(params);
|
||||
return await context.currentTab().runAndWaitWithSnapshot(async snapshot => {
|
||||
const startLocator = snapshot.refLocator(validatedParams.startRef);
|
||||
const endLocator = snapshot.refLocator(validatedParams.endRef);
|
||||
await startLocator.dragTo(endLocator);
|
||||
}, {
|
||||
status: `Dragged "${validatedParams.startElement}" to "${validatedParams.endElement}"`,
|
||||
});
|
||||
const snapshot = context.currentTabOrDie().snapshotOrDie();
|
||||
const startLocator = snapshot.refLocator(validatedParams.startRef);
|
||||
const endLocator = snapshot.refLocator(validatedParams.endRef);
|
||||
|
||||
const code = [
|
||||
`// Drag ${validatedParams.startElement} to ${validatedParams.endElement}`,
|
||||
`await page.${await generateLocator(startLocator)}.dragTo(page.${await generateLocator(endLocator)});`
|
||||
];
|
||||
|
||||
return {
|
||||
code,
|
||||
action: () => startLocator.dragTo(endLocator),
|
||||
captureSnapshot: true,
|
||||
waitForNetwork: true,
|
||||
};
|
||||
},
|
||||
};
|
||||
|
||||
@@ -95,12 +123,20 @@ const hover: Tool = {
|
||||
|
||||
handle: async (context, params) => {
|
||||
const validatedParams = elementSchema.parse(params);
|
||||
return await context.currentTab().runAndWaitWithSnapshot(async snapshot => {
|
||||
const locator = snapshot.refLocator(validatedParams.ref);
|
||||
await locator.hover();
|
||||
}, {
|
||||
status: `Hovered over "${validatedParams.element}"`,
|
||||
});
|
||||
const snapshot = context.currentTabOrDie().snapshotOrDie();
|
||||
const locator = snapshot.refLocator(validatedParams.ref);
|
||||
|
||||
const code = [
|
||||
`// Hover over ${validatedParams.element}`,
|
||||
`await page.${await generateLocator(locator)}.hover();`
|
||||
];
|
||||
|
||||
return {
|
||||
code,
|
||||
action: () => locator.hover(),
|
||||
captureSnapshot: true,
|
||||
waitForNetwork: true,
|
||||
};
|
||||
},
|
||||
};
|
||||
|
||||
@@ -120,17 +156,34 @@ const type: Tool = {
|
||||
|
||||
handle: async (context, params) => {
|
||||
const validatedParams = typeSchema.parse(params);
|
||||
return await context.currentTab().runAndWaitWithSnapshot(async snapshot => {
|
||||
const locator = snapshot.refLocator(validatedParams.ref);
|
||||
if (validatedParams.slowly)
|
||||
await locator.pressSequentially(validatedParams.text);
|
||||
else
|
||||
await locator.fill(validatedParams.text);
|
||||
if (validatedParams.submit)
|
||||
await locator.press('Enter');
|
||||
}, {
|
||||
status: `Typed "${validatedParams.text}" into "${validatedParams.element}"`,
|
||||
});
|
||||
const snapshot = context.currentTabOrDie().snapshotOrDie();
|
||||
const locator = snapshot.refLocator(validatedParams.ref);
|
||||
|
||||
const code: string[] = [];
|
||||
const steps: (() => Promise<void>)[] = [];
|
||||
|
||||
if (validatedParams.slowly) {
|
||||
code.push(`// Press "${validatedParams.text}" sequentially into "${validatedParams.element}"`);
|
||||
code.push(`await page.${await generateLocator(locator)}.pressSequentially(${javascript.quote(validatedParams.text)});`);
|
||||
steps.push(() => locator.pressSequentially(validatedParams.text));
|
||||
} else {
|
||||
code.push(`// Fill "${validatedParams.text}" into "${validatedParams.element}"`);
|
||||
code.push(`await page.${await generateLocator(locator)}.fill(${javascript.quote(validatedParams.text)});`);
|
||||
steps.push(() => locator.fill(validatedParams.text));
|
||||
}
|
||||
|
||||
if (validatedParams.submit) {
|
||||
code.push(`// Submit text`);
|
||||
code.push(`await page.${await generateLocator(locator)}.press('Enter');`);
|
||||
steps.push(() => locator.press('Enter'));
|
||||
}
|
||||
|
||||
return {
|
||||
code,
|
||||
action: () => steps.reduce((acc, step) => acc.then(step), Promise.resolve()),
|
||||
captureSnapshot: true,
|
||||
waitForNetwork: true,
|
||||
};
|
||||
},
|
||||
};
|
||||
|
||||
@@ -148,17 +201,32 @@ const selectOption: Tool = {
|
||||
|
||||
handle: async (context, params) => {
|
||||
const validatedParams = selectOptionSchema.parse(params);
|
||||
return await context.currentTab().runAndWaitWithSnapshot(async snapshot => {
|
||||
const locator = snapshot.refLocator(validatedParams.ref);
|
||||
await locator.selectOption(validatedParams.values);
|
||||
}, {
|
||||
status: `Selected option in "${validatedParams.element}"`,
|
||||
});
|
||||
const snapshot = context.currentTabOrDie().snapshotOrDie();
|
||||
const locator = snapshot.refLocator(validatedParams.ref);
|
||||
|
||||
const code = [
|
||||
`// Select options [${validatedParams.values.join(', ')}] in ${validatedParams.element}`,
|
||||
`await page.${await generateLocator(locator)}.selectOption(${javascript.formatObject(validatedParams.values)});`
|
||||
];
|
||||
|
||||
return {
|
||||
code,
|
||||
action: () => locator.selectOption(validatedParams.values).then(() => {}),
|
||||
captureSnapshot: true,
|
||||
waitForNetwork: true,
|
||||
};
|
||||
},
|
||||
};
|
||||
|
||||
const screenshotSchema = z.object({
|
||||
raw: z.boolean().optional().describe('Whether to return without compression (in PNG format). Default is false, which returns a JPEG image.'),
|
||||
element: z.string().optional().describe('Human-readable element description used to obtain permission to screenshot the element. If not provided, the screenshot will be taken of viewport. If element is provided, ref must be provided too.'),
|
||||
ref: z.string().optional().describe('Exact target element reference from the page snapshot. If not provided, the screenshot will be taken of viewport. If ref is provided, element must be provided too.'),
|
||||
}).refine(data => {
|
||||
return !!data.element === !!data.ref;
|
||||
}, {
|
||||
message: 'Both element and ref must be provided or neither.',
|
||||
path: ['ref', 'element']
|
||||
});
|
||||
|
||||
const screenshot: Tool = {
|
||||
@@ -171,15 +239,45 @@ const screenshot: Tool = {
|
||||
|
||||
handle: async (context, params) => {
|
||||
const validatedParams = screenshotSchema.parse(params);
|
||||
const tab = context.currentTab();
|
||||
const options: playwright.PageScreenshotOptions = validatedParams.raw ? { type: 'png', scale: 'css' } : { type: 'jpeg', quality: 50, scale: 'css' };
|
||||
const screenshot = await tab.page.screenshot(options);
|
||||
return {
|
||||
content: [{ type: 'image', data: screenshot.toString('base64'), mimeType: validatedParams.raw ? 'image/png' : 'image/jpeg' }],
|
||||
const tab = context.currentTabOrDie();
|
||||
const snapshot = tab.snapshotOrDie();
|
||||
const fileType = validatedParams.raw ? 'png' : 'jpeg';
|
||||
const fileName = path.join(os.tmpdir(), sanitizeForFilePath(`page-${new Date().toISOString()}`)) + `.${fileType}`;
|
||||
const options: playwright.PageScreenshotOptions = { type: fileType, quality: fileType === 'png' ? undefined : 50, scale: 'css', path: fileName };
|
||||
const isElementScreenshot = validatedParams.element && validatedParams.ref;
|
||||
|
||||
const code = [
|
||||
`// Screenshot ${isElementScreenshot ? validatedParams.element : 'viewport'} and save it as ${fileName}`,
|
||||
];
|
||||
|
||||
const locator = validatedParams.ref ? snapshot.refLocator(validatedParams.ref) : null;
|
||||
|
||||
if (locator)
|
||||
code.push(`await page.${await generateLocator(locator)}.screenshot(${javascript.formatObject(options)});`);
|
||||
else
|
||||
code.push(`await page.screenshot(${javascript.formatObject(options)});`);
|
||||
|
||||
const action = async () => {
|
||||
const screenshot = locator ? await locator.screenshot(options) : await tab.page.screenshot(options);
|
||||
return {
|
||||
content: [{
|
||||
type: 'image' as 'image',
|
||||
data: screenshot.toString('base64'),
|
||||
mimeType: fileType === 'png' ? 'image/png' : 'image/jpeg',
|
||||
}]
|
||||
};
|
||||
};
|
||||
},
|
||||
|
||||
return {
|
||||
code,
|
||||
action,
|
||||
captureSnapshot: true,
|
||||
waitForNetwork: false,
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
export default [
|
||||
snapshot,
|
||||
click,
|
||||
|
||||
@@ -21,17 +21,25 @@ import type { ToolFactory, Tool } from './tool';
|
||||
|
||||
const listTabs: Tool = {
|
||||
capability: 'tabs',
|
||||
|
||||
schema: {
|
||||
name: 'browser_tab_list',
|
||||
description: 'List browser tabs',
|
||||
inputSchema: zodToJsonSchema(z.object({})),
|
||||
},
|
||||
|
||||
handle: async context => {
|
||||
await context.ensureTab();
|
||||
return {
|
||||
content: [{
|
||||
type: 'text',
|
||||
text: await context.listTabs(),
|
||||
}],
|
||||
code: [`// <internal code to list tabs>`],
|
||||
captureSnapshot: false,
|
||||
waitForNetwork: false,
|
||||
resultOverride: {
|
||||
content: [{
|
||||
type: 'text',
|
||||
text: await context.listTabsMarkdown(),
|
||||
}],
|
||||
},
|
||||
};
|
||||
},
|
||||
};
|
||||
@@ -42,16 +50,25 @@ const selectTabSchema = z.object({
|
||||
|
||||
const selectTab: ToolFactory = captureSnapshot => ({
|
||||
capability: 'tabs',
|
||||
|
||||
schema: {
|
||||
name: 'browser_tab_select',
|
||||
description: 'Select a tab by index',
|
||||
inputSchema: zodToJsonSchema(selectTabSchema),
|
||||
},
|
||||
|
||||
handle: async (context, params) => {
|
||||
const validatedParams = selectTabSchema.parse(params);
|
||||
await context.selectTab(validatedParams.index);
|
||||
const currentTab = await context.ensureTab();
|
||||
return await currentTab.run(async () => {}, { captureSnapshot });
|
||||
const code = [
|
||||
`// <internal code to select tab ${validatedParams.index}>`,
|
||||
];
|
||||
|
||||
return {
|
||||
code,
|
||||
captureSnapshot,
|
||||
waitForNetwork: false
|
||||
};
|
||||
},
|
||||
});
|
||||
|
||||
@@ -59,21 +76,31 @@ const newTabSchema = z.object({
|
||||
url: z.string().optional().describe('The URL to navigate to in the new tab. If not provided, the new tab will be blank.'),
|
||||
});
|
||||
|
||||
const newTab: Tool = {
|
||||
const newTab: ToolFactory = captureSnapshot => ({
|
||||
capability: 'tabs',
|
||||
|
||||
schema: {
|
||||
name: 'browser_tab_new',
|
||||
description: 'Open a new tab',
|
||||
inputSchema: zodToJsonSchema(newTabSchema),
|
||||
},
|
||||
|
||||
handle: async (context, params) => {
|
||||
const validatedParams = newTabSchema.parse(params);
|
||||
await context.newTab();
|
||||
if (validatedParams.url)
|
||||
await context.currentTab().navigate(validatedParams.url);
|
||||
return await context.currentTab().run(async () => {}, { captureSnapshot: true });
|
||||
await context.currentTabOrDie().navigate(validatedParams.url);
|
||||
|
||||
const code = [
|
||||
`// <internal code to open a new tab>`,
|
||||
];
|
||||
return {
|
||||
code,
|
||||
captureSnapshot,
|
||||
waitForNetwork: false
|
||||
};
|
||||
},
|
||||
};
|
||||
});
|
||||
|
||||
const closeTabSchema = z.object({
|
||||
index: z.number().optional().describe('The index of the tab to close. Closes current tab if not provided.'),
|
||||
@@ -81,29 +108,30 @@ const closeTabSchema = z.object({
|
||||
|
||||
const closeTab: ToolFactory = captureSnapshot => ({
|
||||
capability: 'tabs',
|
||||
|
||||
schema: {
|
||||
name: 'browser_tab_close',
|
||||
description: 'Close a tab',
|
||||
inputSchema: zodToJsonSchema(closeTabSchema),
|
||||
},
|
||||
|
||||
handle: async (context, params) => {
|
||||
const validatedParams = closeTabSchema.parse(params);
|
||||
await context.closeTab(validatedParams.index);
|
||||
const currentTab = context.currentTab();
|
||||
if (currentTab)
|
||||
return await currentTab.run(async () => {}, { captureSnapshot });
|
||||
const code = [
|
||||
`// <internal code to close tab ${validatedParams.index}>`,
|
||||
];
|
||||
return {
|
||||
content: [{
|
||||
type: 'text',
|
||||
text: await context.listTabs(),
|
||||
}],
|
||||
code,
|
||||
captureSnapshot,
|
||||
waitForNetwork: false
|
||||
};
|
||||
},
|
||||
});
|
||||
|
||||
export default (captureSnapshot: boolean) => [
|
||||
listTabs,
|
||||
newTab,
|
||||
newTab(captureSnapshot),
|
||||
selectTab(captureSnapshot),
|
||||
closeTab(captureSnapshot),
|
||||
];
|
||||
|
||||
@@ -17,7 +17,7 @@
|
||||
import type { ImageContent, TextContent } from '@modelcontextprotocol/sdk/types';
|
||||
import type { JsonSchema7Type } from 'zod-to-json-schema';
|
||||
import type { Context } from '../context';
|
||||
|
||||
import type * as playwright from 'playwright';
|
||||
export type ToolCapability = 'core' | 'tabs' | 'pdf' | 'history' | 'wait' | 'files' | 'install';
|
||||
|
||||
export type ToolSchema = {
|
||||
@@ -26,14 +26,34 @@ export type ToolSchema = {
|
||||
inputSchema: JsonSchema7Type;
|
||||
};
|
||||
|
||||
export type FileUploadModalState = {
|
||||
type: 'fileChooser';
|
||||
description: string;
|
||||
fileChooser: playwright.FileChooser;
|
||||
};
|
||||
|
||||
export type DialogModalState = {
|
||||
type: 'dialog';
|
||||
description: string;
|
||||
dialog: playwright.Dialog;
|
||||
};
|
||||
|
||||
export type ModalState = FileUploadModalState | DialogModalState;
|
||||
|
||||
export type ToolActionResult = { content?: (ImageContent | TextContent)[] } | undefined | void;
|
||||
|
||||
export type ToolResult = {
|
||||
content: (ImageContent | TextContent)[];
|
||||
isError?: boolean;
|
||||
code: string[];
|
||||
action?: () => Promise<ToolActionResult>;
|
||||
captureSnapshot: boolean;
|
||||
waitForNetwork: boolean;
|
||||
resultOverride?: ToolActionResult;
|
||||
};
|
||||
|
||||
export type Tool = {
|
||||
capability: ToolCapability;
|
||||
schema: ToolSchema;
|
||||
clearsModalState?: ModalState['type'];
|
||||
handle: (context: Context, params?: Record<string, any>) => Promise<ToolResult>;
|
||||
};
|
||||
|
||||
|
||||
@@ -15,8 +15,9 @@
|
||||
*/
|
||||
|
||||
import type * as playwright from 'playwright';
|
||||
import type { Context } from '../context';
|
||||
|
||||
export async function waitForCompletion<R>(page: playwright.Page, callback: () => Promise<R>): Promise<R> {
|
||||
export async function waitForCompletion<R>(context: Context, page: playwright.Page, callback: () => Promise<R>): Promise<R> {
|
||||
const requests = new Set<playwright.Request>();
|
||||
let frameNavigated = false;
|
||||
let waitCallback: () => void = () => {};
|
||||
@@ -62,7 +63,7 @@ export async function waitForCompletion<R>(page: playwright.Page, callback: () =
|
||||
if (!requests.size && !frameNavigated)
|
||||
waitCallback();
|
||||
await waitBarrier;
|
||||
await page.evaluate(() => new Promise(f => setTimeout(f, 1000)));
|
||||
await context.waitForTimeout(1000);
|
||||
return result;
|
||||
} finally {
|
||||
dispose();
|
||||
|
||||
@@ -20,8 +20,10 @@ test('test snapshot tool list', async ({ client }) => {
|
||||
const { tools } = await client.listTools();
|
||||
expect(new Set(tools.map(t => t.name))).toEqual(new Set([
|
||||
'browser_click',
|
||||
'browser_console_messages',
|
||||
'browser_drag',
|
||||
'browser_file_upload',
|
||||
'browser_handle_dialog',
|
||||
'browser_hover',
|
||||
'browser_select_option',
|
||||
'browser_type',
|
||||
@@ -47,7 +49,9 @@ test('test vision tool list', async ({ visionClient }) => {
|
||||
const { tools: visionTools } = await visionClient.listTools();
|
||||
expect(new Set(visionTools.map(t => t.name))).toEqual(new Set([
|
||||
'browser_close',
|
||||
'browser_console_messages',
|
||||
'browser_file_upload',
|
||||
'browser_handle_dialog',
|
||||
'browser_install',
|
||||
'browser_navigate_back',
|
||||
'browser_navigate_forward',
|
||||
@@ -70,12 +74,7 @@ test('test vision tool list', async ({ visionClient }) => {
|
||||
|
||||
test('test resources list', async ({ client }) => {
|
||||
const { resources } = await client.listResources();
|
||||
expect(resources).toEqual([
|
||||
expect.objectContaining({
|
||||
uri: 'browser://console',
|
||||
mimeType: 'text/plain',
|
||||
}),
|
||||
]);
|
||||
expect(resources).toEqual([]);
|
||||
});
|
||||
|
||||
test('test capabilities', async ({ startClient }) => {
|
||||
|
||||
@@ -23,17 +23,7 @@ test('cdp server', async ({ cdpEndpoint, startClient }) => {
|
||||
arguments: {
|
||||
url: 'data:text/html,<html><title>Title</title><body>Hello, world!</body></html>',
|
||||
},
|
||||
})).toHaveTextContent(`
|
||||
Navigated to data:text/html,<html><title>Title</title><body>Hello, world!</body></html>
|
||||
|
||||
- Page URL: data:text/html,<html><title>Title</title><body>Hello, world!</body></html>
|
||||
- Page Title: Title
|
||||
- Page Snapshot
|
||||
\`\`\`yaml
|
||||
- text: Hello, world!
|
||||
\`\`\`
|
||||
`
|
||||
);
|
||||
})).toContainTextContent(`- text: Hello, world!`);
|
||||
});
|
||||
|
||||
test('cdp server reuse tab', async ({ cdpEndpoint, startClient }) => {
|
||||
@@ -51,6 +41,11 @@ test('cdp server reuse tab', async ({ cdpEndpoint, startClient }) => {
|
||||
name: 'browser_snapshot',
|
||||
arguments: {},
|
||||
})).toHaveTextContent(`
|
||||
- Ran Playwright code:
|
||||
\`\`\`js
|
||||
// <internal code to capture accessibility snapshot>
|
||||
\`\`\`
|
||||
|
||||
- Page URL: data:text/html,hello world
|
||||
- Page Title:
|
||||
- Page Snapshot
|
||||
|
||||
@@ -16,7 +16,7 @@
|
||||
|
||||
import { test, expect } from './fixtures';
|
||||
|
||||
test('browser://console', async ({ client }) => {
|
||||
test('browser_console_messages', async ({ client }) => {
|
||||
await client.callTool({
|
||||
name: 'browser_navigate',
|
||||
arguments: {
|
||||
@@ -24,12 +24,12 @@ test('browser://console', async ({ client }) => {
|
||||
},
|
||||
});
|
||||
|
||||
const resource = await client.readResource({
|
||||
uri: 'browser://console',
|
||||
const resource = await client.callTool({
|
||||
name: 'browser_console_messages',
|
||||
arguments: {},
|
||||
});
|
||||
expect(resource.contents).toEqual([{
|
||||
uri: 'browser://console',
|
||||
mimeType: 'text/plain',
|
||||
text: '[LOG] Hello, world!\n[ERROR] Error',
|
||||
}]);
|
||||
expect(resource).toHaveTextContent([
|
||||
'[LOG] Hello, world!',
|
||||
'[ERROR] Error',
|
||||
].join('\n'));
|
||||
});
|
||||
|
||||
@@ -14,7 +14,6 @@
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import fs from 'fs/promises';
|
||||
import { test, expect } from './fixtures';
|
||||
|
||||
test('browser_navigate', async ({ client }) => {
|
||||
@@ -24,7 +23,11 @@ test('browser_navigate', async ({ client }) => {
|
||||
url: 'data:text/html,<html><title>Title</title><body>Hello, world!</body></html>',
|
||||
},
|
||||
})).toHaveTextContent(`
|
||||
Navigated to data:text/html,<html><title>Title</title><body>Hello, world!</body></html>
|
||||
- Ran Playwright code:
|
||||
\`\`\`js
|
||||
// Navigate to data:text/html,<html><title>Title</title><body>Hello, world!</body></html>
|
||||
await page.goto('data:text/html,<html><title>Title</title><body>Hello, world!</body></html>');
|
||||
\`\`\`
|
||||
|
||||
- Page URL: data:text/html,<html><title>Title</title><body>Hello, world!</body></html>
|
||||
- Page Title: Title
|
||||
@@ -50,7 +53,12 @@ test('browser_click', async ({ client }) => {
|
||||
element: 'Submit button',
|
||||
ref: 's1e3',
|
||||
},
|
||||
})).toHaveTextContent(`Clicked "Submit button"
|
||||
})).toHaveTextContent(`
|
||||
- Ran Playwright code:
|
||||
\`\`\`js
|
||||
// Click Submit button
|
||||
await page.getByRole('button', { name: 'Submit' }).click();
|
||||
\`\`\`
|
||||
|
||||
- Page URL: data:text/html,<html><title>Title</title><button>Submit</button></html>
|
||||
- Page Title: Title
|
||||
@@ -61,7 +69,6 @@ test('browser_click', async ({ client }) => {
|
||||
`);
|
||||
});
|
||||
|
||||
|
||||
test('browser_select_option', async ({ client }) => {
|
||||
await client.callTool({
|
||||
name: 'browser_navigate',
|
||||
@@ -77,15 +84,20 @@ test('browser_select_option', async ({ client }) => {
|
||||
ref: 's1e3',
|
||||
values: ['bar'],
|
||||
},
|
||||
})).toHaveTextContent(`Selected option in "Select"
|
||||
})).toHaveTextContent(`
|
||||
- Ran Playwright code:
|
||||
\`\`\`js
|
||||
// Select options [bar] in Select
|
||||
await page.getByRole('combobox').selectOption(['bar']);
|
||||
\`\`\`
|
||||
|
||||
- Page URL: data:text/html,<html><title>Title</title><select><option value="foo">Foo</option><option value="bar">Bar</option></select></html>
|
||||
- Page Title: Title
|
||||
- Page Snapshot
|
||||
\`\`\`yaml
|
||||
- combobox [ref=s2e3]:
|
||||
- option "Foo" [ref=s2e4]
|
||||
- option "Bar" [selected] [ref=s2e5]
|
||||
- option "Foo" [ref=s2e4]
|
||||
- option "Bar" [selected] [ref=s2e5]
|
||||
\`\`\`
|
||||
`);
|
||||
});
|
||||
@@ -105,77 +117,25 @@ test('browser_select_option (multiple)', async ({ client }) => {
|
||||
ref: 's1e3',
|
||||
values: ['bar', 'baz'],
|
||||
},
|
||||
})).toHaveTextContent(`Selected option in "Select"
|
||||
})).toHaveTextContent(`
|
||||
- Ran Playwright code:
|
||||
\`\`\`js
|
||||
// Select options [bar, baz] in Select
|
||||
await page.getByRole('listbox').selectOption(['bar', 'baz']);
|
||||
\`\`\`
|
||||
|
||||
- Page URL: data:text/html,<html><title>Title</title><select multiple><option value="foo">Foo</option><option value="bar">Bar</option><option value="baz">Baz</option></select></html>
|
||||
- Page Title: Title
|
||||
- Page Snapshot
|
||||
\`\`\`yaml
|
||||
- listbox [ref=s2e3]:
|
||||
- option "Foo" [ref=s2e4]
|
||||
- option "Bar" [selected] [ref=s2e5]
|
||||
- option "Baz" [selected] [ref=s2e6]
|
||||
- option "Foo" [ref=s2e4]
|
||||
- option "Bar" [selected] [ref=s2e5]
|
||||
- option "Baz" [selected] [ref=s2e6]
|
||||
\`\`\`
|
||||
`);
|
||||
});
|
||||
|
||||
test('browser_file_upload', async ({ client }) => {
|
||||
expect(await client.callTool({
|
||||
name: 'browser_navigate',
|
||||
arguments: {
|
||||
url: 'data:text/html,<html><title>Title</title><input type="file" /><button>Button</button></html>',
|
||||
},
|
||||
})).toContainTextContent('- textbox [ref=s1e3]');
|
||||
|
||||
expect(await client.callTool({
|
||||
name: 'browser_click',
|
||||
arguments: {
|
||||
element: 'Textbox',
|
||||
ref: 's1e3',
|
||||
},
|
||||
})).toContainTextContent('There is a file chooser visible that requires browser_file_upload to be called');
|
||||
|
||||
const filePath = test.info().outputPath('test.txt');
|
||||
await fs.writeFile(filePath, 'Hello, world!');
|
||||
|
||||
{
|
||||
const response = await client.callTool({
|
||||
name: 'browser_file_upload',
|
||||
arguments: {
|
||||
paths: [filePath],
|
||||
},
|
||||
});
|
||||
|
||||
expect(response).not.toContainTextContent('There is a file chooser visible that requires browser_file_upload to be called');
|
||||
expect(response).toContainTextContent('textbox [ref=s3e3]: C:\\fakepath\\test.txt');
|
||||
}
|
||||
|
||||
{
|
||||
const response = await client.callTool({
|
||||
name: 'browser_click',
|
||||
arguments: {
|
||||
element: 'Textbox',
|
||||
ref: 's3e3',
|
||||
},
|
||||
});
|
||||
|
||||
expect(response).toContainTextContent('There is a file chooser visible that requires browser_file_upload to be called');
|
||||
expect(response).toContainTextContent('button "Button" [ref=s4e4]');
|
||||
}
|
||||
|
||||
{
|
||||
const response = await client.callTool({
|
||||
name: 'browser_click',
|
||||
arguments: {
|
||||
element: 'Button',
|
||||
ref: 's4e4',
|
||||
},
|
||||
});
|
||||
|
||||
expect(response, 'not submitting browser_file_upload dismisses file chooser').not.toContainTextContent('There is a file chooser visible that requires browser_file_upload to be called');
|
||||
}
|
||||
});
|
||||
|
||||
test('browser_type', async ({ client }) => {
|
||||
await client.callTool({
|
||||
name: 'browser_navigate',
|
||||
@@ -192,14 +152,10 @@ test('browser_type', async ({ client }) => {
|
||||
submit: true,
|
||||
},
|
||||
});
|
||||
const resource = await client.readResource({
|
||||
uri: 'browser://console',
|
||||
});
|
||||
expect(resource.contents).toEqual([{
|
||||
uri: 'browser://console',
|
||||
mimeType: 'text/plain',
|
||||
text: '[LOG] Key pressed: Enter , Text: Hi!',
|
||||
}]);
|
||||
expect(await client.callTool({
|
||||
name: 'browser_console_messages',
|
||||
arguments: {},
|
||||
})).toHaveTextContent('[LOG] Key pressed: Enter , Text: Hi!');
|
||||
});
|
||||
|
||||
test('browser_type (slowly)', async ({ client }) => {
|
||||
@@ -219,19 +175,15 @@ test('browser_type (slowly)', async ({ client }) => {
|
||||
slowly: true,
|
||||
},
|
||||
});
|
||||
const resource = await client.readResource({
|
||||
uri: 'browser://console',
|
||||
});
|
||||
expect(resource.contents).toEqual([{
|
||||
uri: 'browser://console',
|
||||
mimeType: 'text/plain',
|
||||
text: [
|
||||
'[LOG] Key pressed: H Text: ',
|
||||
'[LOG] Key pressed: i Text: H',
|
||||
'[LOG] Key pressed: ! Text: Hi',
|
||||
'[LOG] Key pressed: Enter Text: Hi!',
|
||||
].join('\n'),
|
||||
}]);
|
||||
expect(await client.callTool({
|
||||
name: 'browser_console_messages',
|
||||
arguments: {},
|
||||
})).toHaveTextContent([
|
||||
'[LOG] Key pressed: H Text: ',
|
||||
'[LOG] Key pressed: i Text: H',
|
||||
'[LOG] Key pressed: ! Text: Hi',
|
||||
'[LOG] Key pressed: Enter Text: Hi!',
|
||||
].join('\n'));
|
||||
});
|
||||
|
||||
test('browser_resize', async ({ client }) => {
|
||||
@@ -249,6 +201,10 @@ test('browser_resize', async ({ client }) => {
|
||||
height: 780,
|
||||
},
|
||||
});
|
||||
expect(response).toContainTextContent('Resized browser window');
|
||||
expect(response).toContainTextContent('Window size: 390x780');
|
||||
expect(response).toContainTextContent(`- Ran Playwright code:
|
||||
\`\`\`js
|
||||
// Resize browser window to 390x780
|
||||
await page.setViewportSize({ width: 390, height: 780 });
|
||||
\`\`\``);
|
||||
await expect.poll(() => client.callTool({ name: 'browser_snapshot' })).toContainTextContent('Window size: 390x780');
|
||||
});
|
||||
192
tests/dialogs.spec.ts
Normal file
192
tests/dialogs.spec.ts
Normal file
@@ -0,0 +1,192 @@
|
||||
/**
|
||||
* Copyright (c) Microsoft Corporation.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import { test, expect } from './fixtures';
|
||||
|
||||
// https://github.com/microsoft/playwright/issues/35663
|
||||
test.skip(({ mcpBrowser, mcpHeadless }) => mcpBrowser === 'webkit' && mcpHeadless);
|
||||
|
||||
test('alert dialog', async ({ client }) => {
|
||||
expect(await client.callTool({
|
||||
name: 'browser_navigate',
|
||||
arguments: {
|
||||
url: 'data:text/html,<html><title>Title</title><button onclick="alert(\'Alert\')">Button</button></html>',
|
||||
},
|
||||
})).toContainTextContent('- button "Button" [ref=s1e3]');
|
||||
|
||||
expect(await client.callTool({
|
||||
name: 'browser_click',
|
||||
arguments: {
|
||||
element: 'Button',
|
||||
ref: 's1e3',
|
||||
},
|
||||
})).toHaveTextContent(`- Ran Playwright code:
|
||||
\`\`\`js
|
||||
// Click Button
|
||||
await page.getByRole('button', { name: 'Button' }).click();
|
||||
\`\`\`
|
||||
|
||||
### Modal state
|
||||
- ["alert" dialog with message "Alert"]: can be handled by the "browser_handle_dialog" tool`);
|
||||
|
||||
const result = await client.callTool({
|
||||
name: 'browser_handle_dialog',
|
||||
arguments: {
|
||||
accept: true,
|
||||
},
|
||||
});
|
||||
|
||||
expect(result).not.toContainTextContent('### Modal state');
|
||||
expect(result).toHaveTextContent(`- Ran Playwright code:
|
||||
\`\`\`js
|
||||
// <internal code to handle "alert" dialog>
|
||||
\`\`\`
|
||||
|
||||
- Page URL: data:text/html,<html><title>Title</title><button onclick="alert('Alert')">Button</button></html>
|
||||
- Page Title: Title
|
||||
- Page Snapshot
|
||||
\`\`\`yaml
|
||||
- button "Button" [ref=s2e3]
|
||||
\`\`\`
|
||||
`);
|
||||
});
|
||||
|
||||
test('two alert dialogs', async ({ client }) => {
|
||||
test.fixme(true, 'Race between the dialog and ariaSnapshot');
|
||||
expect(await client.callTool({
|
||||
name: 'browser_navigate',
|
||||
arguments: {
|
||||
url: 'data:text/html,<html><title>Title</title><button onclick="alert(\'Alert 1\');alert(\'Alert 2\');">Button</button></html>',
|
||||
},
|
||||
})).toContainTextContent('- button "Button" [ref=s1e3]');
|
||||
|
||||
expect(await client.callTool({
|
||||
name: 'browser_click',
|
||||
arguments: {
|
||||
element: 'Button',
|
||||
ref: 's1e3',
|
||||
},
|
||||
})).toHaveTextContent(`- Ran Playwright code:
|
||||
\`\`\`js
|
||||
// Click Button
|
||||
await page.getByRole('button', { name: 'Button' }).click();
|
||||
\`\`\`
|
||||
|
||||
### Modal state
|
||||
- ["alert" dialog with message "Alert 1"]: can be handled by the "browser_handle_dialog" tool`);
|
||||
|
||||
const result = await client.callTool({
|
||||
name: 'browser_handle_dialog',
|
||||
arguments: {
|
||||
accept: true,
|
||||
},
|
||||
});
|
||||
|
||||
expect(result).not.toContainTextContent('### Modal state');
|
||||
});
|
||||
|
||||
test('confirm dialog (true)', async ({ client }) => {
|
||||
expect(await client.callTool({
|
||||
name: 'browser_navigate',
|
||||
arguments: {
|
||||
url: 'data:text/html,<html><title>Title</title><button onclick="document.body.textContent = confirm(\'Confirm\')">Button</button></html>',
|
||||
},
|
||||
})).toContainTextContent('- button "Button" [ref=s1e3]');
|
||||
|
||||
expect(await client.callTool({
|
||||
name: 'browser_click',
|
||||
arguments: {
|
||||
element: 'Button',
|
||||
ref: 's1e3',
|
||||
},
|
||||
})).toContainTextContent(`### Modal state
|
||||
- ["confirm" dialog with message "Confirm"]: can be handled by the "browser_handle_dialog" tool`);
|
||||
|
||||
const result = await client.callTool({
|
||||
name: 'browser_handle_dialog',
|
||||
arguments: {
|
||||
accept: true,
|
||||
},
|
||||
});
|
||||
|
||||
expect(result).not.toContainTextContent('### Modal state');
|
||||
expect(result).toContainTextContent('// <internal code to handle "confirm" dialog>');
|
||||
expect(result).toContainTextContent(`- Page Snapshot
|
||||
\`\`\`yaml
|
||||
- text: "true"
|
||||
\`\`\``);
|
||||
});
|
||||
|
||||
test('confirm dialog (false)', async ({ client }) => {
|
||||
expect(await client.callTool({
|
||||
name: 'browser_navigate',
|
||||
arguments: {
|
||||
url: 'data:text/html,<html><title>Title</title><button onclick="document.body.textContent = confirm(\'Confirm\')">Button</button></html>',
|
||||
},
|
||||
})).toContainTextContent('- button "Button" [ref=s1e3]');
|
||||
|
||||
expect(await client.callTool({
|
||||
name: 'browser_click',
|
||||
arguments: {
|
||||
element: 'Button',
|
||||
ref: 's1e3',
|
||||
},
|
||||
})).toContainTextContent(`### Modal state
|
||||
- ["confirm" dialog with message "Confirm"]: can be handled by the "browser_handle_dialog" tool`);
|
||||
|
||||
const result = await client.callTool({
|
||||
name: 'browser_handle_dialog',
|
||||
arguments: {
|
||||
accept: false,
|
||||
},
|
||||
});
|
||||
|
||||
expect(result).toContainTextContent(`- Page Snapshot
|
||||
\`\`\`yaml
|
||||
- text: "false"
|
||||
\`\`\``);
|
||||
});
|
||||
|
||||
test('prompt dialog', async ({ client }) => {
|
||||
expect(await client.callTool({
|
||||
name: 'browser_navigate',
|
||||
arguments: {
|
||||
url: 'data:text/html,<html><title>Title</title><button onclick="document.body.textContent = prompt(\'Prompt\')">Button</button></html>',
|
||||
},
|
||||
})).toContainTextContent('- button "Button" [ref=s1e3]');
|
||||
|
||||
expect(await client.callTool({
|
||||
name: 'browser_click',
|
||||
arguments: {
|
||||
element: 'Button',
|
||||
ref: 's1e3',
|
||||
},
|
||||
})).toContainTextContent(`### Modal state
|
||||
- ["prompt" dialog with message "Prompt"]: can be handled by the "browser_handle_dialog" tool`);
|
||||
|
||||
const result = await client.callTool({
|
||||
name: 'browser_handle_dialog',
|
||||
arguments: {
|
||||
accept: true,
|
||||
promptText: 'Answer',
|
||||
},
|
||||
});
|
||||
|
||||
expect(result).toContainTextContent(`- Page Snapshot
|
||||
\`\`\`yaml
|
||||
- text: Answer
|
||||
\`\`\``);
|
||||
});
|
||||
77
tests/files.spec.ts
Normal file
77
tests/files.spec.ts
Normal file
@@ -0,0 +1,77 @@
|
||||
/**
|
||||
* Copyright (c) Microsoft Corporation.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import { test, expect } from './fixtures';
|
||||
import fs from 'fs/promises';
|
||||
|
||||
test('browser_file_upload', async ({ client }) => {
|
||||
expect(await client.callTool({
|
||||
name: 'browser_navigate',
|
||||
arguments: {
|
||||
url: 'data:text/html,<html><title>Title</title><input type="file" /><button>Button</button></html>',
|
||||
},
|
||||
})).toContainTextContent('- textbox [ref=s1e3]');
|
||||
|
||||
expect(await client.callTool({
|
||||
name: 'browser_click',
|
||||
arguments: {
|
||||
element: 'Textbox',
|
||||
ref: 's1e3',
|
||||
},
|
||||
})).toContainTextContent(`### Modal state
|
||||
- [File chooser]: can be handled by the "browser_file_upload" tool`);
|
||||
|
||||
const filePath = test.info().outputPath('test.txt');
|
||||
await fs.writeFile(filePath, 'Hello, world!');
|
||||
|
||||
{
|
||||
const response = await client.callTool({
|
||||
name: 'browser_file_upload',
|
||||
arguments: {
|
||||
paths: [filePath],
|
||||
},
|
||||
});
|
||||
|
||||
expect(response).not.toContainTextContent('### Modal state');
|
||||
expect(response).toContainTextContent('textbox [ref=s3e3]: C:\\fakepath\\test.txt');
|
||||
}
|
||||
|
||||
{
|
||||
const response = await client.callTool({
|
||||
name: 'browser_click',
|
||||
arguments: {
|
||||
element: 'Textbox',
|
||||
ref: 's3e3',
|
||||
},
|
||||
});
|
||||
|
||||
expect(response).toContainTextContent('- [File chooser]: can be handled by the \"browser_file_upload\" tool');
|
||||
}
|
||||
|
||||
{
|
||||
const response = await client.callTool({
|
||||
name: 'browser_click',
|
||||
arguments: {
|
||||
element: 'Button',
|
||||
ref: 's4e4',
|
||||
},
|
||||
});
|
||||
|
||||
expect(response).toContainTextContent(`Tool "browser_click" does not handle the modal state.
|
||||
### Modal state
|
||||
- [File chooser]: can be handled by the "browser_file_upload" tool`);
|
||||
}
|
||||
});
|
||||
@@ -22,7 +22,7 @@ import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js'
|
||||
import { Client } from '@modelcontextprotocol/sdk/client/index.js';
|
||||
import { spawn } from 'child_process';
|
||||
|
||||
type Fixtures = {
|
||||
type TestFixtures = {
|
||||
client: Client;
|
||||
visionClient: Client;
|
||||
startClient: (options?: { args?: string[] }) => Promise<Client>;
|
||||
@@ -30,7 +30,12 @@ type Fixtures = {
|
||||
cdpEndpoint: string;
|
||||
};
|
||||
|
||||
export const test = baseTest.extend<Fixtures>({
|
||||
type WorkerFixtures = {
|
||||
mcpHeadless: boolean;
|
||||
mcpBrowser: string | undefined;
|
||||
};
|
||||
|
||||
export const test = baseTest.extend<TestFixtures, WorkerFixtures>({
|
||||
|
||||
client: async ({ startClient }, use) => {
|
||||
await use(await startClient());
|
||||
@@ -40,12 +45,16 @@ export const test = baseTest.extend<Fixtures>({
|
||||
await use(await startClient({ args: ['--vision'] }));
|
||||
},
|
||||
|
||||
startClient: async ({ }, use, testInfo) => {
|
||||
startClient: async ({ mcpHeadless, mcpBrowser }, use, testInfo) => {
|
||||
const userDataDir = testInfo.outputPath('user-data-dir');
|
||||
let client: StdioClientTransport | undefined;
|
||||
|
||||
use(async options => {
|
||||
const args = ['--headless', '--user-data-dir', userDataDir];
|
||||
const args = ['--user-data-dir', userDataDir];
|
||||
if (mcpHeadless)
|
||||
args.push('--headless');
|
||||
if (mcpBrowser)
|
||||
args.push(`--browser=${mcpBrowser}`);
|
||||
if (options?.args)
|
||||
args.push(...options.args);
|
||||
const transport = new StdioClientTransport({
|
||||
@@ -89,6 +98,12 @@ export const test = baseTest.extend<Fixtures>({
|
||||
await use(`http://localhost:${port}`);
|
||||
browserProcess.kill();
|
||||
},
|
||||
|
||||
mcpHeadless: [async ({ headless }, use) => {
|
||||
await use(headless);
|
||||
}, { scope: 'worker' }],
|
||||
|
||||
mcpBrowser: ['chromium', { option: true, scope: 'worker' }],
|
||||
});
|
||||
|
||||
type Response = Awaited<ReturnType<Client['callTool']>>;
|
||||
|
||||
@@ -26,12 +26,11 @@ test('stitched aria frames', async ({ client }) => {
|
||||
\`\`\`yaml
|
||||
- heading "Hello" [level=1] [ref=s1e3]
|
||||
- iframe [ref=s1e4]:
|
||||
- button "World" [ref=f1s1e3]
|
||||
- main [ref=f1s1e4]:
|
||||
- iframe [ref=f1s1e5]:
|
||||
- paragraph [ref=f2s1e3]: Nested
|
||||
\`\`\`
|
||||
`);
|
||||
- button "World" [ref=f1s1e3]
|
||||
- main [ref=f1s1e4]:
|
||||
- iframe [ref=f1s1e5]:
|
||||
- paragraph [ref=f2s1e3]: Nested
|
||||
\`\`\``);
|
||||
|
||||
expect(await client.callTool({
|
||||
name: 'browser_click',
|
||||
@@ -39,5 +38,5 @@ test('stitched aria frames', async ({ client }) => {
|
||||
element: 'World',
|
||||
ref: 'f1s1e3',
|
||||
},
|
||||
})).toContainTextContent('Clicked "World"');
|
||||
})).toContainTextContent(`// Click World`);
|
||||
});
|
||||
|
||||
@@ -26,23 +26,14 @@ test('test reopen browser', async ({ client }) => {
|
||||
|
||||
expect(await client.callTool({
|
||||
name: 'browser_close',
|
||||
})).toHaveTextContent('Page closed');
|
||||
})).toContainTextContent('No open pages available');
|
||||
|
||||
expect(await client.callTool({
|
||||
name: 'browser_navigate',
|
||||
arguments: {
|
||||
url: 'data:text/html,<html><title>Title</title><body>Hello, world!</body></html>',
|
||||
},
|
||||
})).toHaveTextContent(`
|
||||
Navigated to data:text/html,<html><title>Title</title><body>Hello, world!</body></html>
|
||||
|
||||
- Page URL: data:text/html,<html><title>Title</title><body>Hello, world!</body></html>
|
||||
- Page Title: Title
|
||||
- Page Snapshot
|
||||
\`\`\`yaml
|
||||
- text: Hello, world!
|
||||
\`\`\`
|
||||
`);
|
||||
})).toContainTextContent(`- text: Hello, world!`);
|
||||
});
|
||||
|
||||
test('executable path', async ({ startClient }) => {
|
||||
|
||||
@@ -30,26 +30,17 @@ test('save as pdf unavailable', async ({ startClient }) => {
|
||||
})).toHaveTextContent(/Tool \"browser_pdf_save\" not found/);
|
||||
});
|
||||
|
||||
test('save as pdf', async ({ client }) => {
|
||||
test('save as pdf', async ({ client, mcpBrowser }) => {
|
||||
test.skip(!!mcpBrowser && !['chromium', 'chrome', 'msedge'].includes(mcpBrowser), 'Save as PDF is only supported in Chromium.');
|
||||
expect(await client.callTool({
|
||||
name: 'browser_navigate',
|
||||
arguments: {
|
||||
url: 'data:text/html,<html><title>Title</title><body>Hello, world!</body></html>',
|
||||
},
|
||||
})).toHaveTextContent(`
|
||||
Navigated to data:text/html,<html><title>Title</title><body>Hello, world!</body></html>
|
||||
|
||||
- Page URL: data:text/html,<html><title>Title</title><body>Hello, world!</body></html>
|
||||
- Page Title: Title
|
||||
- Page Snapshot
|
||||
\`\`\`yaml
|
||||
- text: Hello, world!
|
||||
\`\`\`
|
||||
`
|
||||
);
|
||||
})).toContainTextContent(`- text: Hello, world!`);
|
||||
|
||||
const response = await client.callTool({
|
||||
name: 'browser_pdf_save',
|
||||
});
|
||||
expect(response).toHaveTextContent(/^Saved as.*page-[^:]+.pdf$/);
|
||||
expect(response).toHaveTextContent(/Save page as.*page-[^:]+.pdf/);
|
||||
});
|
||||
|
||||
72
tests/screenshot.spec.ts
Normal file
72
tests/screenshot.spec.ts
Normal file
@@ -0,0 +1,72 @@
|
||||
/**
|
||||
* Copyright (c) Microsoft Corporation.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import { test, expect } from './fixtures';
|
||||
|
||||
test('browser_take_screenshot (viewport)', async ({ client }) => {
|
||||
expect(await client.callTool({
|
||||
name: 'browser_navigate',
|
||||
arguments: {
|
||||
url: 'data:text/html,<html><title>Title</title><body>Hello, world!</body></html>',
|
||||
},
|
||||
})).toContainTextContent(`Navigate to data:text/html`);
|
||||
|
||||
expect(await client.callTool({
|
||||
name: 'browser_take_screenshot',
|
||||
arguments: {},
|
||||
})).toEqual({
|
||||
content: [
|
||||
{
|
||||
data: expect.any(String),
|
||||
mimeType: 'image/jpeg',
|
||||
type: 'image',
|
||||
},
|
||||
{
|
||||
text: expect.stringContaining(`Screenshot viewport and save it as`),
|
||||
type: 'text',
|
||||
},
|
||||
],
|
||||
});
|
||||
});
|
||||
|
||||
test('browser_take_screenshot (element)', async ({ client }) => {
|
||||
expect(await client.callTool({
|
||||
name: 'browser_navigate',
|
||||
arguments: {
|
||||
url: 'data:text/html,<html><title>Title</title><button>Hello, world!</button></html>',
|
||||
},
|
||||
})).toContainTextContent(`[ref=s1e3]`);
|
||||
|
||||
expect(await client.callTool({
|
||||
name: 'browser_take_screenshot',
|
||||
arguments: {
|
||||
element: 'hello button',
|
||||
ref: 's1e3',
|
||||
},
|
||||
})).toEqual({
|
||||
content: [
|
||||
{
|
||||
data: expect.any(String),
|
||||
mimeType: 'image/jpeg',
|
||||
type: 'image',
|
||||
},
|
||||
{
|
||||
text: expect.stringContaining(`page.getByRole('button', { name: 'Hello, world!' }).screenshot`),
|
||||
type: 'text',
|
||||
},
|
||||
],
|
||||
});
|
||||
});
|
||||
@@ -29,13 +29,34 @@ async function createTab(client: Client, title: string, body: string) {
|
||||
});
|
||||
}
|
||||
|
||||
test('list initial tabs', async ({ client }) => {
|
||||
expect(await client.callTool({
|
||||
name: 'browser_tab_list',
|
||||
})).toHaveTextContent(`### Open tabs
|
||||
- 1: (current) [] (about:blank)`);
|
||||
});
|
||||
|
||||
test('list first tab', async ({ client }) => {
|
||||
await createTab(client, 'Tab one', 'Body one');
|
||||
expect(await client.callTool({
|
||||
name: 'browser_tab_list',
|
||||
})).toHaveTextContent(`### Open tabs
|
||||
- 1: [] (about:blank)
|
||||
- 2: (current) [Tab one] (data:text/html,<title>Tab one</title><body>Body one</body>)`);
|
||||
});
|
||||
|
||||
test('create new tab', async ({ client }) => {
|
||||
expect(await createTab(client, 'Tab one', 'Body one')).toHaveTextContent(`
|
||||
Open tabs:
|
||||
- Ran Playwright code:
|
||||
\`\`\`js
|
||||
// <internal code to open a new tab>
|
||||
\`\`\`
|
||||
|
||||
### Open tabs
|
||||
- 1: [] (about:blank)
|
||||
- 2: (current) [Tab one] (data:text/html,<title>Tab one</title><body>Body one</body>)
|
||||
|
||||
Current tab:
|
||||
### Current tab
|
||||
- Page URL: data:text/html,<title>Tab one</title><body>Body one</body>
|
||||
- Page Title: Tab one
|
||||
- Page Snapshot
|
||||
@@ -44,12 +65,17 @@ Current tab:
|
||||
\`\`\``);
|
||||
|
||||
expect(await createTab(client, 'Tab two', 'Body two')).toHaveTextContent(`
|
||||
Open tabs:
|
||||
- Ran Playwright code:
|
||||
\`\`\`js
|
||||
// <internal code to open a new tab>
|
||||
\`\`\`
|
||||
|
||||
### Open tabs
|
||||
- 1: [] (about:blank)
|
||||
- 2: [Tab one] (data:text/html,<title>Tab one</title><body>Body one</body>)
|
||||
- 3: (current) [Tab two] (data:text/html,<title>Tab two</title><body>Body two</body>)
|
||||
|
||||
Current tab:
|
||||
### Current tab
|
||||
- Page URL: data:text/html,<title>Tab two</title><body>Body two</body>
|
||||
- Page Title: Tab two
|
||||
- Page Snapshot
|
||||
@@ -67,12 +93,17 @@ test('select tab', async ({ client }) => {
|
||||
index: 2,
|
||||
},
|
||||
})).toHaveTextContent(`
|
||||
Open tabs:
|
||||
- Ran Playwright code:
|
||||
\`\`\`js
|
||||
// <internal code to select tab 2>
|
||||
\`\`\`
|
||||
|
||||
### Open tabs
|
||||
- 1: [] (about:blank)
|
||||
- 2: (current) [Tab one] (data:text/html,<title>Tab one</title><body>Body one</body>)
|
||||
- 3: [Tab two] (data:text/html,<title>Tab two</title><body>Body two</body>)
|
||||
|
||||
Current tab:
|
||||
### Current tab
|
||||
- Page URL: data:text/html,<title>Tab one</title><body>Body one</body>
|
||||
- Page Title: Tab one
|
||||
- Page Snapshot
|
||||
@@ -90,11 +121,16 @@ test('close tab', async ({ client }) => {
|
||||
index: 3,
|
||||
},
|
||||
})).toHaveTextContent(`
|
||||
Open tabs:
|
||||
- Ran Playwright code:
|
||||
\`\`\`js
|
||||
// <internal code to close tab 3>
|
||||
\`\`\`
|
||||
|
||||
### Open tabs
|
||||
- 1: [] (about:blank)
|
||||
- 2: (current) [Tab one] (data:text/html,<title>Tab one</title><body>Body one</body>)
|
||||
|
||||
Current tab:
|
||||
### Current tab
|
||||
- Page URL: data:text/html,<title>Tab one</title><body>Body one</body>
|
||||
- Page Title: Tab one
|
||||
- Page Snapshot
|
||||
|
||||
178
utils/update-readme.js
Normal file
178
utils/update-readme.js
Normal file
@@ -0,0 +1,178 @@
|
||||
#!/usr/bin/env node
|
||||
/**
|
||||
* Copyright (c) Microsoft Corporation.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
// @ts-check
|
||||
|
||||
const fs = require('node:fs');
|
||||
const path = require('node:path');
|
||||
|
||||
const commonTools = require('../lib/tools/common').default;
|
||||
const consoleTools = require('../lib/tools/console').default;
|
||||
const dialogsTools = require('../lib/tools/dialogs').default;
|
||||
const filesTools = require('../lib/tools/files').default;
|
||||
const installTools = require('../lib/tools/install').default;
|
||||
const keyboardTools = require('../lib/tools/keyboard').default;
|
||||
const navigateTools = require('../lib/tools/navigate').default;
|
||||
const pdfTools = require('../lib/tools/pdf').default;
|
||||
const snapshotTools = require('../lib/tools/snapshot').default;
|
||||
const tabsTools = require('../lib/tools/tabs').default;
|
||||
const screenTools = require('../lib/tools/screen').default;
|
||||
|
||||
// README section headings mapped to the tools documented under each heading.
// Key order matters: updateReadme() walks the entries in insertion order and
// emits one "### <heading>" section per key.
// NOTE(review): several tool modules export a factory that is called with
// `true`; what that flag selects is defined in ../lib/tools — confirm there.
const categories = {
  'Snapshot-based Interactions': [...snapshotTools],
  'Vision-based Interactions': [...screenTools],
  'Tab Management': [...tabsTools(true)],
  'Navigation': [...navigateTools(true)],
  'Keyboard': [...keyboardTools(true)],
  'Console': [...consoleTools],
  'Files and Media': [...filesTools(true), ...pdfTools],
  'Utilities': [...commonTools(true), ...installTools, ...dialogsTools(true)],
};
|
||||
|
||||
// HTML comment that opens the generated section of the README; it embeds this
// script's file name.
const kStartMarker = `<!--- Generated by ${path.basename(__filename)} -->`;
|
||||
// HTML comment that closes the generated section of the README.
const kEndMarker = `<!--- End of generated section -->`;
|
||||
|
||||
/**
 * Renders a single tool as a Markdown bullet block for the README.
 *
 * The block starts with an HTML comment naming this script, followed by the
 * tool name, its description and either a list of parameters or
 * "Parameters: None". Each parameter line carries a parenthesised metadata
 * suffix (type and/or "optional") only when there is metadata to show, so a
 * required parameter without a type renders as "`name`: description" rather
 * than "`name` : description".
 *
 * @param {ParsedToolSchema} tool
 * @returns {string} Markdown text terminated by a blank line.
 */
function formatToolForReadme(tool) {
  const lines = /** @type {string[]} */ ([]);
  lines.push(`<!-- NOTE: This has been generated via ${path.basename(__filename)} -->\n\n`);
  lines.push(`- **${tool.name}**\n`);
  lines.push(` - Description: ${tool.description}\n`);

  if (tool.parameters && tool.parameters.length > 0) {
    lines.push(` - Parameters:\n`);
    for (const param of tool.parameters) {
      const meta = /** @type {string[]} */ ([]);
      if (param.type)
        meta.push(param.type);
      if (param.optional)
        meta.push('optional');
      // The separating space belongs to the suffix so that nothing is left
      // dangling in front of the colon when there is no metadata.
      const suffix = meta.length ? ` (${meta.join(', ')})` : '';
      lines.push(` - \`${param.name}\`${suffix}: ${param.description}\n`);
    }
  } else {
    lines.push(` - Parameters: None\n`);
  }

  lines.push('\n');
  return lines.join('');
}
|
||||
|
||||
/**
|
||||
* @typedef {{
|
||||
* name: any;
|
||||
* description: any;
|
||||
* parameters: {
|
||||
* name: string;
|
||||
* description: string;
|
||||
* optional: boolean;
|
||||
* type: string;
|
||||
* }[];
|
||||
*}} ParsedToolSchema
|
||||
*/
|
||||
|
||||
/**
 * Flattens a tool schema into the name/description/parameters shape used to
 * render the README.
 *
 * A parameter is reported as optional unless its name is listed in the input
 * schema's `required` array. An object schema without a `properties` key is
 * treated as having no parameters.
 *
 * @param {import('../src/tools/tool').ToolSchema} schema
 * @returns {ParsedToolSchema}
 * @throws {Error} If the tool's input schema is missing or is not of type 'object'.
 */
function processToolSchema(schema) {
  const inputSchema = /** @type {import('zod-to-json-schema').JsonSchema7ObjectType} */ (schema.inputSchema || {});
  if (inputSchema.type !== 'object')
    throw new Error(`Tool ${schema.name} input schema is not an object`);

  // In JSON Schema, properties are considered optional unless listed in the required array
  const requiredParams = new Set(inputSchema.required || []);
  // `properties` may legitimately be absent on an object schema.
  const properties = inputSchema.properties || {};

  const parameters = Object.entries(properties).map(([name, prop]) => ({
    name,
    description: prop.description || '',
    optional: !requiredParams.has(name),
    type: /** @type {any} */ (prop).type,
  }));

  return {
    name: schema.name,
    description: schema.description,
    parameters,
  };
}
|
||||
|
||||
/**
 * Regenerates the tool reference inside README.md.
 *
 * Builds one "### <category>" section per entry of `categories`, renders every
 * tool in it, and replaces the text between the start and end markers of the
 * README (one directory above this script) with the result. Text outside the
 * markers is kept as is.
 *
 * @returns {Promise<void>}
 * @throws {Error} If the start marker is missing, or no end marker follows it.
 */
async function updateReadme() {
  console.log('Loading tool information from compiled modules...');

  // Count the tools processed
  const totalTools = Object.values(categories).flat().length;
  console.log(`Found ${totalTools} tools`);

  const generatedLines = /** @type {string[]} */ ([]);

  for (const [category, categoryTools] of Object.entries(categories)) {
    generatedLines.push(`### ${category}\n\n`);
    for (const tool of categoryTools) {
      const parsedTool = processToolSchema(tool.schema);
      generatedLines.push(formatToolForReadme(parsedTool));
    }
  }

  const readmePath = path.join(__dirname, '..', 'README.md');
  const readmeContent = await fs.promises.readFile(readmePath, 'utf-8');
  const startMarker = readmeContent.indexOf(kStartMarker);
  // Only an end marker located after the start marker delimits the generated
  // section; an earlier occurrence would make the two slices below overlap and
  // corrupt the file.
  const endMarker = startMarker === -1
    ? -1
    : readmeContent.indexOf(kEndMarker, startMarker + kStartMarker.length);
  if (startMarker === -1 || endMarker === -1)
    throw new Error('Markers for generated section not found in README');

  const newReadmeContent = [
    readmeContent.slice(0, startMarker),
    kStartMarker + '\n\n',
    generatedLines.join(''),
    kEndMarker,
    readmeContent.slice(endMarker + kEndMarker.length),
  ].join('');

  // Write updated README
  await fs.promises.writeFile(readmePath, newReadmeContent, 'utf-8');
  console.log('README updated successfully');
}
|
||||
|
||||
// Run the update when the script is executed. A rejection is logged to stderr
// and the process exits with status 1.
|
||||
updateReadme().catch(err => {
|
||||
console.error('Error updating README:', err);
|
||||
// Exit with a non-zero status so the failure is visible to the caller.
process.exit(1);
|
||||
});
|
||||
Reference in New Issue
Block a user