feat: browser_choose_file (#52)

Resolves https://github.com/microsoft/playwright-mcp/issues/31.

Prompt used for testing: 
```
upload test.txt to dropbox
```

This won't work for asynchronously opened file choosers, but let's start
with the synchronous variant. I also tried including the file
chooser open state in the snapshot, but that didn't give better results.

I also tested with OneDrive and WeTransfer, but somehow our ARIA
snapshots for those pages are missing some elements that are crucial for
locating the upload buttons.
This commit is contained in:
Simon Knott
2025-03-27 20:49:57 +01:00
committed by GitHub
parent f033213618
commit 5e200405e5
6 changed files with 152 additions and 10 deletions

View File

@@ -23,6 +23,7 @@ export class Context {
private _page: playwright.Page | undefined;
private _console: playwright.ConsoleMessage[] = [];
private _createPagePromise: Promise<playwright.Page> | undefined;
private _fileChooser: playwright.FileChooser | undefined;
private _lastSnapshotFrames: playwright.FrameLocator[] = [];
constructor(userDataDir: string, launchOptions?: playwright.LaunchOptions) {
@@ -41,6 +42,7 @@ export class Context {
this._console.length = 0;
});
page.on('close', () => this._onPageClose());
page.on('filechooser', chooser => this._fileChooser = chooser);
page.setDefaultNavigationTimeout(60000);
page.setDefaultTimeout(5000);
this._page = page;
@@ -58,6 +60,7 @@ export class Context {
this._createPagePromise = undefined;
this._browser = undefined;
this._page = undefined;
this._fileChooser = undefined;
this._console.length = 0;
}
@@ -77,6 +80,21 @@ export class Context {
await this._page.close();
}
/**
 * Passes `paths` to the file chooser most recently recorded from the page's
 * 'filechooser' event, then forgets that chooser.
 *
 * The stored chooser is only cleared after `setFiles` resolves; if it
 * rejects, the chooser stays recorded.
 *
 * @param paths File paths forwarded unchanged to `FileChooser.setFiles`.
 * @throws Error('No file chooser visible') when no chooser is recorded.
 */
async submitFileChooser(paths: string[]) {
  const pendingChooser = this._fileChooser;
  if (!pendingChooser)
    throw new Error('No file chooser visible');

  await pendingChooser.setFiles(paths);
  this._fileChooser = undefined;
}
/**
 * Reports whether a file chooser is currently recorded on this context.
 *
 * @returns `true` when a chooser is stored, `false` otherwise.
 */
hasFileChooser() {
  return Boolean(this._fileChooser);
}
/**
 * Drops the recorded file chooser, if any, without submitting files to it.
 */
clearFileChooser() {
this._fileChooser = undefined;
}
private async _createPage(): Promise<{ browser?: playwright.Browser, page: playwright.Page }> {
if (process.env.PLAYWRIGHT_WS_ENDPOINT) {
const url = new URL(process.env.PLAYWRIGHT_WS_ENDPOINT);

View File

@@ -36,6 +36,7 @@ const snapshotTools: Tool[] = [
common.navigate(true),
common.goBack(true),
common.goForward(true),
common.chooseFile(true),
snapshot.snapshot,
snapshot.click,
snapshot.hover,
@@ -49,6 +50,7 @@ const screenshotTools: Tool[] = [
common.navigate(false),
common.goBack(false),
common.goForward(false),
common.chooseFile(false),
screenshot.screenshot,
screenshot.moveMouse,
screenshot.click,

View File

@@ -156,3 +156,21 @@ export const close: Tool = {
};
},
};
/** Input schema for the browser_choose_file tool: a list of file paths. */
const chooseFileSchema = z.object({
  paths: z
    .array(z.string())
    .describe('The absolute paths to the files to upload. Can be a single file or multiple files.'),
});
/**
 * Builds the `browser_choose_file` tool.
 *
 * The handler validates its params against `chooseFileSchema`, submits the
 * paths to the context's recorded file chooser through `runAndWait`, and
 * reports a status line listing the chosen paths.
 *
 * @param snapshot Forwarded to `runAndWait` as its `snapshot` argument.
 */
export const chooseFile: ToolFactory = snapshot => {
  return {
    schema: {
      name: 'browser_choose_file',
      description: 'Choose one or multiple files to upload',
      inputSchema: zodToJsonSchema(chooseFileSchema),
    },

    handle: async (context, params) => {
      const { paths } = chooseFileSchema.parse(params);
      const status = `Chose files ${paths.join(', ')}`;
      const submit = async () => {
        await context.submitFileChooser(paths);
      };
      return await runAndWait(context, status, submit, snapshot);
    },
  };
};

View File

@@ -73,23 +73,36 @@ async function waitForCompletion<R>(page: playwright.Page, callback: () => Promi
/**
 * Runs `callback` against the context's existing page via `waitForCompletion`
 * and builds the tool result for it.
 *
 * If a file chooser was already recorded on the context before the action
 * ran, it is cleared once the action completes.
 *
 * @param context Context that supplies the page and the file chooser state.
 * @param status Status line included in the result.
 * @param callback Action to run; receives the existing page.
 * @param snapshot When true the result is an ARIA snapshot produced by
 *   `captureAriaSnapshot`; otherwise it is a text result holding only `status`.
 * @returns The tool result describing the outcome.
 */
export async function runAndWait(context: Context, status: string, callback: (page: playwright.Page) => Promise<any>, snapshot: boolean = false): Promise<ToolResult> {
  const page = context.existingPage();

  // Sample the chooser state before the action runs, so a chooser that the
  // action itself opens (none recorded beforehand) is not cleared below.
  // NOTE(review): if a chooser was already recorded and the action opens a new
  // one, the new one is cleared as well, since only presence is tracked here.
  const dismissFileChooser = context.hasFileChooser();

  await waitForCompletion(page, () => callback(page));

  // Not reached when the action throws, so the chooser then stays recorded.
  if (dismissFileChooser)
    context.clearFileChooser();

  if (snapshot)
    return await captureAriaSnapshot(context, status);

  return {
    content: [{ type: 'text', text: status }],
  };
}
/**
 * Builds a text tool result describing the current page: optional status
 * line, page URL and title, a hint when a file chooser is recorded on the
 * context, and the YAML snapshot returned by `context.allFramesSnapshot()`.
 *
 * @param context Context that supplies the page, chooser state and snapshot.
 * @param status Optional status line placed first; omitted when empty.
 * @returns A result with a single text content item.
 */
export async function captureAriaSnapshot(context: Context, status: string = ''): Promise<ToolResult> {
  const page = context.existingPage();
  const lines: string[] = [];

  if (status)
    lines.push(status);

  lines.push(
      '',
      `- Page URL: ${page.url()}`,
      `- Page Title: ${await page.title()}`
  );

  // Tell the caller that a chooser is waiting for browser_choose_file.
  if (context.hasFileChooser())
    lines.push(`- There is a file chooser visible that requires browser_choose_file to be called`);

  lines.push(
      `- Page Snapshot`,
      '```yaml',
      await context.allFramesSnapshot(),
      '```',
      ''
  );

  // Single `content` property: the text is assembled once from `lines`, so the
  // title and snapshot are each awaited exactly once.
  return {
    content: [{ type: 'text', text: lines.join('\n') }],
  };
}