chore: extract loop tools into a separate folder (#755)

Pavel Feldman
2025-07-24 16:22:03 -07:00
committed by GitHub
parent e153ac3b7c
commit ecfa10448b
11 changed files with 296 additions and 155 deletions


@@ -41,15 +41,16 @@ export type LLMConversation = {
 };
 
 export interface LLMDelegate {
-  createConversation(task: string, tools: Tool[]): LLMConversation;
+  createConversation(task: string, tools: Tool[], oneShot: boolean): LLMConversation;
   makeApiCall(conversation: LLMConversation): Promise<LLMToolCall[]>;
   addToolResults(conversation: LLMConversation, results: Array<{ toolCallId: string; content: string; isError?: boolean }>): void;
   checkDoneToolCall(toolCall: LLMToolCall): string | null;
 }
 
-export async function runTask(delegate: LLMDelegate, client: Client, task: string): Promise<string> {
+export async function runTask(delegate: LLMDelegate, client: Client, task: string, oneShot: boolean = false): Promise<string> {
   const { tools } = await client.listTools();
-  const conversation = delegate.createConversation(task, tools);
+  const taskContent = oneShot ? `Perform following task: ${task}.` : `Perform following task: ${task}. Once the task is complete, call the "done" tool.`;
+  const conversation = delegate.createConversation(taskContent, tools, oneShot);
 
   for (let iteration = 0; iteration < 5; ++iteration) {
     debug('history')('Making API call for iteration', iteration);
@@ -99,8 +100,10 @@ export async function runTask(delegate: LLMDelegate, client: Client, task: strin
       }
     }
 
-    // Add tool results to conversation
-    delegate.addToolResults(conversation, toolResults);
+    if (oneShot)
+      return toolResults.map(result => result.content).join('\n');
+    else
+      delegate.addToolResults(conversation, toolResults);
   }
 
   throw new Error('Failed to perform step, max attempts reached');

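A note on the hunks above: `oneShot` short-circuits the agent loop. In the default mode the delegate registers a synthetic "done" tool and runTask iterates (up to 5 API calls) until the model invokes it; in one-shot mode no "done" tool is registered and the first round of tool results is returned immediately, joined with newlines. A minimal usage sketch, assuming a connected MCP client (the task strings are invented):

    import { Client } from '@modelcontextprotocol/sdk/client/index.js';
    import { runTask } from './loop.js';
    import { OpenAIDelegate } from './loopOpenAI.js';

    declare const client: Client; // connected elsewhere
    const delegate = new OpenAIDelegate();

    // Agentic mode (default): loops until the model calls "done";
    // the delegate's checkDoneToolCall surfaces its result string.
    const answer = await runTask(delegate, client, 'find the page title');

    // One-shot mode: a single round of tool calls; their text contents
    // are joined with '\n' and returned without further iterations.
    const raw = await runTask(delegate, client, 'take a screenshot', true);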

@@ -14,38 +14,48 @@
  * limitations under the License.
  */
 
-import Anthropic from '@anthropic-ai/sdk';
+import type Anthropic from '@anthropic-ai/sdk';
 
 import type { LLMDelegate, LLMConversation, LLMToolCall, LLMTool } from './loop.js';
 import type { Tool } from '@modelcontextprotocol/sdk/types.js';
 
 const model = 'claude-sonnet-4-20250514';
 
 export class ClaudeDelegate implements LLMDelegate {
-  private anthropic = new Anthropic();
+  private _anthropic: Anthropic | undefined;
+
+  async anthropic(): Promise<Anthropic> {
+    if (!this._anthropic) {
+      const anthropic = await import('@anthropic-ai/sdk');
+      this._anthropic = new anthropic.Anthropic();
+    }
+    return this._anthropic;
+  }
 
-  createConversation(task: string, tools: Tool[]): LLMConversation {
+  createConversation(task: string, tools: Tool[], oneShot: boolean): LLMConversation {
     const llmTools: LLMTool[] = tools.map(tool => ({
       name: tool.name,
       description: tool.description || '',
       inputSchema: tool.inputSchema,
     }));
 
-    // Add the "done" tool
-    llmTools.push({
-      name: 'done',
-      description: 'Call this tool when the task is complete.',
-      inputSchema: {
-        type: 'object',
-        properties: {
-          result: { type: 'string', description: 'The result of the task.' },
-        },
-        required: ['result'],
-      },
-    });
+    if (!oneShot) {
+      llmTools.push({
+        name: 'done',
+        description: 'Call this tool when the task is complete.',
+        inputSchema: {
+          type: 'object',
+          properties: {
+            result: { type: 'string', description: 'The result of the task.' },
+          },
+          required: ['result'],
+        },
+      });
+    }
 
     return {
       messages: [{
         role: 'user',
-        content: `Perform following task: ${task}. Once the task is complete, call the "done" tool.`
+        content: task
       }],
       tools: llmTools,
     };
@@ -119,7 +129,8 @@ export class ClaudeDelegate implements LLMDelegate {
       input_schema: tool.inputSchema,
     }));
 
-    const response = await this.anthropic.messages.create({
+    const anthropic = await this.anthropic();
+    const response = await anthropic.messages.create({
       model,
       max_tokens: 10000,
       messages: claudeMessages,

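The change above makes the Anthropic client lazy: the top-level import becomes type-only (erased from the emitted JavaScript), and the SDK is pulled in with a dynamic import() the first time an API call is made, so merely loading this module no longer loads the SDK, and the API key is only needed once a call actually happens. The same memoized-singleton pattern in isolation (illustrative names, not from this diff):

    import type Anthropic from '@anthropic-ai/sdk';

    let cached: Anthropic | undefined;

    // import() resolves and caches the module itself; the extra field
    // caches the constructed client so all callers share one instance.
    async function anthropicClient(): Promise<Anthropic> {
      if (!cached) {
        const mod = await import('@anthropic-ai/sdk');
        cached = new mod.Anthropic(); // reads ANTHROPIC_API_KEY here, not at startup
      }
      return cached;
    }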

@@ -14,39 +14,48 @@
  * limitations under the License.
  */
 
-import OpenAI from 'openai';
+import type OpenAI from 'openai';
 
 import type { LLMDelegate, LLMConversation, LLMToolCall, LLMTool } from './loop.js';
 import type { Tool } from '@modelcontextprotocol/sdk/types.js';
 
 const model = 'gpt-4.1';
 
 export class OpenAIDelegate implements LLMDelegate {
-  private openai = new OpenAI();
+  private _openai: OpenAI | undefined;
+
+  async openai(): Promise<OpenAI> {
+    if (!this._openai) {
+      const oai = await import('openai');
+      this._openai = new oai.OpenAI();
+    }
+    return this._openai;
+  }
 
-  createConversation(task: string, tools: Tool[]): LLMConversation {
+  createConversation(task: string, tools: Tool[], oneShot: boolean): LLMConversation {
     const genericTools: LLMTool[] = tools.map(tool => ({
       name: tool.name,
      description: tool.description || '',
       inputSchema: tool.inputSchema,
     }));
 
-    // Add the "done" tool
-    genericTools.push({
-      name: 'done',
-      description: 'Call this tool when the task is complete.',
-      inputSchema: {
-        type: 'object',
-        properties: {
-          result: { type: 'string', description: 'The result of the task.' },
-        },
-        required: ['result'],
-      },
-    });
+    if (!oneShot) {
+      genericTools.push({
+        name: 'done',
+        description: 'Call this tool when the task is complete.',
+        inputSchema: {
+          type: 'object',
+          properties: {
+            result: { type: 'string', description: 'The result of the task.' },
+          },
+          required: ['result'],
+        },
+      });
+    }
 
     return {
       messages: [{
         role: 'user',
-        content: `Peform following task: ${task}. Once the task is complete, call the "done" tool.`
+        content: task
       }],
       tools: genericTools,
     };
@@ -108,7 +117,8 @@ export class OpenAIDelegate implements LLMDelegate {
       },
     }));
 
-    const response = await this.openai.chat.completions.create({
+    const openai = await this.openai();
+    const response = await openai.chat.completions.create({
       model,
       messages: openaiMessages,
       tools: openaiTools,

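The OpenAI delegate receives the identical treatment. Moving the prompt into runTask also retires the "Peform" typo visible in the removed message above; the centralized copy in the loop runner spells it "Perform". The import distinction both delegates now rely on, sketched for clarity (not part of the diff):

    // A default (value) import executes the package at module load time:
    //   import OpenAI from 'openai';
    // A type-only import is erased by the TypeScript compiler, keeping
    // 'openai' out of the runtime module graph until import() runs:
    import type OpenAI from 'openai';

    // The value is recovered later via dynamic import, typed the same:
    async function makeClient(): Promise<OpenAI> {
      const mod = await import('openai');
      return new mod.OpenAI(); // reads OPENAI_API_KEY at this point
    }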

@@ -1,85 +0,0 @@
-/**
- * Copyright (c) Microsoft Corporation.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import { Client } from '@modelcontextprotocol/sdk/client/index.js';
-import dotenv from 'dotenv';
-import { z } from 'zod';
-
-import { contextFactory } from '../browserContextFactory.js';
-import { BrowserServerBackend } from '../browserServerBackend.js';
-import { Context } from '../context.js';
-import { logUnhandledError } from '../log.js';
-import { InProcessTransport } from '../mcp/inProcessTransport.js';
-import * as mcpServer from '../mcp/server.js';
-import * as mcpTransport from '../mcp/transport.js';
-import { packageJSON } from '../package.js';
-import { runTask } from './loop.js';
-import { OpenAIDelegate } from './loopOpenAI.js';
-
-import type { FullConfig } from '../config.js';
-import type { ServerBackend } from '../mcp/server.js';
-
-const oneToolSchema: mcpServer.ToolSchema<any> = {
-  name: 'browser',
-  title: 'Perform a task with the browser',
-  description: 'Perform a task with the browser. It can click, type, export, capture screenshot, drag, hover, select options, etc.',
-  inputSchema: z.object({
-    task: z.string().describe('The task to perform with the browser'),
-  }),
-  type: 'readOnly',
-};
-
-export async function runOneTool(config: FullConfig) {
-  dotenv.config();
-  const serverBackendFactory = () => new OneToolServerBackend(config);
-  await mcpTransport.start(serverBackendFactory, config.server);
-}
-
-class OneToolServerBackend implements ServerBackend {
-  readonly name = 'Playwright';
-  readonly version = packageJSON.version;
-  private _innerClient: Client | undefined;
-  private _config: FullConfig;
-
-  constructor(config: FullConfig) {
-    this._config = config;
-  }
-
-  async initialize() {
-    const client = new Client({ name: 'Playwright Proxy', version: '1.0.0' });
-    const browserContextFactory = contextFactory(this._config.browser);
-    const server = mcpServer.createServer(new BrowserServerBackend(this._config, browserContextFactory));
-    await client.connect(new InProcessTransport(server));
-    await client.ping();
-    this._innerClient = client;
-  }
-
-  tools(): mcpServer.ToolSchema<any>[] {
-    return [oneToolSchema];
-  }
-
-  async callTool(schema: mcpServer.ToolSchema<any>, parsedArguments: any): Promise<mcpServer.ToolResponse> {
-    const delegate = new OpenAIDelegate();
-    const result = await runTask(delegate, this._innerClient!, parsedArguments.task as string);
-    return {
-      content: [{ type: 'text', text: result }],
-    };
-  }
-
-  serverClosed() {
-    void Context.disposeAll().catch(logUnhandledError);
-  }
-}
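This last file is deleted outright; per the commit title, the one-tool backend presumably now lives in the new loop folder alongside the files above (its added counterpart is among the 11 changed files but is not shown here). For orientation, a sketch of how an MCP client would drive the single "browser" tool this backend exposes (wiring is assumed; the argument shape comes from oneToolSchema):

    import { Client } from '@modelcontextprotocol/sdk/client/index.js';

    declare const client: Client; // connected to the one-tool server

    const response = await client.callTool({
      name: 'browser',
      arguments: { task: 'open https://example.com and report the title' },
    });
    // The backend forwards the task to runTask over an in-process MCP
    // connection and returns the result as a single text content item.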