diff --git a/.changeset/easy-toys-wash.md b/.changeset/easy-toys-wash.md new file mode 100644 index 00000000..05391705 --- /dev/null +++ b/.changeset/easy-toys-wash.md @@ -0,0 +1,7 @@ +--- +'task-master-ai': patch +--- + +- Adds support for the OpenRouter AI provider. Users can now configure models available through OpenRouter (requiring an `OPENROUTER_API_KEY`) via the `task-master models` command, granting access to a wide range of additional LLMs. +- IMPORTANT FYI ABOUT OPENROUTER: Taskmaster relies on AI SDK, which itself relies on tool use. It looks like **free** models sometimes do not include tool use. For example, Gemini 2.5 pro (free) failed via OpenRouter (no tool use) but worked fine on the paid version of the model. Custom model support for Open Router is considered experimental and likely will not be further improved for some time. + diff --git a/.taskmasterconfig b/.taskmasterconfig index 718ad6df..cacd529e 100644 --- a/.taskmasterconfig +++ b/.taskmasterconfig @@ -2,7 +2,7 @@ "models": { "main": { "provider": "openrouter", - "modelId": "meta-llama/llama-4-maverick:free", + "modelId": "google/gemini-2.5-pro-exp-03-25", "maxTokens": 100000, "temperature": 0.2 }, diff --git a/scripts/modules/ai-services-unified.js b/scripts/modules/ai-services-unified.js index 6995dd43..45fc5776 100644 --- a/scripts/modules/ai-services-unified.js +++ b/scripts/modules/ai-services-unified.js @@ -27,6 +27,7 @@ import * as perplexity from '../../src/ai-providers/perplexity.js'; import * as google from '../../src/ai-providers/google.js'; // Import Google provider import * as openai from '../../src/ai-providers/openai.js'; // ADD: Import OpenAI provider import * as xai from '../../src/ai-providers/xai.js'; // ADD: Import xAI provider +import * as openrouter from '../../src/ai-providers/openrouter.js'; // ADD: Import OpenRouter provider // TODO: Import other provider modules when implemented (ollama, etc.) 
// --- Provider Function Map --- @@ -61,6 +62,12 @@ const PROVIDER_FUNCTIONS = { generateText: xai.generateXaiText, streamText: xai.streamXaiText, generateObject: xai.generateXaiObject // Note: Object generation might be unsupported + }, + openrouter: { + // ADD: OpenRouter entry + generateText: openrouter.generateOpenRouterText, + streamText: openrouter.streamOpenRouterText, + generateObject: openrouter.generateOpenRouterObject } // TODO: Add entries for ollama, etc. when implemented }; @@ -148,7 +155,7 @@ function _resolveApiKey(providerName, session) { perplexity: 'PERPLEXITY_API_KEY', mistral: 'MISTRAL_API_KEY', azure: 'AZURE_OPENAI_API_KEY', - openrouter: 'OPENROUTER_API_KEY', + openrouter: 'OPENROUTER_API_KEY', // ADD OpenRouter key xai: 'XAI_API_KEY' }; @@ -410,11 +417,34 @@ async function _unifiedServiceRunner(serviceType, params) { const cleanMessage = _extractErrorMessage(error); // Extract clean message log( 'error', // Log as error since this role attempt failed - `Service call failed for role ${currentRole} (Provider: ${providerName || 'unknown'}): ${cleanMessage}` // Log the clean message + `Service call failed for role ${currentRole} (Provider: ${providerName || 'unknown'}, Model: ${modelId || 'unknown'}): ${cleanMessage}` // Log the clean message ); lastError = error; // Store the original error for potential debugging lastCleanErrorMessage = cleanMessage; // Store the clean message for final throw - // Continue to the next role in the sequence + + // --- ADDED: Specific check for tool use error in generateObject --- + if (serviceType === 'generateObject') { + const lowerCaseMessage = cleanMessage.toLowerCase(); + // Check for specific error messages indicating lack of tool support + if ( + lowerCaseMessage.includes( + 'no endpoints found that support tool use' + ) || + lowerCaseMessage.includes('does not support tool_use') || + lowerCaseMessage.includes('tool use is not supported') || + lowerCaseMessage.includes('tools are not supported') || + 
lowerCaseMessage.includes('function calling is not supported') + ) { + const specificErrorMsg = `Model '${modelId || 'unknown'}' via provider '${providerName || 'unknown'}' does not support the 'tool use' required by generateObjectService. Please configure a model that supports tool/function calling for the '${currentRole}' role, or use generateTextService if structured output is not strictly required.`; + log('error', `[Tool Support Error] ${specificErrorMsg}`); + // Throw a more specific error immediately, breaking the fallback loop for this specific issue. + // Using a generic Error for simplicity, could use a custom ConfigurationError. + throw new Error(specificErrorMsg); + } + } + // --- END ADDED --- + + // Continue to the next role in the sequence if it wasn't a specific tool support error } } diff --git a/scripts/modules/supported-models.json b/scripts/modules/supported-models.json index 9003cf04..a16fee33 100644 --- a/scripts/modules/supported-models.json +++ b/scripts/modules/supported-models.json @@ -356,34 +356,6 @@ "allowed_roles": ["main", "fallback"], "max_tokens": 1048576 }, - { - "id": "meta-llama/llama-4-maverick:free", - "swe_score": 0, - "cost_per_1m_tokens": { "input": 0, "output": 0 }, - "allowed_roles": ["main", "fallback"], - "max_tokens": 256000 - }, - { - "id": "meta-llama/llama-4-maverick", - "swe_score": 0, - "cost_per_1m_tokens": { "input": 0.17, "output": 0.6 }, - "allowed_roles": ["main", "fallback"], - "max_tokens": 1048576 - }, - { - "id": "meta-llama/llama-4-scout:free", - "swe_score": 0, - "cost_per_1m_tokens": { "input": 0, "output": 0 }, - "allowed_roles": ["main", "fallback"], - "max_tokens": 512000 - }, - { - "id": "meta-llama/llama-4-scout", - "swe_score": 0, - "cost_per_1m_tokens": { "input": 0.08, "output": 0.3 }, - "allowed_roles": ["main", "fallback"], - "max_tokens": 1048576 - }, { "id": "google/gemma-3-12b-it:free", "swe_score": 0, diff --git a/src/ai-providers/openrouter.js b/src/ai-providers/openrouter.js new file 
mode 100644 index 00000000..594d208c --- /dev/null +++ b/src/ai-providers/openrouter.js @@ -0,0 +1,165 @@ +import { createOpenRouter } from '@openrouter/ai-sdk-provider'; +import { generateText, streamText, generateObject } from 'ai'; +import { log } from '../../scripts/modules/utils.js'; // Assuming utils.js is in scripts/modules + +/** + * Generates text using an OpenRouter chat model. + * + * @param {object} params - Parameters for the text generation. + * @param {string} params.apiKey - OpenRouter API key. + * @param {string} params.modelId - The OpenRouter model ID (e.g., 'anthropic/claude-3.5-sonnet'). + * @param {Array} params.messages - Array of message objects (system, user, assistant). + * @param {number} [params.maxTokens] - Maximum tokens to generate. + * @param {number} [params.temperature] - Sampling temperature. + * @returns {Promise<string>} The generated text content. + * @throws {Error} If the API call fails. + */ +async function generateOpenRouterText({ + apiKey, + modelId, + messages, + maxTokens, + temperature, + ...rest // Capture any other Vercel AI SDK compatible parameters +}) { + if (!apiKey) throw new Error('OpenRouter API key is required.'); + if (!modelId) throw new Error('OpenRouter model ID is required.'); + if (!messages || messages.length === 0) + throw new Error('Messages array cannot be empty.'); + + try { + const openrouter = createOpenRouter({ apiKey }); + const model = openrouter.chat(modelId); // Assuming chat model + + const { text } = await generateText({ + model, + messages, + maxTokens, + temperature, + ...rest // Pass any additional parameters + }); + return text; + } catch (error) { + log( + 'error', + `OpenRouter generateText failed for model ${modelId}: ${error.message}` + ); + // Re-throw the error for the unified layer to handle retries/fallbacks + throw error; + } +} + +/** + * Streams text using an OpenRouter chat model. + * + * @param {object} params - Parameters for the text streaming. 
+ * @param {string} params.apiKey - OpenRouter API key. + * @param {string} params.modelId - The OpenRouter model ID (e.g., 'anthropic/claude-3.5-sonnet'). + * @param {Array} params.messages - Array of message objects (system, user, assistant). + * @param {number} [params.maxTokens] - Maximum tokens to generate. + * @param {number} [params.temperature] - Sampling temperature. + * @returns {Promise<ReadableStream<string>>} A readable stream of text deltas. + * @throws {Error} If the API call fails. + */ +async function streamOpenRouterText({ + apiKey, + modelId, + messages, + maxTokens, + temperature, + ...rest +}) { + if (!apiKey) throw new Error('OpenRouter API key is required.'); + if (!modelId) throw new Error('OpenRouter model ID is required.'); + if (!messages || messages.length === 0) + throw new Error('Messages array cannot be empty.'); + + try { + const openrouter = createOpenRouter({ apiKey }); + const model = openrouter.chat(modelId); + + // Directly return the stream from the Vercel AI SDK function + const stream = await streamText({ + model, + messages, + maxTokens, + temperature, + ...rest + }); + return stream; + } catch (error) { + log( + 'error', + `OpenRouter streamText failed for model ${modelId}: ${error.message}` + ); + throw error; + } +} + +/** + * Generates a structured object using an OpenRouter chat model. + * + * @param {object} params - Parameters for object generation. + * @param {string} params.apiKey - OpenRouter API key. + * @param {string} params.modelId - The OpenRouter model ID. + * @param {import('zod').ZodSchema} params.schema - The Zod schema for the expected object. + * @param {Array} params.messages - Array of message objects. + * @param {string} [params.objectName='generated_object'] - Name for object/tool. + * @param {number} [params.maxRetries=3] - Max retries for object generation. + * @param {number} [params.maxTokens] - Maximum tokens. + * @param {number} [params.temperature] - Temperature. 
+ * @returns {Promise} The generated object matching the schema. + * @throws {Error} If the API call fails or validation fails. + */ +async function generateOpenRouterObject({ + apiKey, + modelId, + schema, + messages, + objectName = 'generated_object', + maxRetries = 3, + maxTokens, + temperature, + ...rest +}) { + if (!apiKey) throw new Error('OpenRouter API key is required.'); + if (!modelId) throw new Error('OpenRouter model ID is required.'); + if (!schema) throw new Error('Zod schema is required for object generation.'); + if (!messages || messages.length === 0) + throw new Error('Messages array cannot be empty.'); + + try { + const openrouter = createOpenRouter({ apiKey }); + const model = openrouter.chat(modelId); + + const { object } = await generateObject({ + model, + schema, + mode: 'tool', // Standard mode for most object generation + tool: { + // Define the tool based on the schema + name: objectName, + description: `Generate an object conforming to the ${objectName} schema.`, + parameters: schema + }, + messages, + maxTokens, + temperature, + maxRetries, // Pass maxRetries if supported by generateObject + ...rest + }); + return object; + } catch (error) { + log( + 'error', + `OpenRouter generateObject failed for model ${modelId}: ${error.message}` + ); + throw error; + } +} + +export { + generateOpenRouterText, + streamOpenRouterText, + generateOpenRouterObject +}; diff --git a/tasks/task_061.txt b/tasks/task_061.txt index 561e6dad..84ec2dc1 100644 --- a/tasks/task_061.txt +++ b/tasks/task_061.txt @@ -1779,7 +1779,7 @@ export async function generateGoogleObject({ ### Details: -## 28. Implement `openrouter.js` Provider Module [in-progress] +## 28. Implement `openrouter.js` Provider Module [done] ### Dependencies: None ### Description: Create and implement the `openrouter.js` module within `src/ai-providers/`. 
This module should contain functions to interact with various models via OpenRouter using the **`@openrouter/ai-sdk-provider` library**, adhering to the standardized input/output format defined for `ai-services-unified.js`. Note the specific library used. ### Details: diff --git a/tasks/task_074.txt b/tasks/task_074.txt new file mode 100644 index 00000000..263412bf --- /dev/null +++ b/tasks/task_074.txt @@ -0,0 +1,36 @@ +# Task ID: 74 +# Title: Task 74: Implement Local Kokoro TTS Support +# Status: pending +# Dependencies: None +# Priority: medium +# Description: Integrate Text-to-Speech (TTS) functionality using a locally running Google Cloud Text-to-Speech (Kokoro) instance, enabling the application to synthesize speech from text. +# Details: +Implementation Details: +1. **Kokoro Setup:** Assume the user has a local Kokoro TTS instance running and accessible via a network address (e.g., http://localhost:port). +2. **Configuration:** Introduce new configuration options (e.g., in `.taskmasterconfig`) to enable/disable TTS, specify the provider ('kokoro_local'), and configure the Kokoro endpoint URL (`tts.kokoro.url`). Consider adding options for voice selection and language if the Kokoro API supports them. +3. **API Interaction:** Implement a client module to interact with the local Kokoro TTS API. This module should handle sending text input and receiving audio data (likely in formats like WAV or MP3). +4. **Audio Playback:** Integrate a cross-platform audio playback library (e.g., `playsound`, `simpleaudio`, or platform-specific APIs) to play the synthesized audio received from Kokoro. +5. **Integration Point:** Identify initial areas in the application where TTS will be used (e.g., a command to read out the current task's title and description). Design the integration to be extensible for future use cases. +6. 
**Error Handling:** Implement robust error handling for scenarios like: Kokoro instance unreachable, API errors during synthesis, invalid configuration, audio playback failures. Provide informative feedback to the user. +7. **Dependencies:** Add any necessary HTTP client or audio playback libraries as project dependencies. + +# Test Strategy: +1. **Unit Tests:** + * Mock the Kokoro API client. Verify that the TTS module correctly formats requests based on input text and configuration. + * Test handling of successful API responses (parsing audio data placeholder). + * Test handling of various API error responses (e.g., 404, 500). + * Mock the audio playback library. Verify that the received audio data is passed correctly to the playback function. + * Test configuration loading and validation logic. +2. **Integration Tests:** + * Requires a running local Kokoro TTS instance (or a compatible mock server). + * Send actual text snippets through the TTS module to the local Kokoro instance. + * Verify that valid audio data is received (e.g., check format, non-zero size). Direct audio playback verification might be difficult in automated tests, focus on the data transfer. + * Test the end-to-end flow by triggering TTS from an application command and ensuring no exceptions occur during synthesis and playback initiation. + * Test error handling by attempting synthesis with the Kokoro instance stopped or misconfigured. +3. **Manual Testing:** + * Configure the application to point to a running local Kokoro instance. + * Trigger TTS for various text inputs (short, long, special characters). + * Verify that the audio is played back clearly and accurately reflects the input text. + * Test enabling/disabling TTS via configuration. + * Test behavior when the Kokoro endpoint is incorrect or the server is down. + * Verify performance and responsiveness. 
diff --git a/tasks/tasks.json b/tasks/tasks.json index 42ea4a61..597c482a 100644 --- a/tasks/tasks.json +++ b/tasks/tasks.json @@ -3288,7 +3288,7 @@ "title": "Implement `openrouter.js` Provider Module", "description": "Create and implement the `openrouter.js` module within `src/ai-providers/`. This module should contain functions to interact with various models via OpenRouter using the **`@openrouter/ai-sdk-provider` library**, adhering to the standardized input/output format defined for `ai-services-unified.js`. Note the specific library used.", "details": "", - "status": "in-progress", + "status": "done", "dependencies": [], "parentTaskId": 61 }, @@ -3920,6 +3920,17 @@ "dependencies": [], "priority": "medium", "subtasks": [] + }, + { + "id": 74, + "title": "Task 74: Implement Local Kokoro TTS Support", + "description": "Integrate Text-to-Speech (TTS) functionality using a locally running Google Cloud Text-to-Speech (Kokoro) instance, enabling the application to synthesize speech from text.", + "details": "Implementation Details:\n1. **Kokoro Setup:** Assume the user has a local Kokoro TTS instance running and accessible via a network address (e.g., http://localhost:port).\n2. **Configuration:** Introduce new configuration options (e.g., in `.taskmasterconfig`) to enable/disable TTS, specify the provider ('kokoro_local'), and configure the Kokoro endpoint URL (`tts.kokoro.url`). Consider adding options for voice selection and language if the Kokoro API supports them.\n3. **API Interaction:** Implement a client module to interact with the local Kokoro TTS API. This module should handle sending text input and receiving audio data (likely in formats like WAV or MP3).\n4. **Audio Playback:** Integrate a cross-platform audio playback library (e.g., `playsound`, `simpleaudio`, or platform-specific APIs) to play the synthesized audio received from Kokoro.\n5. 
**Integration Point:** Identify initial areas in the application where TTS will be used (e.g., a command to read out the current task's title and description). Design the integration to be extensible for future use cases.\n6. **Error Handling:** Implement robust error handling for scenarios like: Kokoro instance unreachable, API errors during synthesis, invalid configuration, audio playback failures. Provide informative feedback to the user.\n7. **Dependencies:** Add any necessary HTTP client or audio playback libraries as project dependencies.", + "testStrategy": "1. **Unit Tests:** \n * Mock the Kokoro API client. Verify that the TTS module correctly formats requests based on input text and configuration.\n * Test handling of successful API responses (parsing audio data placeholder).\n * Test handling of various API error responses (e.g., 404, 500).\n * Mock the audio playback library. Verify that the received audio data is passed correctly to the playback function.\n * Test configuration loading and validation logic.\n2. **Integration Tests:**\n * Requires a running local Kokoro TTS instance (or a compatible mock server).\n * Send actual text snippets through the TTS module to the local Kokoro instance.\n * Verify that valid audio data is received (e.g., check format, non-zero size). Direct audio playback verification might be difficult in automated tests, focus on the data transfer.\n * Test the end-to-end flow by triggering TTS from an application command and ensuring no exceptions occur during synthesis and playback initiation.\n * Test error handling by attempting synthesis with the Kokoro instance stopped or misconfigured.\n3. 
**Manual Testing:**\n * Configure the application to point to a running local Kokoro instance.\n * Trigger TTS for various text inputs (short, long, special characters).\n * Verify that the audio is played back clearly and accurately reflects the input text.\n * Test enabling/disabling TTS via configuration.\n * Test behavior when the Kokoro endpoint is incorrect or the server is down.\n * Verify performance and responsiveness.", + "status": "pending", + "dependencies": [], + "priority": "medium", + "subtasks": [] } ] } \ No newline at end of file