# Task ID: 85
# Title: Update ai-services-unified.js for dynamic token limits
# Status: pending
# Dependencies: 83, 84
# Priority: medium
# Description: Modify the _unifiedServiceRunner function in ai-services-unified.js to use the new token counting utility and dynamically adjust output token limits based on input length.
# Details:
1. Import the token counter in `ai-services-unified.js`:

```javascript
const { countTokens } = require('./token-counter');
const {
  getParametersForRole,
  getModelCapabilities
} = require('./config-manager');
```

2. Update the `_unifiedServiceRunner` function to implement dynamic token limit adjustment:

```javascript
async function _unifiedServiceRunner({
  serviceType,
  provider,
  modelId,
  systemPrompt,
  prompt,
  temperature,
  currentRole,
  effectiveProjectRoot,
  // ... other parameters
}) {
  // Get role parameters with new token limits
  const roleParams = getParametersForRole(currentRole, effectiveProjectRoot);

  // Get model capabilities
  const modelCapabilities = getModelCapabilities(provider, modelId);

  // Count tokens in the prompts
  const systemPromptTokens = countTokens(systemPrompt, provider, modelId);
  const userPromptTokens = countTokens(prompt, provider, modelId);
  const totalPromptTokens = systemPromptTokens + userPromptTokens;

  // Validate against input token limits
  if (totalPromptTokens > roleParams.maxInputTokens) {
    throw new Error(
      `Prompt (${totalPromptTokens} tokens) exceeds configured max input tokens (${roleParams.maxInputTokens}) for role '${currentRole}'.`
    );
  }

  // Validate against model's absolute context window
  if (
    modelCapabilities.contextWindowTokens &&
    totalPromptTokens > modelCapabilities.contextWindowTokens
  ) {
    throw new Error(
      `Prompt (${totalPromptTokens} tokens) exceeds model's context window (${modelCapabilities.contextWindowTokens}) for ${modelId}.`
    );
  }

  // Calculate available output tokens, starting from the role's configured limit
  let availableOutputTokens = roleParams.maxOutputTokens;

  // If the model has a combined context window, subtract the input tokens
  // so input + output never exceeds it
  if (modelCapabilities.contextWindowTokens) {
    const remainingContextTokens =
      modelCapabilities.contextWindowTokens - totalPromptTokens;
    availableOutputTokens = Math.min(availableOutputTokens, remainingContextTokens);
  }

  // Also respect the model's absolute max output limit
  if (modelCapabilities.maxOutputTokens) {
    availableOutputTokens = Math.min(
      availableOutputTokens,
      modelCapabilities.maxOutputTokens
    );
  }

  // Prepare API call parameters
  const callParams = {
    apiKey,
    modelId,
    maxTokens: availableOutputTokens, // Use dynamically calculated output limit
    temperature: roleParams.temperature,
    messages,
    baseUrl,
    ...(serviceType === 'generateObject' && { schema, objectName }),
    ...restApiParams
  };

  // Log token usage information
  console.debug(
    `Token usage: ${totalPromptTokens} input tokens, ${availableOutputTokens} max output tokens`
  );

  // Rest of the function remains the same...
}
```

3. Update the error handling to provide clear messages about token limits:

```javascript
try {
  // Existing code...
} catch (error) {
  if (error.message.includes('tokens')) {
    // Token-related errors should be clearly identified
    console.error(`Token limit error: ${error.message}`);
  }
  throw error;
}
```

# Test Strategy:
1. Test with prompts of various lengths to verify dynamic adjustment
2. Test with different models to ensure model-specific limits are respected
3. Verify error messages are clear when limits are exceeded
4. Test edge cases: very short prompts, prompts near the limit
5. Integration test with actual API calls to verify the calculated limits work in practice
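As a starting point for tests 1, 2, and 4, the output-token clamping logic can be exercised in isolation. The sketch below assumes the calculation from step 2 is extracted into a small pure helper (here called `calculateAvailableOutputTokens`, a hypothetical name not present in the codebase) so it can be unit-tested without mocking any provider calls:

```javascript
// Hypothetical helper mirroring the clamping logic from step 2; the name and
// module layout are assumptions for illustration, not existing code.
function calculateAvailableOutputTokens(roleParams, modelCapabilities, totalPromptTokens) {
  let availableOutputTokens = roleParams.maxOutputTokens;
  if (modelCapabilities.contextWindowTokens) {
    availableOutputTokens = Math.min(
      availableOutputTokens,
      modelCapabilities.contextWindowTokens - totalPromptTokens
    );
  }
  if (modelCapabilities.maxOutputTokens) {
    availableOutputTokens = Math.min(availableOutputTokens, modelCapabilities.maxOutputTokens);
  }
  return availableOutputTokens;
}

// Example unit tests using Node's built-in test runner (run with `node --test`).
const test = require('node:test');
const assert = require('node:assert');

test('respects role maxOutputTokens when the context window is large', () => {
  const result = calculateAvailableOutputTokens(
    { maxOutputTokens: 4000 },
    { contextWindowTokens: 200000, maxOutputTokens: 8192 },
    1000 // short prompt leaves plenty of context
  );
  assert.strictEqual(result, 4000);
});

test('clamps output to the remaining context window for long prompts', () => {
  const result = calculateAvailableOutputTokens(
    { maxOutputTokens: 4000 },
    { contextWindowTokens: 8192, maxOutputTokens: 8192 },
    6000 // prompt near the limit leaves only 2192 tokens
  );
  assert.strictEqual(result, 2192);
});
```

Keeping the calculation in a pure function like this also makes it straightforward to add cases per model (different `contextWindowTokens` / `maxOutputTokens` values) without touching the integration tests in item 5.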