fix(ai): Correctly imports generateText in openai.js, adds a specific cause and reason for OpenRouter failures in the openrouter.js catch block, runs complexity analysis on all tm tasks, and adds new tasks to improve maxTokens handling so it accounts for both the input and output maximums. Also adjusts the default fallback max tokens so 3.5 does not fail.

Eyal Toledano
2025-05-17 18:42:57 -04:00
parent 8a3b611fc2
commit 026815353f
12 changed files with 1364 additions and 304 deletions

tasks/task_085.txt Normal file

@@ -0,0 +1,104 @@
# Task ID: 85
# Title: Update ai-services-unified.js for dynamic token limits
# Status: pending
# Dependencies: 83, 84
# Priority: medium
# Description: Modify the _unifiedServiceRunner function in ai-services-unified.js to use the new token counting utility and dynamically adjust output token limits based on input length.
# Details:
1. Import the token counter in `ai-services-unified.js`:
```javascript
const { countTokens } = require('./token-counter');
const { getParametersForRole, getModelCapabilities } = require('./config-manager');
```
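The `token-counter` utility itself is delivered by the dependency tasks; for orientation, here is a minimal sketch of the interface this step assumes. The characters-per-token fallback is an illustrative assumption, not the real implementation:
```javascript
// token-counter.js — hypothetical sketch of the assumed interface only.
// The real module comes from the dependency tasks; this fallback simply
// approximates ~4 characters per token when no provider-specific
// tokenizer is available.
function countTokens(text, provider, modelId) {
  if (!text) return 0;
  // A provider/model-specific tokenizer would be selected here based on
  // `provider` and `modelId`; the heuristic below is just a stand-in.
  return Math.ceil(text.length / 4);
}

module.exports = { countTokens };
```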
2. Update the `_unifiedServiceRunner` function to implement dynamic token limit adjustment:
```javascript
async function _unifiedServiceRunner({
  serviceType,
  provider,
  modelId,
  systemPrompt,
  prompt,
  temperature,
  currentRole,
  effectiveProjectRoot,
  // ... other parameters
}) {
  // Get role parameters with new token limits
  const roleParams = getParametersForRole(currentRole, effectiveProjectRoot);

  // Get model capabilities
  const modelCapabilities = getModelCapabilities(provider, modelId);

  // Count tokens in the prompts
  const systemPromptTokens = countTokens(systemPrompt, provider, modelId);
  const userPromptTokens = countTokens(prompt, provider, modelId);
  const totalPromptTokens = systemPromptTokens + userPromptTokens;

  // Validate against input token limits
  if (totalPromptTokens > roleParams.maxInputTokens) {
    throw new Error(
      `Prompt (${totalPromptTokens} tokens) exceeds configured max input tokens (${roleParams.maxInputTokens}) for role '${currentRole}'.`
    );
  }

  // Validate against model's absolute context window
  if (modelCapabilities.contextWindowTokens && totalPromptTokens > modelCapabilities.contextWindowTokens) {
    throw new Error(
      `Prompt (${totalPromptTokens} tokens) exceeds model's context window (${modelCapabilities.contextWindowTokens}) for ${modelId}.`
    );
  }

  // Calculate available output tokens.
  // If the model has a combined context window, we need to subtract input tokens.
  let availableOutputTokens = roleParams.maxOutputTokens;

  // If the model has a context window constraint, ensure we don't exceed it
  if (modelCapabilities.contextWindowTokens) {
    const remainingContextTokens = modelCapabilities.contextWindowTokens - totalPromptTokens;
    availableOutputTokens = Math.min(availableOutputTokens, remainingContextTokens);
  }

  // Also respect the model's absolute max output limit
  if (modelCapabilities.maxOutputTokens) {
    availableOutputTokens = Math.min(availableOutputTokens, modelCapabilities.maxOutputTokens);
  }

  // Prepare API call parameters
  const callParams = {
    apiKey,
    modelId,
    maxTokens: availableOutputTokens, // Use dynamically calculated output limit
    temperature: roleParams.temperature,
    messages,
    baseUrl,
    ...(serviceType === 'generateObject' && { schema, objectName }),
    ...restApiParams
  };

  // Log token usage information
  console.debug(`Token usage: ${totalPromptTokens} input tokens, ${availableOutputTokens} max output tokens`);

  // Rest of the function remains the same...
}
```
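To make the adjustment concrete, a worked example with purely hypothetical limits (no specific model is implied):
```javascript
// Hypothetical numbers for illustration only
const roleMaxOutput = 8000; // roleParams.maxOutputTokens
const contextWindow = 16000; // modelCapabilities.contextWindowTokens
const modelMaxOutput = 4096; // modelCapabilities.maxOutputTokens
const totalPromptTokens = 13000; // counted from system + user prompts

const availableOutputTokens = Math.min(
  roleMaxOutput,
  contextWindow - totalPromptTokens, // only 3000 tokens of context remain
  modelMaxOutput
);
console.log(availableOutputTokens); // 3000 — the context window is the binding constraint
```
With a shorter prompt (say 2,000 tokens), the same calculation returns 4096, since the model's own output cap becomes the limiting factor instead.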
3. Update the error handling to provide clear messages about token limits:
```javascript
try {
  // Existing code...
} catch (error) {
  if (error.message.includes('tokens')) {
    // Token-related errors should be clearly identified
    console.error(`Token limit error: ${error.message}`);
  }
  throw error;
}
```
# Test Strategy:
1. Test with prompts of various lengths to verify dynamic adjustment
2. Test with different models to ensure model-specific limits are respected
3. Verify error messages are clear when limits are exceeded
4. Test edge cases: very short prompts, prompts near the limit
5. Integration test with actual API calls to verify the calculated limits work in practice
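A minimal sketch of how the over-limit case could be unit tested, assuming a Jest-style runner, that `_unifiedServiceRunner` is exported for testing, and that `config-manager` and `token-counter` can be mocked; the module paths, parameter values, and mock shapes below are assumptions, not the project's actual layout:
```javascript
// Hypothetical Jest sketch — paths, exports, and limits are assumptions.
jest.mock('../scripts/modules/config-manager', () => ({
  getParametersForRole: jest.fn(() => ({
    maxInputTokens: 1000,
    maxOutputTokens: 500,
    temperature: 0.2
  })),
  getModelCapabilities: jest.fn(() => ({
    contextWindowTokens: 4096,
    maxOutputTokens: 1024
  }))
}));
jest.mock('../scripts/modules/token-counter', () => ({
  countTokens: jest.fn(() => 2000) // pretend each prompt counts as 2000 tokens
}));

const { _unifiedServiceRunner } = require('../scripts/modules/ai-services-unified');

describe('_unifiedServiceRunner token limit validation', () => {
  it('rejects prompts that exceed the configured max input tokens', async () => {
    // 2000 (system) + 2000 (user) mocked tokens exceed the mocked 1000-token limit,
    // so the runner should throw before making any provider call.
    await expect(
      _unifiedServiceRunner({
        serviceType: 'generateText',
        provider: 'openai',
        modelId: 'gpt-4o',
        systemPrompt: 'irrelevant — token count is mocked',
        prompt: 'irrelevant — token count is mocked',
        currentRole: 'main',
        effectiveProjectRoot: '/tmp/project'
      })
    ).rejects.toThrow(/exceeds configured max input tokens/);
  });
});
```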