# Task ID: 85
# Title: Update ai-services-unified.js for dynamic token limits
# Status: pending
# Dependencies: 83, 84
# Priority: medium
# Description: Modify the _unifiedServiceRunner function in ai-services-unified.js to use the new token counting utility and dynamically adjust output token limits based on input length.
# Details:
1. Import the token counter in `ai-services-unified.js`:

```javascript
const { countTokens } = require('./token-counter');
const { getParametersForRole, getModelCapabilities } = require('./config-manager');
```
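
For reference, the interfaces assumed for these helpers are sketched below. The shapes are inferred from how they are used in step 2; the provider, model, role, and path values are placeholders, not real configuration:

```javascript
// Illustration only: assumed shapes of the imported helpers, inferred
// from how step 2 uses them. Provider/model/role values are placeholders.
const { countTokens } = require('./token-counter');
const { getParametersForRole, getModelCapabilities } = require('./config-manager');

// Expected: a plain token count for the given text/provider/model
const promptTokens = countTokens('Example prompt text', 'some-provider', 'some-model');

// Expected: { maxInputTokens, maxOutputTokens, temperature } for the role
const roleParams = getParametersForRole('some-role', '/path/to/project');

// Expected: { contextWindowTokens, maxOutputTokens } for the model
const capabilities = getModelCapabilities('some-provider', 'some-model');

console.debug({ promptTokens, roleParams, capabilities });
```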

2. Update the `_unifiedServiceRunner` function to implement dynamic token limit adjustment:

```javascript
async function _unifiedServiceRunner({
  serviceType,
  provider,
  modelId,
  systemPrompt,
  prompt,
  temperature,
  currentRole,
  effectiveProjectRoot,
  // ... other parameters
}) {
  // Get role parameters with the new token limits
  const roleParams = getParametersForRole(currentRole, effectiveProjectRoot);

  // Get model capabilities
  const modelCapabilities = getModelCapabilities(provider, modelId);

  // Count tokens in the prompts
  const systemPromptTokens = countTokens(systemPrompt, provider, modelId);
  const userPromptTokens = countTokens(prompt, provider, modelId);
  const totalPromptTokens = systemPromptTokens + userPromptTokens;

  // Validate against the configured input token limit
  if (totalPromptTokens > roleParams.maxInputTokens) {
    throw new Error(
      `Prompt (${totalPromptTokens} tokens) exceeds configured max input tokens (${roleParams.maxInputTokens}) for role '${currentRole}'.`
    );
  }

  // Validate against the model's absolute context window
  if (modelCapabilities.contextWindowTokens && totalPromptTokens > modelCapabilities.contextWindowTokens) {
    throw new Error(
      `Prompt (${totalPromptTokens} tokens) exceeds model's context window (${modelCapabilities.contextWindowTokens}) for ${modelId}.`
    );
  }

  // Calculate available output tokens, starting from the role's configured limit
  let availableOutputTokens = roleParams.maxOutputTokens;

  // If the model has a combined context window, subtract the input tokens
  // so that input + output never exceeds it
  if (modelCapabilities.contextWindowTokens) {
    const remainingContextTokens = modelCapabilities.contextWindowTokens - totalPromptTokens;
    availableOutputTokens = Math.min(availableOutputTokens, remainingContextTokens);
  }

  // Also respect the model's absolute max output limit
  if (modelCapabilities.maxOutputTokens) {
    availableOutputTokens = Math.min(availableOutputTokens, modelCapabilities.maxOutputTokens);
  }

  // Prepare API call parameters
  // (apiKey, messages, baseUrl, schema, objectName and restApiParams are
  // resolved earlier in the existing function and are unchanged here)
  const callParams = {
    apiKey,
    modelId,
    maxTokens: availableOutputTokens, // Use the dynamically calculated output limit
    temperature: roleParams.temperature,
    messages,
    baseUrl,
    ...(serviceType === 'generateObject' && { schema, objectName }),
    ...restApiParams
  };

  // Log token usage information
  console.debug(`Token usage: ${totalPromptTokens} input tokens, ${availableOutputTokens} max output tokens`);

  // Rest of the function remains the same...
}
```
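
Optionally, the output-limit calculation above can be pulled out into a small pure helper so it can be unit-tested without mocking an API call. The sketch below is illustrative only; the helper name and module layout are not part of the existing codebase, and it assumes the same `roleParams`/`modelCapabilities` shapes used above:

```javascript
// Sketch only: a pure helper for the dynamic output-token calculation.
// The function name and its location are hypothetical.
function calculateAvailableOutputTokens(roleParams, modelCapabilities, totalPromptTokens) {
  // Start from the role's configured output limit
  let available = roleParams.maxOutputTokens;

  // Stay within the model's combined context window, if it declares one
  if (modelCapabilities.contextWindowTokens) {
    available = Math.min(available, modelCapabilities.contextWindowTokens - totalPromptTokens);
  }

  // Respect the model's absolute per-response output limit
  if (modelCapabilities.maxOutputTokens) {
    available = Math.min(available, modelCapabilities.maxOutputTokens);
  }

  // Never return a negative limit
  return Math.max(available, 0);
}

module.exports = { calculateAvailableOutputTokens };
```

With such a helper, the corresponding block inside `_unifiedServiceRunner` would reduce to a single call: `const availableOutputTokens = calculateAvailableOutputTokens(roleParams, modelCapabilities, totalPromptTokens);`.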

3. Update the error handling to provide clear messages about token limits:

```javascript
try {
  // Existing code...
} catch (error) {
  if (error.message.includes('tokens')) {
    // Token-related errors should be clearly identified
    console.error(`Token limit error: ${error.message}`);
  }
  throw error;
}
```

# Test Strategy:
1. Test with prompts of various lengths to verify dynamic adjustment (see the unit-test sketch after this list)
2. Test with different models to ensure model-specific limits are respected
3. Verify error messages are clear when limits are exceeded
4. Test edge cases: very short prompts, prompts near the limit
5. Integration test with actual API calls to verify the calculated limits work in practice
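
As a starting point for items 1-4, a Jest-style unit-test sketch is shown below. It assumes the hypothetical `calculateAvailableOutputTokens` helper sketched in the Details section; the module path and the numeric limits are placeholders, not existing code or real model limits:

```javascript
// Jest-style sketch; the helper, its module path, and the limits used
// here are hypothetical placeholders.
const { calculateAvailableOutputTokens } = require('./token-limit-helpers');

describe('calculateAvailableOutputTokens', () => {
  const roleParams = { maxOutputTokens: 4000 };

  test('uses the role limit when the context window has plenty of room', () => {
    const caps = { contextWindowTokens: 128000, maxOutputTokens: 8000 };
    expect(calculateAvailableOutputTokens(roleParams, caps, 1000)).toBe(4000);
  });

  test('shrinks the limit when the prompt nearly fills the context window', () => {
    const caps = { contextWindowTokens: 8000, maxOutputTokens: 8000 };
    expect(calculateAvailableOutputTokens(roleParams, caps, 6000)).toBe(2000);
  });

  test('caps the limit at maxOutputTokens for the model', () => {
    const caps = { contextWindowTokens: 200000, maxOutputTokens: 2048 };
    expect(calculateAvailableOutputTokens(roleParams, caps, 500)).toBe(2048);
  });
});
```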