# Task ID: 85
# Title: Update ai-services-unified.js for dynamic token limits
# Status: pending
# Dependencies: 83, 84
# Priority: medium
# Description: Modify the _unifiedServiceRunner function in ai-services-unified.js to use the new token counting utility and dynamically adjust output token limits based on input length.
# Details:
1. Import the token counter in `ai-services-unified.js`:

```javascript
const { countTokens } = require('./token-counter');
const { getParametersForRole, getModelCapabilities } = require('./config-manager');
```
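
For reference, the interfaces assumed for these helpers are sketched below. The shapes are inferred from how they are used in step 2; the provider, model, role, and path values are placeholders, not real configuration:

```javascript
// Illustration only: assumed shapes of the imported helpers, inferred
// from how step 2 uses them. Provider/model/role values are placeholders.
const { countTokens } = require('./token-counter');
const { getParametersForRole, getModelCapabilities } = require('./config-manager');

// Expected: a plain token count for the given text/provider/model
const promptTokens = countTokens('Example prompt text', 'some-provider', 'some-model');

// Expected: { maxInputTokens, maxOutputTokens, temperature } for the role
const roleParams = getParametersForRole('some-role', '/path/to/project');

// Expected: { contextWindowTokens, maxOutputTokens } for the model
const capabilities = getModelCapabilities('some-provider', 'some-model');

console.debug({ promptTokens, roleParams, capabilities });
```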

2. Update the `_unifiedServiceRunner` function to implement dynamic token limit adjustment:

```javascript
async function _unifiedServiceRunner({
  serviceType,
  provider,
  modelId,
  systemPrompt,
  prompt,
  temperature,
  currentRole,
  effectiveProjectRoot,
  // ... other parameters
}) {
  // Get role parameters with the new token limits
  const roleParams = getParametersForRole(currentRole, effectiveProjectRoot);

  // Get model capabilities
  const modelCapabilities = getModelCapabilities(provider, modelId);

  // Count tokens in the prompts
  const systemPromptTokens = countTokens(systemPrompt, provider, modelId);
  const userPromptTokens = countTokens(prompt, provider, modelId);
  const totalPromptTokens = systemPromptTokens + userPromptTokens;

  // Validate against the configured input token limit
  if (totalPromptTokens > roleParams.maxInputTokens) {
    throw new Error(
      `Prompt (${totalPromptTokens} tokens) exceeds configured max input tokens (${roleParams.maxInputTokens}) for role '${currentRole}'.`
    );
  }

  // Validate against the model's absolute context window
  if (modelCapabilities.contextWindowTokens && totalPromptTokens > modelCapabilities.contextWindowTokens) {
    throw new Error(
      `Prompt (${totalPromptTokens} tokens) exceeds model's context window (${modelCapabilities.contextWindowTokens}) for ${modelId}.`
    );
  }

  // Calculate available output tokens, starting from the role's configured limit
  let availableOutputTokens = roleParams.maxOutputTokens;

  // If the model has a combined context window, subtract the input tokens
  // so that input + output never exceeds it
  if (modelCapabilities.contextWindowTokens) {
    const remainingContextTokens = modelCapabilities.contextWindowTokens - totalPromptTokens;
    availableOutputTokens = Math.min(availableOutputTokens, remainingContextTokens);
  }

  // Also respect the model's absolute max output limit
  if (modelCapabilities.maxOutputTokens) {
    availableOutputTokens = Math.min(availableOutputTokens, modelCapabilities.maxOutputTokens);
  }

  // Prepare API call parameters
  // (apiKey, messages, baseUrl, schema, objectName and restApiParams are
  // resolved earlier in the existing function and are unchanged here)
  const callParams = {
    apiKey,
    modelId,
    maxTokens: availableOutputTokens, // Use the dynamically calculated output limit
    temperature: roleParams.temperature,
    messages,
    baseUrl,
    ...(serviceType === 'generateObject' && { schema, objectName }),
    ...restApiParams
  };

  // Log token usage information
  console.debug(`Token usage: ${totalPromptTokens} input tokens, ${availableOutputTokens} max output tokens`);

  // Rest of the function remains the same...
}
```
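
Optionally, the output-limit calculation above can be pulled out into a small pure helper so it can be unit-tested without mocking an API call. The sketch below is illustrative only; the helper name and module layout are not part of the existing codebase, and it assumes the same `roleParams`/`modelCapabilities` shapes used above:

```javascript
// Sketch only: a pure helper for the dynamic output-token calculation.
// The function name and its location are hypothetical.
function calculateAvailableOutputTokens(roleParams, modelCapabilities, totalPromptTokens) {
  // Start from the role's configured output limit
  let available = roleParams.maxOutputTokens;

  // Stay within the model's combined context window, if it declares one
  if (modelCapabilities.contextWindowTokens) {
    available = Math.min(available, modelCapabilities.contextWindowTokens - totalPromptTokens);
  }

  // Respect the model's absolute per-response output limit
  if (modelCapabilities.maxOutputTokens) {
    available = Math.min(available, modelCapabilities.maxOutputTokens);
  }

  // Never return a negative limit
  return Math.max(available, 0);
}

module.exports = { calculateAvailableOutputTokens };
```

With such a helper, the corresponding block inside `_unifiedServiceRunner` would reduce to a single call: `const availableOutputTokens = calculateAvailableOutputTokens(roleParams, modelCapabilities, totalPromptTokens);`.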

3. Update the error handling to provide clear messages about token limits:

```javascript
try {
  // Existing code...
} catch (error) {
  if (error.message.includes('tokens')) {
    // Token-related errors should be clearly identified
    console.error(`Token limit error: ${error.message}`);
  }
  throw error;
}
```

# Test Strategy:
1. Test with prompts of various lengths to verify dynamic adjustment (see the unit-test sketch after this list)
2. Test with different models to ensure model-specific limits are respected
3. Verify error messages are clear when limits are exceeded
4. Test edge cases: very short prompts, prompts near the limit
5. Integration test with actual API calls to verify the calculated limits work in practice
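
As a starting point for items 1-4, a Jest-style unit-test sketch is shown below. It assumes the hypothetical `calculateAvailableOutputTokens` helper sketched in the Details section; the module path and the numeric limits are placeholders, not existing code or real model limits:

```javascript
// Jest-style sketch; the helper, its module path, and the limits used
// here are hypothetical placeholders.
const { calculateAvailableOutputTokens } = require('./token-limit-helpers');

describe('calculateAvailableOutputTokens', () => {
  const roleParams = { maxOutputTokens: 4000 };

  test('uses the role limit when the context window has plenty of room', () => {
    const caps = { contextWindowTokens: 128000, maxOutputTokens: 8000 };
    expect(calculateAvailableOutputTokens(roleParams, caps, 1000)).toBe(4000);
  });

  test('shrinks the limit when the prompt nearly fills the context window', () => {
    const caps = { contextWindowTokens: 8000, maxOutputTokens: 8000 };
    expect(calculateAvailableOutputTokens(roleParams, caps, 6000)).toBe(2000);
  });

  test('caps the limit at maxOutputTokens for the model', () => {
    const caps = { contextWindowTokens: 200000, maxOutputTokens: 2048 };
    expect(calculateAvailableOutputTokens(roleParams, caps, 500)).toBe(2048);
  });
});
```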