Refactor: Improve MCP logging, update E2E & tests
Refactors MCP server logging and updates testing infrastructure.
- MCP Server:
  - Replaced manual logger wrappers with the centralized `createLogWrapper` utility.
  - Updated direct function calls to use the `{ session, mcpLog }` context.
  - Removed the deprecated `model` parameter from the analyze, expand-all, and expand-task tools.
  - Adjusted MCP tool import paths and parameter descriptions.
- Documentation:
  - Modified `docs/configuration.md`.
  - Modified `docs/tutorial.md`.
- Testing:
  - E2E Script (`run_e2e.sh`):
    - Removed `set -e`.
    - Added an LLM analysis function (`analyze_log_with_llm`) and integrated it into the run.
    - Moved test run directory creation earlier, before the main logging pipe.
    - Added debug echo statements.
  - Deleted Unit Tests: Removed `ai-client-factory.test.js`, `ai-client-utils.test.js`, and `ai-services.test.js`.
  - Modified Fixtures: Updated `scripts/task-complexity-report.json`.
- Dev Scripts:
  - Modified `scripts/dev.js`.
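
As context for reviewers, here is a minimal sketch of the wrapper pattern these changes converge on. `createLogWrapper` and the `{ session, mcpLog }` context come from this commit; the wrapper body and `someDirectFunction` are illustrative assumptions, not the exact implementation:

    // Sketch: adapt an MCP tool's `log` object to the logger shape that
    // core task-master functions expect (info/warn/error/debug/success).
    function createLogWrapper(log) {
        return {
            info: (...args) => log.info(...args),
            warn: (...args) => log.warn(...args),
            error: (...args) => log.error(...args),
            debug: (...args) => log.debug?.(...args),
            success: (...args) => log.info(...args) // MCP loggers lack `success`; map it to `info`
        };
    }

    // Inside a tool's execute(args, { log, session }):
    // const mcpLog = createLogWrapper(log);
    // await someDirectFunction(args, { session, mcpLog }); // someDirectFunction is hypothetical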
run_e2e.sh:

@@ -1,7 +1,5 @@
#!/bin/bash

# Exit immediately if a command exits with a non-zero status.
set -e
# Treat unset variables as an error when substituting.
set -u
# Prevent errors in pipelines from being masked.
@@ -33,6 +31,11 @@ mkdir -p "$LOG_DIR"
TIMESTAMP=$(date +"%Y%m%d_%H%M%S")
LOG_FILE="$LOG_DIR/e2e_run_$TIMESTAMP.log"

# Define and create the test run directory *before* the main pipe
mkdir -p "$BASE_TEST_DIR" # Ensure base exists first
TEST_RUN_DIR="$BASE_TEST_DIR/run_$TIMESTAMP"
mkdir -p "$TEST_RUN_DIR"

# Echo starting message to the original terminal BEFORE the main piped block
echo "Starting E2E test. Output will be shown here and saved to: $LOG_FILE"
echo "Running from directory: $(pwd)"
@@ -82,6 +85,125 @@ overall_start_time=$(date +%s)
    echo " STEP ${test_step_count}: [$(_get_elapsed_time_for_log)] $(date +"%Y-%m-%d %H:%M:%S") $1"
    echo "============================================="
}

analyze_log_with_llm() {
    local log_file="$1"
    local provider_summary_log="provider_add_task_summary.log" # File summarizing provider test outcomes
    local api_key=""
    local api_endpoint="https://api.anthropic.com/v1/messages"
    local api_key_name="CLAUDE_API_KEY"

    echo "" # Add a newline before analysis starts
    log_info "Attempting LLM analysis of log: $log_file"

    # Check for jq and curl
    if ! command -v jq &> /dev/null; then
        log_error "LLM Analysis requires 'jq'. Skipping analysis."
        return 1
    fi
    if ! command -v curl &> /dev/null; then
        log_error "LLM Analysis requires 'curl'. Skipping analysis."
        return 1
    fi

    # Check for API Key in the TEST_RUN_DIR/.env (copied earlier)
    if [ -f ".env" ]; then
        # Using grep and sed for better handling of potential quotes/spaces
        api_key=$(grep "^${api_key_name}=" .env | sed -e "s/^${api_key_name}=//" -e 's/^[[:space:]"]*//' -e 's/[[:space:]"]*$//')
    fi

    if [ -z "$api_key" ]; then
        log_error "${api_key_name} not found or empty in .env file in the test run directory ($(pwd)/.env). Skipping LLM analysis."
        return 1
    fi

    if [ ! -f "$log_file" ]; then
        log_error "Log file not found: $log_file. Skipping LLM analysis."
        return 1
    fi

    log_info "Reading log file content..."
    local log_content
    # Read entire file, handle potential errors
    log_content=$(cat "$log_file") || {
        log_error "Failed to read log file: $log_file. Skipping LLM analysis."
        return 1
    }

    # Prepare the prompt
    # Using printf with %s for the log content is generally safer than direct variable expansion
    local prompt_template='Analyze the following E2E test log for the task-master tool. The log contains output from various '\''task-master'\'' commands executed sequentially.\n\nYour goal is to:\n1. Verify if the key E2E steps completed successfully based on the log messages (e.g., init, parse PRD, list tasks, analyze complexity, expand task, set status, manage models, add/remove dependencies, add/update/remove tasks/subtasks, generate files).\n2. **Specifically analyze the Multi-Provider Add-Task Test Sequence:**\n a. Identify which providers were tested for `add-task`. Look for log steps like "Testing Add-Task with Provider: ..." and the summary log `'"$provider_summary_log"'`.\n b. For each tested provider, determine if `add-task` succeeded or failed. Note the created task ID if successful.\n c. Review the corresponding `add_task_show_output_<provider>_id_<id>.log` file (if created) for each successful `add-task` execution.\n d. **Compare the quality and completeness** of the task generated by each successful provider based on their `show` output. Assign a score (e.g., 1-10, 10 being best) based on relevance to the prompt, detail level, and correctness.\n e. Note any providers where `add-task` failed or where the task ID could not be extracted.\n3. Identify any general explicit "[ERROR]" messages or stack traces throughout the *entire* log.\n4. Identify any potential warnings or unusual output that might indicate a problem even if not marked as an explicit error.\n5. Provide an overall assessment of the test run'\''s health based *only* on the log content.\n\nReturn your analysis **strictly** in the following JSON format. Do not include any text outside of the JSON structure:\n\n{\n "overall_status": "Success|Failure|Warning",\n "verified_steps": [ "Initialization", "PRD Parsing", /* ...other general steps observed... */ ],\n "provider_add_task_comparison": {\n "prompt_used": "... (extract from log if possible or state 'standard auth prompt') ...",\n "provider_results": {\n "anthropic": { "status": "Success|Failure|ID_Extraction_Failed|Set_Model_Failed", "task_id": "...", "score": "X/10 | N/A", "notes": "..." },\n "openai": { "status": "Success|Failure|...", "task_id": "...", "score": "X/10 | N/A", "notes": "..." },\n /* ... include all tested providers ... */\n },\n "comparison_summary": "Brief overall comparison of generated tasks..."\n },\n "detected_issues": [ { "severity": "Error|Warning|Anomaly", "description": "...", "log_context": "[Optional, short snippet from log near the issue]" } ],\n "llm_summary_points": [ "Overall summary point 1", "Provider comparison highlight", "Any major issues noted" ]\n}\n\nHere is the main log content:\n\n%s'

    local full_prompt
    printf -v full_prompt "$prompt_template" "$log_content"

    # Construct the JSON payload for Claude Messages API
    # Using jq for robust JSON construction
    local payload
    payload=$(jq -n --arg prompt "$full_prompt" '{
        "model": "claude-3-7-sonnet-20250219",
        "max_tokens": 10000,
        "messages": [
            {"role": "user", "content": $prompt}
        ],
        "temperature": 0.0
    }') || {
        log_error "Failed to create JSON payload using jq."
        return 1
    }

    log_info "Sending request to LLM API endpoint: $api_endpoint ..."
    local response_raw response_http_code response_body
    # Capture body and HTTP status code separately
    response_raw=$(curl -s -w "\nHTTP_STATUS_CODE:%{http_code}" -X POST "$api_endpoint" \
        -H "Content-Type: application/json" \
        -H "x-api-key: $api_key" \
        -H "anthropic-version: 2023-06-01" \
        --data "$payload")

    # Extract status code and body
    response_http_code=$(echo "$response_raw" | grep '^HTTP_STATUS_CODE:' | sed 's/HTTP_STATUS_CODE://')
    response_body=$(echo "$response_raw" | sed '$d') # Remove last line (status code)

    if [ "$response_http_code" != "200" ]; then
        log_error "LLM API call failed with HTTP status $response_http_code."
        log_error "Response Body: $response_body"
        return 1
    fi

    if [ -z "$response_body" ]; then
        log_error "LLM API call returned empty response body."
        return 1
    fi

    log_info "Received LLM response (HTTP 200). Parsing analysis JSON..."

    # Extract the analysis JSON string from the API response (adjust jq path if needed)
    local analysis_json_string
    analysis_json_string=$(echo "$response_body" | jq -r '.content[0].text' 2>/dev/null) # Assumes Messages API structure

    if [ -z "$analysis_json_string" ]; then
        log_error "Failed to extract 'content[0].text' from LLM response JSON."
        log_error "Full API response body: $response_body"
        return 1
    fi

    # Validate and pretty-print the extracted JSON
    if ! echo "$analysis_json_string" | jq -e . > /dev/null 2>&1; then
        log_error "Extracted content from LLM is not valid JSON."
        log_error "Raw extracted content: $analysis_json_string"
        return 1
    fi

    log_success "LLM analysis completed successfully."
    echo ""
    echo "--- LLM Analysis ---"
    # Pretty print the JSON analysis
    echo "$analysis_json_string" | jq '.'
    echo "--------------------"

    return 0
}
# ---

# --- Test Setup (Output to tee) ---
@@ -95,12 +217,9 @@ overall_start_time=$(date +%s)
    exit 1
fi

mkdir -p "$BASE_TEST_DIR"
log_info "Ensured base test directory exists: $BASE_TEST_DIR"

TEST_RUN_DIR="$BASE_TEST_DIR/run_$TIMESTAMP"
mkdir -p "$TEST_RUN_DIR"
log_info "Created test run directory: $TEST_RUN_DIR"
log_info "Using test run directory (created earlier): $TEST_RUN_DIR"

# Check if source .env file exists
if [ ! -f "$MAIN_ENV_FILE" ]; then
@@ -209,8 +328,103 @@ overall_start_time=$(date +%s)
log_step "Checking final model configuration"
task-master models > models_final_config.log
log_success "Final model config saved to models_final_config.log"

log_step "Resetting main model to default (Claude Sonnet) before provider tests"
task-master models --set-main claude-3-7-sonnet-20250219
log_success "Main model reset to claude-3-7-sonnet-20250219."

# === End Model Commands Test ===

# === Multi-Provider Add-Task Test ===
log_step "Starting Multi-Provider Add-Task Test Sequence"

# Define providers, models, and flags
# Array order matters: providers[i] corresponds to models[i] and flags[i]
declare -a providers=("anthropic" "openai" "google" "perplexity" "xai" "openrouter")
declare -a models=(
    "claude-3-7-sonnet-20250219"
    "gpt-4o"
    "gemini-2.5-pro-exp-03-25"
    "sonar-pro"
    "grok-3"
    "anthropic/claude-3.7-sonnet" # OpenRouter uses Claude 3.7
)
# Flags: Add provider-specific flags here, e.g., --openrouter. Use empty string if none.
declare -a flags=("" "" "" "" "" "--openrouter")

# Consistent prompt for all providers
add_task_prompt="Create a task to implement user authentication using OAuth 2.0 with Google as the provider. Include steps for registering the app, handling the callback, and storing user sessions."
log_info "Using consistent prompt for add-task tests: \"$add_task_prompt\""

for i in "${!providers[@]}"; do
    provider="${providers[$i]}"
    model="${models[$i]}"
    flag="${flags[$i]}"

    log_step "Testing Add-Task with Provider: $provider (Model: $model)"

    # 1. Set the main model for this provider
    log_info "Setting main model to $model for $provider ${flag:+using flag $flag}..."
    set_model_cmd="task-master models --set-main \"$model\" $flag"
    echo "Executing: $set_model_cmd"
    if eval $set_model_cmd; then
        log_success "Successfully set main model for $provider."
    else
        log_error "Failed to set main model for $provider. Skipping add-task for this provider."
        # Optionally save failure info here if needed for LLM analysis
        echo "Provider $provider set-main FAILED" >> provider_add_task_summary.log
        continue # Skip to the next provider
    fi

    # 2. Run add-task
    log_info "Running add-task with prompt..."
    add_task_output_file="add_task_raw_output_${provider}.log"
    # Run add-task and capture ALL output (stdout & stderr) to a file AND a variable
    add_task_cmd_output=$(task-master add-task --prompt "$add_task_prompt" 2>&1 | tee "$add_task_output_file")
    add_task_exit_code=${PIPESTATUS[0]}

    # 3. Check for success and extract task ID
    new_task_id=""
    if [ $add_task_exit_code -eq 0 ] && echo "$add_task_cmd_output" | grep -q "Successfully added task with ID:"; then
        # Attempt to extract the ID (adjust grep/sed/awk as needed based on actual output format)
        new_task_id=$(echo "$add_task_cmd_output" | grep "Successfully added task with ID:" | sed 's/.*Successfully added task with ID: \([0-9.]\+\).*/\1/')
        if [ -n "$new_task_id" ]; then
            log_success "Add-task succeeded for $provider. New task ID: $new_task_id"
            echo "Provider $provider add-task SUCCESS (ID: $new_task_id)" >> provider_add_task_summary.log
        else
            # Succeeded but couldn't parse ID - treat as warning/anomaly
            log_error "Add-task command succeeded for $provider, but failed to extract task ID from output."
            echo "Provider $provider add-task SUCCESS (ID extraction FAILED)" >> provider_add_task_summary.log
            new_task_id="UNKNOWN_ID_EXTRACTION_FAILED"
        fi
    else
        log_error "Add-task command failed for $provider (Exit Code: $add_task_exit_code). See $add_task_output_file for details."
        echo "Provider $provider add-task FAILED (Exit Code: $add_task_exit_code)" >> provider_add_task_summary.log
        new_task_id="FAILED"
    fi

    # 4. Run task show if ID was obtained (even if extraction failed, use placeholder)
    if [ "$new_task_id" != "FAILED" ] && [ "$new_task_id" != "UNKNOWN_ID_EXTRACTION_FAILED" ]; then
        log_info "Running task show for new task ID: $new_task_id"
        show_output_file="add_task_show_output_${provider}_id_${new_task_id}.log"
        if task-master show "$new_task_id" > "$show_output_file"; then
            log_success "Task show output saved to $show_output_file"
        else
            log_error "task show command failed for ID $new_task_id. Check log."
            # Still keep the file, it might contain error output
        fi
    elif [ "$new_task_id" == "UNKNOWN_ID_EXTRACTION_FAILED" ]; then
        log_info "Skipping task show for $provider due to ID extraction failure."
    else
        log_info "Skipping task show for $provider due to add-task failure."
    fi

done # End of provider loop

log_step "Finished Multi-Provider Add-Task Test Sequence"
echo "Provider add-task summary log available at: provider_add_task_summary.log"
# === End Multi-Provider Add-Task Test ===

log_step "Listing tasks again (final)"
task-master list --with-subtasks > task_list_final.log
log_success "Final task list saved to task_list_final.log"
@@ -386,4 +600,26 @@ else
fi
echo "-------------------------"

# --- Attempt LLM Analysis ---
echo "DEBUG: Entering LLM Analysis section..."
# Run this *after* the main execution block and tee pipe finish writing the log file
# It will read the completed log file and append its output to the terminal (and the log via subsequent writes if tee is still active, though it shouldn't be)
# Change directory back into the test run dir where .env is located
if [ -d "$TEST_RUN_DIR" ]; then
    echo "DEBUG: Found TEST_RUN_DIR: $TEST_RUN_DIR. Attempting cd..."
    cd "$TEST_RUN_DIR"
    echo "DEBUG: Changed directory to $(pwd). Calling analyze_log_with_llm..."
    analyze_log_with_llm "$LOG_FILE"
    echo "DEBUG: analyze_log_with_llm function call finished."
    # Optional: cd back again if needed, though script is ending
    # cd "$ORIGINAL_DIR"
else
    # Use log_error format even outside the pipe for consistency
    current_time_for_error=$(date +%s)
    elapsed_seconds_for_error=$((current_time_for_error - overall_start_time)) # Use overall start time
    formatted_duration_for_error=$(_format_duration "$elapsed_seconds_for_error")
    echo "[ERROR] [$formatted_duration_for_error] $(date +"%Y-%m-%d %H:%M:%S") Test run directory $TEST_RUN_DIR not found. Cannot perform LLM analysis." >&2
fi

echo "DEBUG: Reached end of script before final exit."
exit $EXIT_CODE # Exit with the status of the main script block

ai-client-factory.test.js (deleted):

@@ -1,550 +0,0 @@
import { jest } from '@jest/globals';
import path from 'path'; // Needed for mocking fs

// --- Mock Vercel AI SDK Modules ---
// Mock implementations - they just need to be callable and return a basic object
const mockCreateOpenAI = jest.fn(() => ({ provider: 'openai', type: 'mock' }));
const mockCreateAnthropic = jest.fn(() => ({
  provider: 'anthropic',
  type: 'mock'
}));
const mockCreateGoogle = jest.fn(() => ({ provider: 'google', type: 'mock' }));
const mockCreatePerplexity = jest.fn(() => ({
  provider: 'perplexity',
  type: 'mock'
}));
const mockCreateOllama = jest.fn(() => ({ provider: 'ollama', type: 'mock' }));
const mockCreateMistral = jest.fn(() => ({
  provider: 'mistral',
  type: 'mock'
}));
const mockCreateAzure = jest.fn(() => ({ provider: 'azure', type: 'mock' }));
const mockCreateXai = jest.fn(() => ({ provider: 'xai', type: 'mock' }));
// jest.unstable_mockModule('@ai-sdk/grok', () => ({
//   createGrok: mockCreateGrok
// }));
const mockCreateOpenRouter = jest.fn(() => ({
  provider: 'openrouter',
  type: 'mock'
}));

jest.unstable_mockModule('@ai-sdk/openai', () => ({
  createOpenAI: mockCreateOpenAI
}));
jest.unstable_mockModule('@ai-sdk/anthropic', () => ({
  createAnthropic: mockCreateAnthropic
}));
jest.unstable_mockModule('@ai-sdk/google', () => ({
  createGoogle: mockCreateGoogle
}));
jest.unstable_mockModule('@ai-sdk/perplexity', () => ({
  createPerplexity: mockCreatePerplexity
}));
jest.unstable_mockModule('ollama-ai-provider', () => ({
  createOllama: mockCreateOllama
}));
jest.unstable_mockModule('@ai-sdk/mistral', () => ({
  createMistral: mockCreateMistral
}));
jest.unstable_mockModule('@ai-sdk/azure', () => ({
  createAzure: mockCreateAzure
}));
jest.unstable_mockModule('@ai-sdk/xai', () => ({
  createXai: mockCreateXai
}));
// jest.unstable_mockModule('@ai-sdk/openrouter', () => ({
//   createOpenRouter: mockCreateOpenRouter
// }));
jest.unstable_mockModule('@openrouter/ai-sdk-provider', () => ({
  createOpenRouter: mockCreateOpenRouter
}));
// TODO: Mock other providers (OpenRouter, Grok) when added

// --- Mock Config Manager ---
const mockGetProviderAndModelForRole = jest.fn();
const mockFindProjectRoot = jest.fn();
jest.unstable_mockModule('../../scripts/modules/config-manager.js', () => ({
  getProviderAndModelForRole: mockGetProviderAndModelForRole,
  findProjectRoot: mockFindProjectRoot
}));

// --- Mock File System (for supported-models.json loading) ---
const mockFsExistsSync = jest.fn();
const mockFsReadFileSync = jest.fn();
jest.unstable_mockModule('fs', () => ({
  __esModule: true, // Important for ES modules with default exports
  default: {
    // Provide the default export expected by `import fs from 'fs'`
    existsSync: mockFsExistsSync,
    readFileSync: mockFsReadFileSync
  },
  // Also provide named exports if they were directly imported elsewhere, though not needed here
  existsSync: mockFsExistsSync,
  readFileSync: mockFsReadFileSync
}));

// --- Mock path (specifically path.join used for supported-models.json) ---
const mockPathJoin = jest.fn((...args) => args.join(path.sep)); // Simple mock
const actualPath = jest.requireActual('path'); // Get the actual path module
jest.unstable_mockModule('path', () => ({
  __esModule: true, // Indicate ES module mock
  default: {
    // Provide the default export
    ...actualPath, // Spread actual functions
    join: mockPathJoin // Override join
  },
  // Also provide named exports for consistency
  ...actualPath,
  join: mockPathJoin
}));

// --- Define Mock Data ---
const mockSupportedModels = {
  openai: [
    { id: 'gpt-4o', allowed_roles: ['main', 'fallback'] },
    { id: 'gpt-3.5-turbo', allowed_roles: ['main', 'fallback'] }
  ],
  anthropic: [
    { id: 'claude-3.5-sonnet-20240620', allowed_roles: ['main'] },
    { id: 'claude-3-haiku-20240307', allowed_roles: ['fallback'] }
  ],
  perplexity: [{ id: 'sonar-pro', allowed_roles: ['research'] }],
  ollama: [{ id: 'llama3', allowed_roles: ['main', 'fallback'] }],
  google: [{ id: 'gemini-pro', allowed_roles: ['main'] }],
  mistral: [{ id: 'mistral-large-latest', allowed_roles: ['main'] }],
  azure: [{ id: 'azure-gpt4o', allowed_roles: ['main'] }],
  xai: [{ id: 'grok-basic', allowed_roles: ['main'] }],
  openrouter: [{ id: 'openrouter-model', allowed_roles: ['main'] }]
  // Add other providers as needed for tests
};

// --- Import the module AFTER mocks ---
const { getClient, clearClientCache, _resetSupportedModelsCache } =
  await import('../../scripts/modules/ai-client-factory.js');

describe('AI Client Factory (Role-Based)', () => {
  const OLD_ENV = process.env;

  beforeEach(() => {
    // Reset state before each test
    clearClientCache(); // Use the correct function name
    _resetSupportedModelsCache(); // Reset the models cache
    mockFsExistsSync.mockClear();
    mockFsReadFileSync.mockClear();
    mockGetProviderAndModelForRole.mockClear(); // Reset this mock too

    // Reset environment to avoid test pollution
    process.env = { ...OLD_ENV };

    // Default mock implementations (can be overridden)
    mockFindProjectRoot.mockReturnValue('/fake/project/root');
    mockPathJoin.mockImplementation((...args) => args.join(actualPath.sep)); // Use actualPath.sep

    // Default FS mocks for model/config loading
    mockFsExistsSync.mockImplementation((filePath) => {
      // Default to true for the files we expect to load
      if (filePath.endsWith('supported-models.json')) return true;
      // Add other expected files if necessary
      return false; // Default to false for others
    });
    mockFsReadFileSync.mockImplementation((filePath) => {
      if (filePath.endsWith('supported-models.json')) {
        return JSON.stringify(mockSupportedModels);
      }
      // Throw if an unexpected file is read
      throw new Error(`Unexpected readFileSync call in test: ${filePath}`);
    });

    // Default config mock
    mockGetProviderAndModelForRole.mockImplementation((role) => {
      if (role === 'main') return { provider: 'openai', modelId: 'gpt-4o' };
      if (role === 'research')
        return { provider: 'perplexity', modelId: 'sonar-pro' };
      if (role === 'fallback')
        return { provider: 'anthropic', modelId: 'claude-3-haiku-20240307' };
      return {}; // Default empty for unconfigured roles
    });

    // Set default required env vars (can be overridden in tests)
    process.env.OPENAI_API_KEY = 'test-openai-key';
    process.env.ANTHROPIC_API_KEY = 'test-anthropic-key';
    process.env.PERPLEXITY_API_KEY = 'test-perplexity-key';
    process.env.GOOGLE_API_KEY = 'test-google-key';
    process.env.MISTRAL_API_KEY = 'test-mistral-key';
    process.env.AZURE_OPENAI_API_KEY = 'test-azure-key';
    process.env.AZURE_OPENAI_ENDPOINT = 'test-azure-endpoint';
    process.env.XAI_API_KEY = 'test-xai-key';
    process.env.OPENROUTER_API_KEY = 'test-openrouter-key';
  });

  afterAll(() => {
    process.env = OLD_ENV;
  });

  test('should throw error if role is missing', () => {
    expect(() => getClient()).toThrow(
      "Client role ('main', 'research', 'fallback') must be specified."
    );
  });

  test('should throw error if config manager fails to get role config', () => {
    mockGetProviderAndModelForRole.mockImplementation((role) => {
      if (role === 'main') throw new Error('Config file not found');
    });
    expect(() => getClient('main')).toThrow(
      "Failed to get configuration for role 'main': Config file not found"
    );
  });

  test('should throw error if config manager returns undefined provider/model', () => {
    mockGetProviderAndModelForRole.mockReturnValue({}); // Empty object
    expect(() => getClient('main')).toThrow(
      "Could not determine provider or modelId for role 'main'"
    );
  });

  test('should throw error if configured model is not supported for the role', () => {
    mockGetProviderAndModelForRole.mockReturnValue({
      provider: 'anthropic',
      modelId: 'claude-3.5-sonnet-20240620' // Only allowed for 'main' in mock data
    });
    expect(() => getClient('research')).toThrow(
      /Model 'claude-3.5-sonnet-20240620' from provider 'anthropic' is either not supported or not allowed for the 'research' role/
    );
  });

  test('should throw error if configured model is not found in supported list', () => {
    mockGetProviderAndModelForRole.mockReturnValue({
      provider: 'openai',
      modelId: 'gpt-unknown'
    });
    expect(() => getClient('main')).toThrow(
      /Model 'gpt-unknown' from provider 'openai' is either not supported or not allowed for the 'main' role/
    );
  });

  test('should throw error if configured provider is not found in supported list', () => {
    mockGetProviderAndModelForRole.mockReturnValue({
      provider: 'unknown-provider',
      modelId: 'some-model'
    });
    expect(() => getClient('main')).toThrow(
      /Model 'some-model' from provider 'unknown-provider' is either not supported or not allowed for the 'main' role/
    );
  });

  test('should skip model validation if supported-models.json is not found', () => {
    mockFsExistsSync.mockReturnValue(false); // Simulate file not found
    const consoleWarnSpy = jest.spyOn(console, 'warn').mockImplementation(); // Suppress warning

    mockGetProviderAndModelForRole.mockReturnValue({
      provider: 'openai',
      modelId: 'gpt-any' // Doesn't matter, validation skipped
    });
    process.env.OPENAI_API_KEY = 'test-key';

    expect(() => getClient('main')).not.toThrow(); // Should not throw validation error
    expect(mockCreateOpenAI).toHaveBeenCalled();
    expect(consoleWarnSpy).toHaveBeenCalledWith(
      expect.stringContaining('Skipping model validation')
    );
    consoleWarnSpy.mockRestore();
  });

  test('should throw environment validation error', () => {
    mockGetProviderAndModelForRole.mockReturnValue({
      provider: 'openai',
      modelId: 'gpt-4o'
    });
    delete process.env.OPENAI_API_KEY; // Trigger missing env var
    expect(() => getClient('main')).toThrow(
      // Expect the original error message from validateEnvironment
      /Missing environment variables for provider 'openai': OPENAI_API_KEY\. Please check your \.env file or session configuration\./
    );
  });

  test('should successfully create client using config and process.env', () => {
    mockGetProviderAndModelForRole.mockReturnValue({
      provider: 'openai',
      modelId: 'gpt-4o'
    });
    process.env.OPENAI_API_KEY = 'env-key';

    const client = getClient('main');

    expect(client).toBeDefined();
    expect(mockGetProviderAndModelForRole).toHaveBeenCalledWith('main');
    expect(mockCreateOpenAI).toHaveBeenCalledWith(
      expect.objectContaining({ apiKey: 'env-key', model: 'gpt-4o' })
    );
  });

  test('should successfully create client using config and session.env', () => {
    mockGetProviderAndModelForRole.mockReturnValue({
      provider: 'anthropic',
      modelId: 'claude-3.5-sonnet-20240620'
    });
    delete process.env.ANTHROPIC_API_KEY;
    const session = { env: { ANTHROPIC_API_KEY: 'session-key' } };

    const client = getClient('main', session);

    expect(client).toBeDefined();
    expect(mockGetProviderAndModelForRole).toHaveBeenCalledWith('main');
    expect(mockCreateAnthropic).toHaveBeenCalledWith(
      expect.objectContaining({
        apiKey: 'session-key',
        model: 'claude-3.5-sonnet-20240620'
      })
    );
  });

  test('should use overrideOptions when provided', () => {
    process.env.PERPLEXITY_API_KEY = 'env-key';
    const override = { provider: 'perplexity', modelId: 'sonar-pro' };

    const client = getClient('research', null, override);

    expect(client).toBeDefined();
    expect(mockGetProviderAndModelForRole).not.toHaveBeenCalled(); // Config shouldn't be called
    expect(mockCreatePerplexity).toHaveBeenCalledWith(
      expect.objectContaining({ apiKey: 'env-key', model: 'sonar-pro' })
    );
  });

  test('should throw validation error even with override if role is disallowed', () => {
    process.env.OPENAI_API_KEY = 'env-key';
    // gpt-4o is not allowed for 'research' in mock data
    const override = { provider: 'openai', modelId: 'gpt-4o' };

    expect(() => getClient('research', null, override)).toThrow(
      /Model 'gpt-4o' from provider 'openai' is either not supported or not allowed for the 'research' role/
    );
    expect(mockGetProviderAndModelForRole).not.toHaveBeenCalled();
    expect(mockCreateOpenAI).not.toHaveBeenCalled();
  });

  describe('Caching Behavior (Role-Based)', () => {
    test('should return cached client instance for the same provider/model derived from role', () => {
      mockGetProviderAndModelForRole.mockReturnValue({
        provider: 'openai',
        modelId: 'gpt-4o'
      });
      process.env.OPENAI_API_KEY = 'test-key';

      const client1 = getClient('main');
      const client2 = getClient('main'); // Same role, same config result

      expect(client1).toBe(client2); // Should be the exact same instance
      expect(mockGetProviderAndModelForRole).toHaveBeenCalledTimes(2); // Config lookup happens each time
      expect(mockCreateOpenAI).toHaveBeenCalledTimes(1); // Instance created only once
    });

    test('should return different client instances for different roles if config differs', () => {
      mockGetProviderAndModelForRole.mockImplementation((role) => {
        if (role === 'main') return { provider: 'openai', modelId: 'gpt-4o' };
        if (role === 'research')
          return { provider: 'perplexity', modelId: 'sonar-pro' };
        return {};
      });
      process.env.OPENAI_API_KEY = 'test-key-1';
      process.env.PERPLEXITY_API_KEY = 'test-key-2';

      const client1 = getClient('main');
      const client2 = getClient('research');

      expect(client1).not.toBe(client2);
      expect(mockCreateOpenAI).toHaveBeenCalledTimes(1);
      expect(mockCreatePerplexity).toHaveBeenCalledTimes(1);
    });

    test('should return same client instance if different roles resolve to same provider/model', () => {
      mockGetProviderAndModelForRole.mockImplementation((role) => {
        // Both roles point to the same model
        return { provider: 'openai', modelId: 'gpt-4o' };
      });
      process.env.OPENAI_API_KEY = 'test-key';

      const client1 = getClient('main');
      const client2 = getClient('fallback'); // Different role, same config result

      expect(client1).toBe(client2); // Should be the exact same instance
      expect(mockCreateOpenAI).toHaveBeenCalledTimes(1); // Instance created only once
    });
  });

  // Add tests for specific providers
  describe('Specific Provider Instantiation', () => {
    test('should successfully create Google client with GOOGLE_API_KEY', () => {
      mockGetProviderAndModelForRole.mockReturnValue({
        provider: 'google',
        modelId: 'gemini-pro'
      }); // Assume gemini-pro is supported
      process.env.GOOGLE_API_KEY = 'test-google-key';
      const client = getClient('main');
      expect(client).toBeDefined();
      expect(mockCreateGoogle).toHaveBeenCalledWith(
        expect.objectContaining({ apiKey: 'test-google-key' })
      );
    });

    test('should throw environment error if GOOGLE_API_KEY is missing', () => {
      mockGetProviderAndModelForRole.mockReturnValue({
        provider: 'google',
        modelId: 'gemini-pro'
      });
      delete process.env.GOOGLE_API_KEY;
      expect(() => getClient('main')).toThrow(
        /Missing environment variables for provider 'google': GOOGLE_API_KEY/
      );
    });

    test('should successfully create Ollama client with OLLAMA_BASE_URL', () => {
      mockGetProviderAndModelForRole.mockReturnValue({
        provider: 'ollama',
        modelId: 'llama3'
      }); // Use supported llama3
      process.env.OLLAMA_BASE_URL = 'http://test-ollama:11434';
      const client = getClient('main');
      expect(client).toBeDefined();
      expect(mockCreateOllama).toHaveBeenCalledWith(
        expect.objectContaining({ baseURL: 'http://test-ollama:11434' })
      );
    });

    test('should throw environment error if OLLAMA_BASE_URL is missing', () => {
      mockGetProviderAndModelForRole.mockReturnValue({
        provider: 'ollama',
        modelId: 'llama3'
      });
      delete process.env.OLLAMA_BASE_URL;
      expect(() => getClient('main')).toThrow(
        /Missing environment variables for provider 'ollama': OLLAMA_BASE_URL/
      );
    });

    test('should successfully create Mistral client with MISTRAL_API_KEY', () => {
      mockGetProviderAndModelForRole.mockReturnValue({
        provider: 'mistral',
        modelId: 'mistral-large-latest'
      }); // Assume supported
      process.env.MISTRAL_API_KEY = 'test-mistral-key';
      const client = getClient('main');
      expect(client).toBeDefined();
      expect(mockCreateMistral).toHaveBeenCalledWith(
        expect.objectContaining({ apiKey: 'test-mistral-key' })
      );
    });

    test('should throw environment error if MISTRAL_API_KEY is missing', () => {
      mockGetProviderAndModelForRole.mockReturnValue({
        provider: 'mistral',
        modelId: 'mistral-large-latest'
      });
      delete process.env.MISTRAL_API_KEY;
      expect(() => getClient('main')).toThrow(
        /Missing environment variables for provider 'mistral': MISTRAL_API_KEY/
      );
    });

    test('should successfully create Azure client with AZURE_OPENAI_API_KEY and AZURE_OPENAI_ENDPOINT', () => {
      mockGetProviderAndModelForRole.mockReturnValue({
        provider: 'azure',
        modelId: 'azure-gpt4o'
      }); // Assume supported
      process.env.AZURE_OPENAI_API_KEY = 'test-azure-key';
      process.env.AZURE_OPENAI_ENDPOINT = 'https://test-azure.openai.azure.com';
      const client = getClient('main');
      expect(client).toBeDefined();
      expect(mockCreateAzure).toHaveBeenCalledWith(
        expect.objectContaining({
          apiKey: 'test-azure-key',
          endpoint: 'https://test-azure.openai.azure.com'
        })
      );
    });

    test('should throw environment error if AZURE_OPENAI_API_KEY or AZURE_OPENAI_ENDPOINT is missing', () => {
      mockGetProviderAndModelForRole.mockReturnValue({
        provider: 'azure',
        modelId: 'azure-gpt4o'
      });
      process.env.AZURE_OPENAI_API_KEY = 'test-azure-key';
      delete process.env.AZURE_OPENAI_ENDPOINT;
      expect(() => getClient('main')).toThrow(
        /Missing environment variables for provider 'azure': AZURE_OPENAI_ENDPOINT/
      );

      process.env.AZURE_OPENAI_ENDPOINT = 'https://test-azure.openai.azure.com';
      delete process.env.AZURE_OPENAI_API_KEY;
      expect(() => getClient('main')).toThrow(
        /Missing environment variables for provider 'azure': AZURE_OPENAI_API_KEY/
      );
    });

    test('should successfully create xAI (Grok) client with XAI_API_KEY', () => {
      mockGetProviderAndModelForRole.mockReturnValue({
        provider: 'xai',
        modelId: 'grok-basic'
      });
      process.env.XAI_API_KEY = 'test-xai-key-specific';
      const client = getClient('main');
      expect(client).toBeDefined();
      expect(mockCreateXai).toHaveBeenCalledWith(
        expect.objectContaining({ apiKey: 'test-xai-key-specific' })
      );
    });

    test('should throw environment error if XAI_API_KEY is missing', () => {
      mockGetProviderAndModelForRole.mockReturnValue({
        provider: 'xai',
        modelId: 'grok-basic'
      });
      delete process.env.XAI_API_KEY;
      expect(() => getClient('main')).toThrow(
        /Missing environment variables for provider 'xai': XAI_API_KEY/
      );
    });

    test('should successfully create OpenRouter client with OPENROUTER_API_KEY', () => {
      mockGetProviderAndModelForRole.mockReturnValue({
        provider: 'openrouter',
        modelId: 'openrouter-model'
      });
      process.env.OPENROUTER_API_KEY = 'test-openrouter-key-specific';
      const client = getClient('main');
      expect(client).toBeDefined();
      expect(mockCreateOpenRouter).toHaveBeenCalledWith(
        expect.objectContaining({ apiKey: 'test-openrouter-key-specific' })
      );
    });

    test('should throw environment error if OPENROUTER_API_KEY is missing', () => {
      mockGetProviderAndModelForRole.mockReturnValue({
        provider: 'openrouter',
        modelId: 'openrouter-model'
      });
      delete process.env.OPENROUTER_API_KEY;
      expect(() => getClient('main')).toThrow(
        /Missing environment variables for provider 'openrouter': OPENROUTER_API_KEY/
      );
    });
  });

  describe('Environment Variable Precedence', () => {
    test('should prioritize process.env over session.env for API keys', () => {
      mockGetProviderAndModelForRole.mockReturnValue({
        provider: 'openai',
        modelId: 'gpt-4o'
      });
      process.env.OPENAI_API_KEY = 'process-env-key'; // This should be used
      const session = { env: { OPENAI_API_KEY: 'session-env-key' } };

      const client = getClient('main', session);
      expect(client).toBeDefined();
      expect(mockCreateOpenAI).toHaveBeenCalledWith(
        expect.objectContaining({ apiKey: 'process-env-key', model: 'gpt-4o' })
      );
    });
  });
});

ai-client-utils.test.js (deleted):

@@ -1,350 +0,0 @@
/**
 * ai-client-utils.test.js
 * Tests for AI client utility functions
 */

import { jest } from '@jest/globals';
import {
  getAnthropicClientForMCP,
  getPerplexityClientForMCP,
  getModelConfig,
  getBestAvailableAIModel,
  handleClaudeError
} from '../../mcp-server/src/core/utils/ai-client-utils.js';

// Mock the Anthropic constructor
jest.mock('@anthropic-ai/sdk', () => {
  return {
    Anthropic: jest.fn().mockImplementation(() => {
      return {
        messages: {
          create: jest.fn().mockResolvedValue({})
        }
      };
    })
  };
});

// Mock the OpenAI dynamic import
jest.mock('openai', () => {
  return {
    default: jest.fn().mockImplementation(() => {
      return {
        chat: {
          completions: {
            create: jest.fn().mockResolvedValue({})
          }
        }
      };
    })
  };
});

describe('AI Client Utilities', () => {
  const originalEnv = process.env;

  beforeEach(() => {
    // Reset process.env before each test
    process.env = { ...originalEnv };

    // Clear all mocks
    jest.clearAllMocks();
  });

  afterAll(() => {
    // Restore process.env
    process.env = originalEnv;
  });

  describe('getAnthropicClientForMCP', () => {
    it('should initialize client with API key from session', () => {
      // Setup
      const session = {
        env: {
          ANTHROPIC_API_KEY: 'test-key-from-session'
        }
      };
      const mockLog = { error: jest.fn() };

      // Execute
      const client = getAnthropicClientForMCP(session, mockLog);

      // Verify
      expect(client).toBeDefined();
      expect(client.messages.create).toBeDefined();
      expect(mockLog.error).not.toHaveBeenCalled();
    });

    it('should fall back to process.env when session key is missing', () => {
      // Setup
      process.env.ANTHROPIC_API_KEY = 'test-key-from-env';
      const session = { env: {} };
      const mockLog = { error: jest.fn() };

      // Execute
      const client = getAnthropicClientForMCP(session, mockLog);

      // Verify
      expect(client).toBeDefined();
      expect(mockLog.error).not.toHaveBeenCalled();
    });

    it('should throw error when API key is missing', () => {
      // Setup
      delete process.env.ANTHROPIC_API_KEY;
      const session = { env: {} };
      const mockLog = { error: jest.fn() };

      // Execute & Verify
      expect(() => getAnthropicClientForMCP(session, mockLog)).toThrow();
      expect(mockLog.error).toHaveBeenCalled();
    });
  });

  describe('getPerplexityClientForMCP', () => {
    it('should initialize client with API key from session', async () => {
      // Setup
      const session = {
        env: {
          PERPLEXITY_API_KEY: 'test-perplexity-key'
        }
      };
      const mockLog = { error: jest.fn() };

      // Execute
      const client = await getPerplexityClientForMCP(session, mockLog);

      // Verify
      expect(client).toBeDefined();
      expect(client.chat.completions.create).toBeDefined();
      expect(mockLog.error).not.toHaveBeenCalled();
    });

    it('should throw error when API key is missing', async () => {
      // Setup
      delete process.env.PERPLEXITY_API_KEY;
      const session = { env: {} };
      const mockLog = { error: jest.fn() };

      // Execute & Verify
      await expect(
        getPerplexityClientForMCP(session, mockLog)
      ).rejects.toThrow();
      expect(mockLog.error).toHaveBeenCalled();
    });
  });

  describe('getModelConfig', () => {
    it('should get model config from session', () => {
      // Setup
      const session = {
        env: {
          MODEL: 'claude-3-opus',
          MAX_TOKENS: '8000',
          TEMPERATURE: '0.5'
        }
      };

      // Execute
      const config = getModelConfig(session);

      // Verify
      expect(config).toEqual({
        model: 'claude-3-opus',
        maxTokens: 8000,
        temperature: 0.5
      });
    });

    it('should use default values when session values are missing', () => {
      // Setup
      const session = {
        env: {
          // No values
        }
      };

      // Execute
      const config = getModelConfig(session);

      // Verify
      expect(config).toEqual({
        model: 'claude-3-7-sonnet-20250219',
        maxTokens: 64000,
        temperature: 0.2
      });
    });

    it('should allow custom defaults', () => {
      // Setup
      const session = { env: {} };
      const customDefaults = {
        model: 'custom-model',
        maxTokens: 2000,
        temperature: 0.3
      };

      // Execute
      const config = getModelConfig(session, customDefaults);

      // Verify
      expect(config).toEqual(customDefaults);
    });
  });

  describe('getBestAvailableAIModel', () => {
    it('should return Perplexity for research when available', async () => {
      // Setup
      const session = {
        env: {
          PERPLEXITY_API_KEY: 'test-perplexity-key',
          ANTHROPIC_API_KEY: 'test-anthropic-key'
        }
      };
      const mockLog = { warn: jest.fn(), info: jest.fn(), error: jest.fn() };

      // Execute
      const result = await getBestAvailableAIModel(
        session,
        { requiresResearch: true },
        mockLog
      );

      // Verify
      expect(result.type).toBe('perplexity');
      expect(result.client).toBeDefined();
    });

    it('should return Claude when Perplexity is not available and Claude is not overloaded', async () => {
      // Setup
      const originalPerplexityKey = process.env.PERPLEXITY_API_KEY;
      delete process.env.PERPLEXITY_API_KEY; // Make sure Perplexity is not available in process.env

      const session = {
        env: {
          ANTHROPIC_API_KEY: 'test-anthropic-key'
          // Purposely not including PERPLEXITY_API_KEY
        }
      };
      const mockLog = { warn: jest.fn(), info: jest.fn(), error: jest.fn() };

      try {
        // Execute
        const result = await getBestAvailableAIModel(
          session,
          { requiresResearch: true },
          mockLog
        );

        // Verify
        // In our implementation, we prioritize research capability through Perplexity
        // so if we're testing research but Perplexity isn't available, Claude is used
        expect(result.type).toBe('claude');
        expect(result.client).toBeDefined();
        expect(mockLog.warn).toHaveBeenCalled(); // Warning about using Claude instead of Perplexity
      } finally {
        // Restore original env variables
        if (originalPerplexityKey) {
          process.env.PERPLEXITY_API_KEY = originalPerplexityKey;
        }
      }
    });

    it('should fall back to Claude as last resort when overloaded', async () => {
      // Setup
      const session = {
        env: {
          ANTHROPIC_API_KEY: 'test-anthropic-key'
        }
      };
      const mockLog = { warn: jest.fn(), info: jest.fn(), error: jest.fn() };

      // Execute
      const result = await getBestAvailableAIModel(
        session,
        { claudeOverloaded: true },
        mockLog
      );

      // Verify
      expect(result.type).toBe('claude');
      expect(result.client).toBeDefined();
      expect(mockLog.warn).toHaveBeenCalled(); // Warning about Claude overloaded
    });

    it('should throw error when no models are available', async () => {
      // Setup
      delete process.env.ANTHROPIC_API_KEY;
      delete process.env.PERPLEXITY_API_KEY;
      const session = { env: {} };
      const mockLog = { warn: jest.fn(), info: jest.fn(), error: jest.fn() };

      // Execute & Verify
      await expect(
        getBestAvailableAIModel(session, {}, mockLog)
      ).rejects.toThrow();
    });
  });

  describe('handleClaudeError', () => {
    it('should handle overloaded error', () => {
      // Setup
      const error = {
        type: 'error',
        error: {
          type: 'overloaded_error',
          message: 'Claude is overloaded'
        }
      };

      // Execute
      const message = handleClaudeError(error);

      // Verify
      expect(message).toContain('overloaded');
    });

    it('should handle rate limit error', () => {
      // Setup
      const error = {
        type: 'error',
        error: {
          type: 'rate_limit_error',
          message: 'Rate limit exceeded'
        }
      };

      // Execute
      const message = handleClaudeError(error);

      // Verify
      expect(message).toContain('rate limit');
    });

    it('should handle timeout error', () => {
      // Setup
      const error = {
        message: 'Request timed out after 60 seconds'
      };

      // Execute
      const message = handleClaudeError(error);

      // Verify
      expect(message).toContain('timed out');
    });

    it('should handle generic errors', () => {
      // Setup
      const error = {
        message: 'Something went wrong'
      };

      // Execute
      const message = handleClaudeError(error);

      // Verify
      expect(message).toContain('Error communicating with Claude');
    });
  });
});

ai-services.test.js (deleted):

@@ -1,373 +0,0 @@
/**
 * AI Services module tests
 */

import { jest } from '@jest/globals';
import { parseSubtasksFromText } from '../../scripts/modules/ai-services.js';

// Create a mock log function we can check later
const mockLog = jest.fn();

// Mock dependencies
jest.mock('@anthropic-ai/sdk', () => {
  const mockCreate = jest.fn().mockResolvedValue({
    content: [{ text: 'AI response' }]
  });
  const mockAnthropicInstance = {
    messages: {
      create: mockCreate
    }
  };
  const mockAnthropicConstructor = jest
    .fn()
    .mockImplementation(() => mockAnthropicInstance);
  return {
    Anthropic: mockAnthropicConstructor
  };
});

// Use jest.fn() directly for OpenAI mock
const mockOpenAIInstance = {
  chat: {
    completions: {
      create: jest.fn().mockResolvedValue({
        choices: [{ message: { content: 'Perplexity response' } }]
      })
    }
  }
};
const mockOpenAI = jest.fn().mockImplementation(() => mockOpenAIInstance);

jest.mock('openai', () => {
  return { default: mockOpenAI };
});

jest.mock('dotenv', () => ({
  config: jest.fn()
}));

jest.mock('../../scripts/modules/utils.js', () => ({
  CONFIG: {
    model: 'claude-3-sonnet-20240229',
    temperature: 0.7,
    maxTokens: 4000
  },
  log: mockLog,
  sanitizePrompt: jest.fn((text) => text)
}));

jest.mock('../../scripts/modules/ui.js', () => ({
  startLoadingIndicator: jest.fn().mockReturnValue('mockLoader'),
  stopLoadingIndicator: jest.fn()
}));

// Mock anthropic global object
global.anthropic = {
  messages: {
    create: jest.fn().mockResolvedValue({
      content: [
        {
          text: '[{"id": 1, "title": "Test", "description": "Test", "dependencies": [], "details": "Test"}]'
        }
      ]
    })
  }
};

// Mock process.env
const originalEnv = process.env;

// Import Anthropic for testing constructor arguments
import { Anthropic } from '@anthropic-ai/sdk';

describe('AI Services Module', () => {
  beforeEach(() => {
    jest.clearAllMocks();
    process.env = { ...originalEnv };
    process.env.ANTHROPIC_API_KEY = 'test-anthropic-key';
    process.env.PERPLEXITY_API_KEY = 'test-perplexity-key';
  });

  afterEach(() => {
    process.env = originalEnv;
  });

  describe('parseSubtasksFromText function', () => {
    test('should parse subtasks from JSON text', () => {
      const text = `Here's your list of subtasks:

      [
        {
          "id": 1,
          "title": "Implement database schema",
          "description": "Design and implement the database schema for user data",
          "dependencies": [],
          "details": "Create tables for users, preferences, and settings"
        },
        {
          "id": 2,
          "title": "Create API endpoints",
          "description": "Develop RESTful API endpoints for user operations",
          "dependencies": [],
          "details": "Implement CRUD operations for user management"
        }
      ]

      These subtasks will help you implement the parent task efficiently.`;

      const result = parseSubtasksFromText(text, 1, 2, 5);

      expect(result).toHaveLength(2);
      expect(result[0]).toEqual({
        id: 1,
        title: 'Implement database schema',
        description: 'Design and implement the database schema for user data',
        status: 'pending',
        dependencies: [],
        details: 'Create tables for users, preferences, and settings',
        parentTaskId: 5
      });
      expect(result[1]).toEqual({
        id: 2,
        title: 'Create API endpoints',
        description: 'Develop RESTful API endpoints for user operations',
        status: 'pending',
        dependencies: [],
        details: 'Implement CRUD operations for user management',
        parentTaskId: 5
      });
    });

    test('should handle subtasks with dependencies', () => {
      const text = `
      [
        {
          "id": 1,
          "title": "Setup React environment",
          "description": "Initialize React app with necessary dependencies",
          "dependencies": [],
          "details": "Use Create React App or Vite to set up a new project"
        },
        {
          "id": 2,
          "title": "Create component structure",
          "description": "Design and implement component hierarchy",
          "dependencies": [1],
          "details": "Organize components by feature and reusability"
        }
      ]`;

      const result = parseSubtasksFromText(text, 1, 2, 5);

      expect(result).toHaveLength(2);
      expect(result[0].dependencies).toEqual([]);
      expect(result[1].dependencies).toEqual([1]);
    });

    test('should handle complex dependency lists', () => {
      const text = `
      [
        {
          "id": 1,
          "title": "Setup database",
          "description": "Initialize database structure",
          "dependencies": [],
          "details": "Set up PostgreSQL database"
        },
        {
          "id": 2,
          "title": "Create models",
          "description": "Implement data models",
          "dependencies": [1],
          "details": "Define Prisma models"
        },
        {
          "id": 3,
          "title": "Implement controllers",
          "description": "Create API controllers",
          "dependencies": [1, 2],
          "details": "Build controllers for all endpoints"
        }
      ]`;

      const result = parseSubtasksFromText(text, 1, 3, 5);

      expect(result).toHaveLength(3);
      expect(result[2].dependencies).toEqual([1, 2]);
    });

    test('should throw an error for empty text', () => {
      const emptyText = '';

      expect(() => parseSubtasksFromText(emptyText, 1, 2, 5)).toThrow(
        'Empty text provided, cannot parse subtasks'
      );
    });

    test('should normalize subtask IDs', () => {
      const text = `
      [
        {
          "id": 10,
          "title": "First task with incorrect ID",
          "description": "First description",
          "dependencies": [],
          "details": "First details"
        },
        {
          "id": 20,
          "title": "Second task with incorrect ID",
          "description": "Second description",
          "dependencies": [],
          "details": "Second details"
        }
      ]`;

      const result = parseSubtasksFromText(text, 1, 2, 5);

      expect(result).toHaveLength(2);
      expect(result[0].id).toBe(1); // Should normalize to starting ID
      expect(result[1].id).toBe(2); // Should normalize to starting ID + 1
    });

    test('should convert string dependencies to numbers', () => {
      const text = `
      [
        {
          "id": 1,
          "title": "First task",
          "description": "First description",
          "dependencies": [],
          "details": "First details"
        },
        {
          "id": 2,
          "title": "Second task",
          "description": "Second description",
          "dependencies": ["1"],
          "details": "Second details"
        }
      ]`;

      const result = parseSubtasksFromText(text, 1, 2, 5);

      expect(result[1].dependencies).toEqual([1]);
      expect(typeof result[1].dependencies[0]).toBe('number');
    });

    test('should throw an error for invalid JSON', () => {
      const text = `This is not valid JSON and cannot be parsed`;

      expect(() => parseSubtasksFromText(text, 1, 2, 5)).toThrow(
        'Could not locate valid JSON array in the response'
      );
    });
  });

  describe('handleClaudeError function', () => {
    // Import the function directly for testing
    let handleClaudeError;

    beforeAll(async () => {
      // Dynamic import to get the actual function
      const module = await import('../../scripts/modules/ai-services.js');
      handleClaudeError = module.handleClaudeError;
    });

    test('should handle overloaded_error type', () => {
      const error = {
        type: 'error',
        error: {
          type: 'overloaded_error',
          message: 'Claude is experiencing high volume'
        }
      };

      // Mock process.env to include PERPLEXITY_API_KEY
      const originalEnv = process.env;
      process.env = { ...originalEnv, PERPLEXITY_API_KEY: 'test-key' };

      const result = handleClaudeError(error);

      // Restore original env
      process.env = originalEnv;

      expect(result).toContain('Claude is currently overloaded');
      expect(result).toContain('fall back to Perplexity AI');
    });

    test('should handle rate_limit_error type', () => {
      const error = {
        type: 'error',
        error: {
          type: 'rate_limit_error',
          message: 'Rate limit exceeded'
        }
      };

      const result = handleClaudeError(error);

      expect(result).toContain('exceeded the rate limit');
    });

    test('should handle invalid_request_error type', () => {
      const error = {
        type: 'error',
        error: {
          type: 'invalid_request_error',
          message: 'Invalid request parameters'
        }
      };

      const result = handleClaudeError(error);

      expect(result).toContain('issue with the request format');
    });

    test('should handle timeout errors', () => {
      const error = {
        message: 'Request timed out after 60000ms'
      };

      const result = handleClaudeError(error);

      expect(result).toContain('timed out');
    });

    test('should handle network errors', () => {
      const error = {
        message: 'Network error occurred'
      };

      const result = handleClaudeError(error);

      expect(result).toContain('network error');
    });

    test('should handle generic errors', () => {
      const error = {
        message: 'Something unexpected happened'
      };

      const result = handleClaudeError(error);

      expect(result).toContain('Error communicating with Claude');
      expect(result).toContain('Something unexpected happened');
    });
  });

  describe('Anthropic client configuration', () => {
    test('should include output-128k beta header in client configuration', async () => {
      // Read the file content to verify the change is present
      const fs = await import('fs');
      const path = await import('path');
      const filePath = path.resolve('./scripts/modules/ai-services.js');
      const fileContent = fs.readFileSync(filePath, 'utf8');

      // Check if the beta header is in the file
      expect(fileContent).toContain(
        "'anthropic-beta': 'output-128k-2025-02-19'"
      );
    });
  });
});