feat: Introduce modern E2E test suite for Task Master AI
This commit is contained in:
168
tests/e2e/utils/llm-analyzer.js
Normal file
168
tests/e2e/utils/llm-analyzer.js
Normal file
@@ -0,0 +1,168 @@
|
||||
import { readFileSync } from 'fs';
|
||||
import fetch from 'node-fetch';
|
||||
|
||||
/**
 * Sends a completed E2E test log to the Anthropic Messages API and returns
 * the model's structured analysis as a parsed JSON object.
 *
 * All failure modes (analysis disabled, missing API key, file/network/parse
 * errors) resolve to `null` rather than throwing, so callers can treat the
 * analysis as strictly best-effort.
 */
export class LLMAnalyzer {
	/**
	 * @param {{ llmAnalysis?: { enabled: boolean, model: string, maxTokens: number } }} config
	 *   Application config; only the `llmAnalysis` section is read.
	 * @param {{ info: Function, error: Function, warning: Function, addCost: Function }} logger
	 *   Logger used for progress, diagnostics, and cost accumulation.
	 */
	constructor(config, logger) {
		this.config = config;
		this.logger = logger;
		// Read once at construction; analyzeLog() refuses to run without it.
		this.apiKey = process.env.ANTHROPIC_API_KEY;
		this.apiEndpoint = 'https://api.anthropic.com/v1/messages';
	}

	/**
	 * Analyze an E2E test log with the configured LLM.
	 *
	 * @param {string} logFile - Path to the main E2E log file.
	 * @param {string|null} [providerSummaryFile] - Optional path to the
	 *   per-provider add-task summary log, embedded into the prompt if given.
	 * @returns {Promise<object|null>} Parsed analysis JSON, or `null` when
	 *   analysis is disabled/unconfigured, the API key is missing, or any
	 *   step (read, request, parse) fails.
	 */
	async analyzeLog(logFile, providerSummaryFile = null) {
		// Optional chaining: a config without an `llmAnalysis` section is
		// treated as "disabled" instead of crashing with a TypeError.
		if (!this.config.llmAnalysis?.enabled) {
			this.logger.info('LLM analysis is disabled in configuration');
			return null;
		}

		if (!this.apiKey) {
			this.logger.error('ANTHROPIC_API_KEY not found in environment');
			return null;
		}

		try {
			const logContent = readFileSync(logFile, 'utf8');
			const prompt = this.buildAnalysisPrompt(logContent, providerSummaryFile);

			const response = await this.callLLM(prompt);
			const analysis = this.parseResponse(response);

			// Calculate and log cost
			if (response.usage) {
				const cost = this.calculateCost(response.usage);
				this.logger.addCost(cost);
				this.logger.info(`LLM Analysis AI Cost: $${cost.toFixed(6)} USD`);
			}

			return analysis;
		} catch (error) {
			this.logger.error(`LLM analysis failed: ${error.message}`);
			return null;
		}
	}

	/**
	 * Build the analysis prompt sent to the model.
	 *
	 * @param {string} logContent - Full text of the main E2E log.
	 * @param {string|null} providerSummaryFile - Optional path whose contents
	 *   are inlined as a "Provider Summary" section; unreadable files are
	 *   logged as warnings and skipped (best-effort).
	 * @returns {string} The complete prompt, instructing the model to reply
	 *   with a strict JSON structure that parseResponse() can extract.
	 */
	buildAnalysisPrompt(logContent, providerSummaryFile) {
		let providerSummary = '';
		if (providerSummaryFile) {
			try {
				providerSummary = readFileSync(providerSummaryFile, 'utf8');
			} catch (error) {
				this.logger.warning(`Could not read provider summary file: ${error.message}`);
			}
		}

		return `Analyze the following E2E test log for the task-master tool. The log contains output from various 'task-master' commands executed sequentially.

Your goal is to:
1. Verify if the key E2E steps completed successfully based on the log messages (e.g., init, parse PRD, list tasks, analyze complexity, expand task, set status, manage models, add/remove dependencies, add/update/remove tasks/subtasks, generate files).
2. **Specifically analyze the Multi-Provider Add-Task Test Sequence:**
a. Identify which providers were tested for \`add-task\`. Look for log steps like "Testing Add-Task with Provider: ..." and the summary log 'provider_add_task_summary.log'.
b. For each tested provider, determine if \`add-task\` succeeded or failed. Note the created task ID if successful.
c. Review the corresponding \`add_task_show_output_<provider>_id_<id>.log\` file (if created) for each successful \`add-task\` execution.
d. **Compare the quality and completeness** of the task generated by each successful provider based on their \`show\` output. Assign a score (e.g., 1-10, 10 being best) based on relevance to the prompt, detail level, and correctness.
e. Note any providers where \`add-task\` failed or where the task ID could not be extracted.
3. Identify any general explicit "[ERROR]" messages or stack traces throughout the *entire* log.
4. Identify any potential warnings or unusual output that might indicate a problem even if not marked as an explicit error.
5. Provide an overall assessment of the test run's health based *only* on the log content.

${providerSummary ? `\nProvider Summary:\n${providerSummary}\n` : ''}

Return your analysis **strictly** in the following JSON format. Do not include any text outside of the JSON structure:

{
"overall_status": "Success|Failure|Warning",
"verified_steps": [ "Initialization", "PRD Parsing", /* ...other general steps observed... */ ],
"provider_add_task_comparison": {
"prompt_used": "... (extract from log if possible or state 'standard auth prompt') ...",
"provider_results": {
"anthropic": { "status": "Success|Failure|ID_Extraction_Failed|Set_Model_Failed", "task_id": "...", "score": "X/10 | N/A", "notes": "..." },
"openai": { "status": "Success|Failure|...", "task_id": "...", "score": "X/10 | N/A", "notes": "..." },
/* ... include all tested providers ... */
},
"comparison_summary": "Brief overall comparison of generated tasks..."
},
"detected_issues": [ { "severity": "Error|Warning|Anomaly", "description": "...", "log_context": "[Optional, short snippet from log near the issue]" } ],
"llm_summary_points": [ "Overall summary point 1", "Provider comparison highlight", "Any major issues noted" ]
}

Here is the main log content:

${logContent}`;
	}

	/**
	 * POST the prompt to the Anthropic Messages API.
	 *
	 * @param {string} prompt - User-role message content.
	 * @returns {Promise<object>} The raw JSON response body.
	 * @throws {Error} On any non-2xx HTTP status, with status and body text.
	 */
	async callLLM(prompt) {
		const payload = {
			model: this.config.llmAnalysis.model,
			max_tokens: this.config.llmAnalysis.maxTokens,
			messages: [
				{ role: 'user', content: prompt }
			]
		};

		const response = await fetch(this.apiEndpoint, {
			method: 'POST',
			headers: {
				'Content-Type': 'application/json',
				'x-api-key': this.apiKey,
				'anthropic-version': '2023-06-01'
			},
			body: JSON.stringify(payload)
		});

		if (!response.ok) {
			const error = await response.text();
			throw new Error(`LLM API call failed: ${response.status} - ${error}`);
		}

		return response.json();
	}

	/**
	 * Extract and parse the JSON object embedded in the model's reply.
	 *
	 * Takes the substring from the first '{' to the last '}' of the first
	 * content block's text, tolerating any prose the model wrapped around
	 * the JSON despite the "strict" instruction.
	 *
	 * @param {object} response - Raw Messages API response body.
	 * @returns {object|null} Parsed analysis, or `null` if no parseable JSON
	 *   is found (the error is logged, never thrown).
	 */
	parseResponse(response) {
		try {
			const content = response.content[0].text;
			const jsonStart = content.indexOf('{');
			const jsonEnd = content.lastIndexOf('}');

			if (jsonStart === -1 || jsonEnd === -1) {
				throw new Error('No JSON found in response');
			}

			const jsonString = content.substring(jsonStart, jsonEnd + 1);
			return JSON.parse(jsonString);
		} catch (error) {
			this.logger.error(`Failed to parse LLM response: ${error.message}`);
			return null;
		}
	}

	/**
	 * Compute the USD cost of an API call from its token usage.
	 *
	 * @param {{ input_tokens: number, output_tokens: number }} usage - The
	 *   `usage` object from the Messages API response.
	 * @returns {number} Cost in USD; 0 for models with no known pricing
	 *   (a warning is logged so cost tracking is not silently wrong).
	 */
	calculateCost(usage) {
		// Prices in USD per 1M tokens; extend this table when new models are used.
		const modelCosts = {
			'claude-3-7-sonnet-20250219': {
				input: 3.00, // per 1M tokens
				output: 15.00 // per 1M tokens
			}
		};

		const costs = modelCosts[this.config.llmAnalysis.model];
		if (!costs) {
			// Unknown model: keep the original $0 behavior, but surface it
			// instead of silently under-reporting spend.
			this.logger.warning(
				`No pricing known for model '${this.config.llmAnalysis.model}'; reporting $0 cost`
			);
			return 0;
		}

		const inputCost = (usage.input_tokens / 1000000) * costs.input;
		const outputCost = (usage.output_tokens / 1000000) * costs.output;

		return inputCost + outputCost;
	}

	/**
	 * Reshape a parsed analysis into a flat report object.
	 *
	 * @param {object|null} analysis - Result of parseResponse()/analyzeLog().
	 * @returns {object|null} Report with title/timestamp/status/summary/
	 *   verifiedSteps/providerComparison/issues, or `null` if analysis is
	 *   falsy (so callers can chain directly off a failed analysis).
	 */
	formatReport(analysis) {
		if (!analysis) return null;

		const report = {
			title: 'TASKMASTER E2E Test Analysis Report',
			timestamp: new Date().toISOString(),
			status: analysis.overall_status,
			summary: analysis.llm_summary_points,
			verifiedSteps: analysis.verified_steps,
			providerComparison: analysis.provider_add_task_comparison,
			issues: analysis.detected_issues
		};

		return report;
	}
}
|
||||
Reference in New Issue
Block a user