import { readFileSync } from 'fs';
import fetch from 'node-fetch';
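
/**
 * Analyzes a task-master E2E test log by sending it to Anthropic's
 * Messages API and parsing the structured JSON verdict returned by the
 * model.
 *
 * The injected `config` is expected to expose `llmAnalysis.enabled`,
 * `llmAnalysis.model`, and `llmAnalysis.maxTokens`; the injected `logger`
 * must provide `info`, `warning`, `error`, and `addCost`.
 *
 * @example
 * // Paths are illustrative.
 * const analyzer = new LLMAnalyzer(config, logger);
 * const analysis = await analyzer.analyzeLog('e2e.log', 'provider_add_task_summary.log');
 */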
export class LLMAnalyzer {
	constructor(config, logger) {
		this.config = config;
		this.logger = logger;
		this.apiKey = process.env.ANTHROPIC_API_KEY;
		this.apiEndpoint = 'https://api.anthropic.com/v1/messages';
	}
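
	/**
	 * Run the full analysis pipeline for one E2E log file: check the config
	 * toggle and API key, build the prompt, call the LLM, parse the JSON
	 * reply, and record the API cost. Returns the parsed analysis object,
	 * or null if analysis is disabled, unconfigured, or fails.
	 *
	 * @param {string} logFile - Path to the main E2E log.
	 * @param {string|null} providerSummaryFile - Optional provider add-task summary log.
	 */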
	async analyzeLog(logFile, providerSummaryFile = null) {
		if (!this.config.llmAnalysis.enabled) {
			this.logger.info('LLM analysis is disabled in configuration');
			return null;
		}

		if (!this.apiKey) {
			this.logger.error('ANTHROPIC_API_KEY not found in environment');
			return null;
		}

		try {
			const logContent = readFileSync(logFile, 'utf8');
			const prompt = this.buildAnalysisPrompt(logContent, providerSummaryFile);

			const response = await this.callLLM(prompt);
			const analysis = this.parseResponse(response);

			// Calculate and log cost
			if (response.usage) {
				const cost = this.calculateCost(response.usage);
				this.logger.addCost(cost);
				this.logger.info(`LLM Analysis AI Cost: $${cost.toFixed(6)} USD`);
			}

			return analysis;
		} catch (error) {
			this.logger.error(`LLM analysis failed: ${error.message}`);
			return null;
		}
	}
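
	/**
	 * Assemble the analysis prompt: instructions for verifying E2E steps,
	 * the multi-provider add-task comparison rubric, the required JSON
	 * output schema, the optional provider summary, and the raw log content.
	 */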
	buildAnalysisPrompt(logContent, providerSummaryFile) {
		let providerSummary = '';
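		// The provider summary is optional context; a missing or unreadable
		// file logs a warning instead of aborting the analysis.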
		if (providerSummaryFile) {
			try {
				providerSummary = readFileSync(providerSummaryFile, 'utf8');
			} catch (error) {
				this.logger.warning(
					`Could not read provider summary file: ${error.message}`
				);
			}
		}

		return `Analyze the following E2E test log for the task-master tool. The log contains output from various 'task-master' commands executed sequentially.

Your goal is to:
1. Verify if the key E2E steps completed successfully based on the log messages (e.g., init, parse PRD, list tasks, analyze complexity, expand task, set status, manage models, add/remove dependencies, add/update/remove tasks/subtasks, generate files).
2. **Specifically analyze the Multi-Provider Add-Task Test Sequence:**
   a. Identify which providers were tested for \`add-task\`. Look for log steps like "Testing Add-Task with Provider: ..." and the summary log 'provider_add_task_summary.log'.
   b. For each tested provider, determine if \`add-task\` succeeded or failed. Note the created task ID if successful.
   c. Review the corresponding \`add_task_show_output_<provider>_id_<id>.log\` file (if created) for each successful \`add-task\` execution.
   d. **Compare the quality and completeness** of the task generated by each successful provider based on their \`show\` output. Assign a score (e.g., 1-10, 10 being best) based on relevance to the prompt, detail level, and correctness.
   e. Note any providers where \`add-task\` failed or where the task ID could not be extracted.
3. Identify any general explicit "[ERROR]" messages or stack traces throughout the *entire* log.
4. Identify any potential warnings or unusual output that might indicate a problem even if not marked as an explicit error.
5. Provide an overall assessment of the test run's health based *only* on the log content.

${providerSummary ? `\nProvider Summary:\n${providerSummary}\n` : ''}

Return your analysis **strictly** in the following JSON format. Do not include any text outside of the JSON structure:

{
  "overall_status": "Success|Failure|Warning",
  "verified_steps": [ "Initialization", "PRD Parsing", /* ...other general steps observed... */ ],
  "provider_add_task_comparison": {
    "prompt_used": "... (extract from log if possible or state 'standard auth prompt') ...",
    "provider_results": {
      "anthropic": { "status": "Success|Failure|ID_Extraction_Failed|Set_Model_Failed", "task_id": "...", "score": "X/10 | N/A", "notes": "..." },
      "openai": { "status": "Success|Failure|...", "task_id": "...", "score": "X/10 | N/A", "notes": "..." },
      /* ... include all tested providers ... */
    },
    "comparison_summary": "Brief overall comparison of generated tasks..."
  },
  "detected_issues": [ { "severity": "Error|Warning|Anomaly", "description": "...", "log_context": "[Optional, short snippet from log near the issue]" } ],
  "llm_summary_points": [ "Overall summary point 1", "Provider comparison highlight", "Any major issues noted" ]
}

Here is the main log content:

${logContent}`;
	}
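
	/**
	 * POST the prompt to the Anthropic Messages API and return the parsed
	 * JSON body. Throws on any non-2xx response so callers can log the
	 * status and error text.
	 */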
	async callLLM(prompt) {
		const payload = {
			model: this.config.llmAnalysis.model,
			max_tokens: this.config.llmAnalysis.maxTokens,
			messages: [{ role: 'user', content: prompt }]
		};

		const response = await fetch(this.apiEndpoint, {
			method: 'POST',
			headers: {
				'Content-Type': 'application/json',
				'x-api-key': this.apiKey,
				'anthropic-version': '2023-06-01'
			},
			body: JSON.stringify(payload)
		});
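
		// Surface API failures with the HTTP status and response body so the
		// E2E log shows exactly why the call was rejected.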
		if (!response.ok) {
			const error = await response.text();
			throw new Error(`LLM API call failed: ${response.status} - ${error}`);
		}

		return response.json();
	}
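
	/**
	 * Extract the JSON analysis object from the model's reply. Returns the
	 * parsed object, or null if no JSON block is found or parsing fails.
	 */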
	parseResponse(response) {
		try {
			const content = response.content[0].text;
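			// The prompt demands JSON only, but the model may still wrap it in
			// prose. Slice from the first '{' to the last '}' to tolerate any
			// leading or trailing text.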
			const jsonStart = content.indexOf('{');
			const jsonEnd = content.lastIndexOf('}');

			if (jsonStart === -1 || jsonEnd === -1) {
				throw new Error('No JSON found in response');
			}

			const jsonString = content.substring(jsonStart, jsonEnd + 1);
			return JSON.parse(jsonString);
		} catch (error) {
			this.logger.error(`Failed to parse LLM response: ${error.message}`);
			return null;
		}
	}
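
	/**
	 * Convert Anthropic token usage into a USD cost using a per-model
	 * price table (USD per 1M tokens).
	 */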
	calculateCost(usage) {
		const modelCosts = {
			'claude-3-7-sonnet-20250219': {
				input: 3.0, // per 1M tokens
				output: 15.0 // per 1M tokens
			}
		};
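
		// Models missing from the table fall back to zero cost rather than
		// throwing, so an unrecognized model never breaks the analysis run.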
		const costs = modelCosts[this.config.llmAnalysis.model] || {
			input: 0,
			output: 0
		};
		const inputCost = (usage.input_tokens / 1000000) * costs.input;
		const outputCost = (usage.output_tokens / 1000000) * costs.output;

		return inputCost + outputCost;
	}
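
	/**
	 * Shape a parsed analysis into a timestamped report object suitable for
	 * serialization. Returns null when no analysis is available.
	 */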
	formatReport(analysis) {
		if (!analysis) return null;

		const report = {
			title: 'TASKMASTER E2E Test Analysis Report',
			timestamp: new Date().toISOString(),
			status: analysis.overall_status,
			summary: analysis.llm_summary_points,
			verifiedSteps: analysis.verified_steps,
			providerComparison: analysis.provider_add_task_comparison,
			issues: analysis.detected_issues
		};

		return report;
	}
}