import { readFileSync } from 'fs';
import fetch from 'node-fetch';

/**
 * Runs an LLM-based analysis of an E2E test log via the Anthropic Messages API.
 */
export class LLMAnalyzer {
	constructor(config, logger) {
		this.config = config;
		this.logger = logger;
		this.apiKey = process.env.ANTHROPIC_API_KEY;
		this.apiEndpoint = 'https://api.anthropic.com/v1/messages';
	}

	// Read the log (and optional provider summary), send it to the LLM,
	// and return the parsed analysis, or null on failure.
	async analyzeLog(logFile, providerSummaryFile = null) {
		if (!this.config.llmAnalysis.enabled) {
			this.logger.info('LLM analysis is disabled in configuration');
			return null;
		}

		if (!this.apiKey) {
			this.logger.error('ANTHROPIC_API_KEY not found in environment');
			return null;
		}

		try {
			const logContent = readFileSync(logFile, 'utf8');
			const prompt = this.buildAnalysisPrompt(logContent, providerSummaryFile);
			const response = await this.callLLM(prompt);
			const analysis = this.parseResponse(response);

			// Calculate and log cost
			if (response.usage) {
				const cost = this.calculateCost(response.usage);
				this.logger.addCost(cost);
				this.logger.info(`LLM Analysis AI Cost: $${cost.toFixed(6)} USD`);
			}

			return analysis;
		} catch (error) {
			this.logger.error(`LLM analysis failed: ${error.message}`);
			return null;
		}
	}

	// Assemble the analysis prompt, embedding the provider summary when available.
	buildAnalysisPrompt(logContent, providerSummaryFile) {
		let providerSummary = '';
		if (providerSummaryFile) {
			try {
				providerSummary = readFileSync(providerSummaryFile, 'utf8');
			} catch (error) {
				this.logger.warning(
					`Could not read provider summary file: ${error.message}`
				);
			}
		}

		return `Analyze the following E2E test log for the task-master tool. The log contains output from various 'task-master' commands executed sequentially.

Your goal is to:
1. Verify if the key E2E steps completed successfully based on the log messages (e.g., init, parse PRD, list tasks, analyze complexity, expand task, set status, manage models, add/remove dependencies, add/update/remove tasks/subtasks, generate files).
2. **Specifically analyze the Multi-Provider Add-Task Test Sequence:**
   a. Identify which providers were tested for \`add-task\`. Look for log steps like "Testing Add-Task with Provider: ..." and the summary log 'provider_add_task_summary.log'.
   b. For each tested provider, determine if \`add-task\` succeeded or failed. Note the created task ID if successful.
   c. Review the corresponding \`add_task_show_output__id_.log\` file (if created) for each successful \`add-task\` execution.
   d. **Compare the quality and completeness** of the task generated by each successful provider based on their \`show\` output. Assign a score (e.g., 1-10, 10 being best) based on relevance to the prompt, detail level, and correctness.
   e. Note any providers where \`add-task\` failed or where the task ID could not be extracted.
3. Identify any general explicit "[ERROR]" messages or stack traces throughout the *entire* log.
4. Identify any potential warnings or unusual output that might indicate a problem even if not marked as an explicit error.
5. Provide an overall assessment of the test run's health based *only* on the log content.
${providerSummary ? `\nProvider Summary:\n${providerSummary}\n` : ''}
Return your analysis **strictly** in the following JSON format. Do not include any text outside of the JSON structure:

{
  "overall_status": "Success|Failure|Warning",
  "verified_steps": [
    "Initialization",
    "PRD Parsing"
    /* ...other general steps observed... */
  ],
  "provider_add_task_comparison": {
    "prompt_used": "... (extract from log if possible or state 'standard auth prompt') ...",
    "provider_results": {
      "anthropic": {
        "status": "Success|Failure|ID_Extraction_Failed|Set_Model_Failed",
        "task_id": "...",
        "score": "X/10 | N/A",
        "notes": "..."
      },
      "openai": {
        "status": "Success|Failure|...",
        "task_id": "...",
        "score": "X/10 | N/A",
        "notes": "..."
      }
      /* ... include all tested providers ... */
    },
    "comparison_summary": "Brief overall comparison of generated tasks..."
  },
  "detected_issues": [
    {
      "severity": "Error|Warning|Anomaly",
      "description": "...",
      "log_context": "[Optional, short snippet from log near the issue]"
    }
  ],
  "llm_summary_points": [
    "Overall summary point 1",
    "Provider comparison highlight",
    "Any major issues noted"
  ]
}

Here is the main log content:

${logContent}`;
	}

	// POST the prompt to the Anthropic Messages API and return the parsed JSON body.
	async callLLM(prompt) {
		const payload = {
			model: this.config.llmAnalysis.model,
			max_tokens: this.config.llmAnalysis.maxTokens,
			messages: [{ role: 'user', content: prompt }]
		};

		const response = await fetch(this.apiEndpoint, {
			method: 'POST',
			headers: {
				'Content-Type': 'application/json',
				'x-api-key': this.apiKey,
				'anthropic-version': '2023-06-01'
			},
			body: JSON.stringify(payload)
		});

		if (!response.ok) {
			const error = await response.text();
			throw new Error(`LLM API call failed: ${response.status} - ${error}`);
		}

		return response.json();
	}

	// Extract the JSON object embedded in the model's text response.
	parseResponse(response) {
		try {
			const content = response.content[0].text;
			const jsonStart = content.indexOf('{');
			const jsonEnd = content.lastIndexOf('}');

			if (jsonStart === -1 || jsonEnd === -1) {
				throw new Error('No JSON found in response');
			}

			const jsonString = content.substring(jsonStart, jsonEnd + 1);
			return JSON.parse(jsonString);
		} catch (error) {
			this.logger.error(`Failed to parse LLM response: ${error.message}`);
			return null;
		}
	}

	// Estimate the cost in USD from token usage (rates are per 1M tokens).
	calculateCost(usage) {
		const modelCosts = {
			'claude-3-7-sonnet-20250219': {
				input: 3.0, // per 1M tokens
				output: 15.0 // per 1M tokens
			}
		};

		const costs = modelCosts[this.config.llmAnalysis.model] || {
			input: 0,
			output: 0
		};

		const inputCost = (usage.input_tokens / 1000000) * costs.input;
		const outputCost = (usage.output_tokens / 1000000) * costs.output;

		return inputCost + outputCost;
	}

	// Shape the raw analysis into a report object for downstream display.
	formatReport(analysis) {
		if (!analysis) return null;

		const report = {
			title: 'TASKMASTER E2E Test Analysis Report',
			timestamp: new Date().toISOString(),
			status: analysis.overall_status,
			summary: analysis.llm_summary_points,
			verifiedSteps: analysis.verified_steps,
			providerComparison: analysis.provider_add_task_comparison,
			issues: analysis.detected_issues
		};

		return report;
	}
}