feat: Introduce modern E2E test suite for Task Master AI
This commit is contained in:
168
tests/e2e/utils/llm-analyzer.js
Normal file
168
tests/e2e/utils/llm-analyzer.js
Normal file
@@ -0,0 +1,168 @@
|
||||
import { readFileSync } from 'fs';
|
||||
import fetch from 'node-fetch';
|
||||
|
||||
/**
 * Sends a completed E2E test log to the Anthropic Messages API and returns
 * the model's structured analysis as a parsed JSON object.
 *
 * All failure modes (analysis disabled, missing API key, file/network/parse
 * errors) resolve to `null` rather than throwing, so callers can treat the
 * analysis as strictly best-effort.
 */
export class LLMAnalyzer {
	/**
	 * @param {{ llmAnalysis?: { enabled: boolean, model: string, maxTokens: number } }} config
	 *   Application config; only the `llmAnalysis` section is read.
	 * @param {{ info: Function, error: Function, warning: Function, addCost: Function }} logger
	 *   Logger used for progress, diagnostics, and cost accumulation.
	 */
	constructor(config, logger) {
		this.config = config;
		this.logger = logger;
		// Read once at construction; analyzeLog() refuses to run without it.
		this.apiKey = process.env.ANTHROPIC_API_KEY;
		this.apiEndpoint = 'https://api.anthropic.com/v1/messages';
	}

	/**
	 * Analyze an E2E test log with the configured LLM.
	 *
	 * @param {string} logFile - Path to the main E2E log file.
	 * @param {string|null} [providerSummaryFile] - Optional path to the
	 *   per-provider add-task summary log, embedded into the prompt if given.
	 * @returns {Promise<object|null>} Parsed analysis JSON, or `null` when
	 *   analysis is disabled/unconfigured, the API key is missing, or any
	 *   step (read, request, parse) fails.
	 */
	async analyzeLog(logFile, providerSummaryFile = null) {
		// Optional chaining: a config without an `llmAnalysis` section is
		// treated as "disabled" instead of crashing with a TypeError.
		if (!this.config.llmAnalysis?.enabled) {
			this.logger.info('LLM analysis is disabled in configuration');
			return null;
		}

		if (!this.apiKey) {
			this.logger.error('ANTHROPIC_API_KEY not found in environment');
			return null;
		}

		try {
			const logContent = readFileSync(logFile, 'utf8');
			const prompt = this.buildAnalysisPrompt(logContent, providerSummaryFile);

			const response = await this.callLLM(prompt);
			const analysis = this.parseResponse(response);

			// Calculate and log cost
			if (response.usage) {
				const cost = this.calculateCost(response.usage);
				this.logger.addCost(cost);
				this.logger.info(`LLM Analysis AI Cost: $${cost.toFixed(6)} USD`);
			}

			return analysis;
		} catch (error) {
			this.logger.error(`LLM analysis failed: ${error.message}`);
			return null;
		}
	}

	/**
	 * Build the analysis prompt sent to the model.
	 *
	 * @param {string} logContent - Full text of the main E2E log.
	 * @param {string|null} providerSummaryFile - Optional path whose contents
	 *   are inlined as a "Provider Summary" section; unreadable files are
	 *   logged as warnings and skipped (best-effort).
	 * @returns {string} The complete prompt, instructing the model to reply
	 *   with a strict JSON structure that parseResponse() can extract.
	 */
	buildAnalysisPrompt(logContent, providerSummaryFile) {
		let providerSummary = '';
		if (providerSummaryFile) {
			try {
				providerSummary = readFileSync(providerSummaryFile, 'utf8');
			} catch (error) {
				this.logger.warning(`Could not read provider summary file: ${error.message}`);
			}
		}

		return `Analyze the following E2E test log for the task-master tool. The log contains output from various 'task-master' commands executed sequentially.

Your goal is to:
1. Verify if the key E2E steps completed successfully based on the log messages (e.g., init, parse PRD, list tasks, analyze complexity, expand task, set status, manage models, add/remove dependencies, add/update/remove tasks/subtasks, generate files).
2. **Specifically analyze the Multi-Provider Add-Task Test Sequence:**
a. Identify which providers were tested for \`add-task\`. Look for log steps like "Testing Add-Task with Provider: ..." and the summary log 'provider_add_task_summary.log'.
b. For each tested provider, determine if \`add-task\` succeeded or failed. Note the created task ID if successful.
c. Review the corresponding \`add_task_show_output_<provider>_id_<id>.log\` file (if created) for each successful \`add-task\` execution.
d. **Compare the quality and completeness** of the task generated by each successful provider based on their \`show\` output. Assign a score (e.g., 1-10, 10 being best) based on relevance to the prompt, detail level, and correctness.
e. Note any providers where \`add-task\` failed or where the task ID could not be extracted.
3. Identify any general explicit "[ERROR]" messages or stack traces throughout the *entire* log.
4. Identify any potential warnings or unusual output that might indicate a problem even if not marked as an explicit error.
5. Provide an overall assessment of the test run's health based *only* on the log content.

${providerSummary ? `\nProvider Summary:\n${providerSummary}\n` : ''}

Return your analysis **strictly** in the following JSON format. Do not include any text outside of the JSON structure:

{
"overall_status": "Success|Failure|Warning",
"verified_steps": [ "Initialization", "PRD Parsing", /* ...other general steps observed... */ ],
"provider_add_task_comparison": {
"prompt_used": "... (extract from log if possible or state 'standard auth prompt') ...",
"provider_results": {
"anthropic": { "status": "Success|Failure|ID_Extraction_Failed|Set_Model_Failed", "task_id": "...", "score": "X/10 | N/A", "notes": "..." },
"openai": { "status": "Success|Failure|...", "task_id": "...", "score": "X/10 | N/A", "notes": "..." },
/* ... include all tested providers ... */
},
"comparison_summary": "Brief overall comparison of generated tasks..."
},
"detected_issues": [ { "severity": "Error|Warning|Anomaly", "description": "...", "log_context": "[Optional, short snippet from log near the issue]" } ],
"llm_summary_points": [ "Overall summary point 1", "Provider comparison highlight", "Any major issues noted" ]
}

Here is the main log content:

${logContent}`;
	}

	/**
	 * POST the prompt to the Anthropic Messages API.
	 *
	 * @param {string} prompt - User-role message content.
	 * @returns {Promise<object>} The raw JSON response body.
	 * @throws {Error} On any non-2xx HTTP status, with status and body text.
	 */
	async callLLM(prompt) {
		const payload = {
			model: this.config.llmAnalysis.model,
			max_tokens: this.config.llmAnalysis.maxTokens,
			messages: [
				{ role: 'user', content: prompt }
			]
		};

		const response = await fetch(this.apiEndpoint, {
			method: 'POST',
			headers: {
				'Content-Type': 'application/json',
				'x-api-key': this.apiKey,
				'anthropic-version': '2023-06-01'
			},
			body: JSON.stringify(payload)
		});

		if (!response.ok) {
			const error = await response.text();
			throw new Error(`LLM API call failed: ${response.status} - ${error}`);
		}

		return response.json();
	}

	/**
	 * Extract and parse the JSON object embedded in the model's reply.
	 *
	 * Takes the substring from the first '{' to the last '}' of the first
	 * content block's text, tolerating any prose the model wrapped around
	 * the JSON despite the "strict" instruction.
	 *
	 * @param {object} response - Raw Messages API response body.
	 * @returns {object|null} Parsed analysis, or `null` if no parseable JSON
	 *   is found (the error is logged, never thrown).
	 */
	parseResponse(response) {
		try {
			const content = response.content[0].text;
			const jsonStart = content.indexOf('{');
			const jsonEnd = content.lastIndexOf('}');

			if (jsonStart === -1 || jsonEnd === -1) {
				throw new Error('No JSON found in response');
			}

			const jsonString = content.substring(jsonStart, jsonEnd + 1);
			return JSON.parse(jsonString);
		} catch (error) {
			this.logger.error(`Failed to parse LLM response: ${error.message}`);
			return null;
		}
	}

	/**
	 * Compute the USD cost of an API call from its token usage.
	 *
	 * @param {{ input_tokens: number, output_tokens: number }} usage - The
	 *   `usage` object from the Messages API response.
	 * @returns {number} Cost in USD; 0 for models with no known pricing
	 *   (a warning is logged so cost tracking is not silently wrong).
	 */
	calculateCost(usage) {
		// Prices in USD per 1M tokens; extend this table when new models are used.
		const modelCosts = {
			'claude-3-7-sonnet-20250219': {
				input: 3.00, // per 1M tokens
				output: 15.00 // per 1M tokens
			}
		};

		const costs = modelCosts[this.config.llmAnalysis.model];
		if (!costs) {
			// Unknown model: keep the original $0 behavior, but surface it
			// instead of silently under-reporting spend.
			this.logger.warning(
				`No pricing known for model '${this.config.llmAnalysis.model}'; reporting $0 cost`
			);
			return 0;
		}

		const inputCost = (usage.input_tokens / 1000000) * costs.input;
		const outputCost = (usage.output_tokens / 1000000) * costs.output;

		return inputCost + outputCost;
	}

	/**
	 * Reshape a parsed analysis into a flat report object.
	 *
	 * @param {object|null} analysis - Result of parseResponse()/analyzeLog().
	 * @returns {object|null} Report with title/timestamp/status/summary/
	 *   verifiedSteps/providerComparison/issues, or `null` if analysis is
	 *   falsy (so callers can chain directly off a failed analysis).
	 */
	formatReport(analysis) {
		if (!analysis) return null;

		const report = {
			title: 'TASKMASTER E2E Test Analysis Report',
			timestamp: new Date().toISOString(),
			status: analysis.overall_status,
			summary: analysis.llm_summary_points,
			verifiedSteps: analysis.verified_steps,
			providerComparison: analysis.provider_add_task_comparison,
			issues: analysis.detected_issues
		};

		return report;
	}
}
|
||||
Reference in New Issue
Block a user