chore: create plan for task execution

2025-10-06 18:07:50 +02:00
parent 5cb7ed557a
commit 27b2348a9a
5 changed files with 877 additions and 5 deletions
--- a/.taskmaster/tasks/tasks.json
+++ b/.taskmaster/tasks/tasks.json
@@ -7901,5 +7901,344 @@
      "updated": "2025-09-12T04:02:07.346Z",
      "description": "Tasks for tm-start context"
    }
+  },
+  "autonomous-tdd-git-workflow": {
+    "tasks": [
+      {
+        "id": 11,
+        "title": "Create WorkflowOrchestrator Core Service",
+        "description": "Implement the core orchestration service that drives the autonomous TDD workflow with state machine phases",
+        "details": "Create packages/tm-core/src/services/workflow-orchestrator.ts implementing a state machine with phases: Preflight → Branch/Tag → SubtaskIter (Red/Green/Commit) → Finalize → PR. Use EventEmitter for progress events. Include methods: startWorkflow(taskId, options), resumeWorkflow(runId), pauseWorkflow(), getWorkflowState(). Store state in memory with persistence to .taskmaster/reports/runs/<run-id>/state.json. Implement checkpoint saving after each phase transition.",
+        "testStrategy": "Unit tests for state transitions, event emission, checkpoint persistence. Integration tests for full workflow lifecycle with mock adapters. Test resume capability from various checkpoints.",
+        "priority": "high",
+        "dependencies": [],
+        "status": "pending",
+        "subtasks": []
+      },
+      {
+        "id": 12,
+        "title": "Implement Test Runner Adapter Service",
+        "description": "Create framework-agnostic test runner adapter that detects and executes project test commands",
+        "details": "Create packages/tm-core/src/services/test-runner-adapter.ts with methods: detectRunner() (checks package.json for test scripts), runTargeted(files/pattern), runAll(), getCoverageReport(), enforceCoverageThresholds(thresholds). Support npm/pnpm/yarn test detection. Parse test output for pass/fail counts and coverage metrics. Return structured TestResult interface with failures, duration, coverage data. Default 80% coverage thresholds.",
+        "testStrategy": "Mock different package.json configurations for runner detection. Test parsing of various test output formats. Verify coverage threshold enforcement logic. Integration test with actual npm test execution.",
+        "priority": "high",
+        "dependencies": [],
+        "status": "pending",
+        "subtasks": []
+      },
+      {
+        "id": 13,
+        "title": "Build Git Operations Adapter",
+        "description": "Encapsulate all git operations with confirmation gates and branch naming patterns",
+        "details": "Create packages/tm-core/src/services/git-adapter.ts wrapping git commands: createBranch(pattern, tag, taskId), checkout(branch), add(files), commit(message, scope), push(options), getCurrentBranch(), getDefaultBranch(). Implement branch naming with configurable pattern support ({tag}/task-{id}[-slug]). Add confirmation prompts for destructive operations unless --no-confirm. Never allow commits to default branch. Use simple-git library or child_process for git commands.",
+        "testStrategy": "Mock git commands and verify correct invocations. Test branch naming pattern generation. Verify default branch protection. Test confirmation gate behavior with different flags.",
+        "priority": "high",
+        "dependencies": [],
+        "status": "pending",
+        "subtasks": []
+      },
+      {
+        "id": 14,
+        "title": "Create Autopilot CLI Command",
+        "description": "Implement the main autopilot command with argument parsing and orchestrator invocation",
+        "details": "Create apps/cli/src/commands/autopilot.command.ts using Commander.js. Accept taskId argument and flags: --dry-run, --no-push, --no-pr, --no-confirm, --force, --max-attempts <n>, --resume. Initialize WorkflowOrchestrator with options. Subscribe to orchestrator events and render progress using existing UI components from apps/cli/src/ui/components/. Handle interrupt signals gracefully for resumability.",
+        "testStrategy": "Test command parsing with various flag combinations. Mock orchestrator and verify correct initialization. Test event subscription and UI rendering. Verify graceful shutdown on SIGINT.",
+        "priority": "high",
+        "dependencies": [
+          11
+        ],
+        "status": "pending",
+        "subtasks": []
+      },
+      {
+        "id": 15,
+        "title": "Integrate Surgical Test Generator",
+        "description": "Connect the existing surgical test generator agent to the autopilot workflow for red phase",
+        "details": "Create test generation prompt adapter in packages/tm-core/src/services/test-generator.ts. Load .claude/agents/surgical-test-generator.md as system prompt. Format subtask context into user prompt with file paths, existing code, and requirements. Use existing executor service to invoke claude with the prompt. Parse generated test code and write to appropriate test files following project conventions. Validate tests compile/parse before proceeding.",
+        "testStrategy": "Mock executor responses with sample test generation. Verify prompt formatting includes all context. Test file writing to correct locations. Validate test syntax checking logic.",
+        "priority": "high",
+        "dependencies": [
+          11,
+          12
+        ],
+        "status": "pending",
+        "subtasks": []
+      },
+      {
+        "id": 16,
+        "title": "Implement Code Generation Executor",
+        "description": "Create green phase code implementation using focused prompts to make tests pass",
+        "details": "Extend packages/tm-core/src/services/task-execution-service.ts with autopilot-specific prompt generation. Create minimal implementation prompt: 'Make these failing tests pass with the smallest code changes following project patterns. Only modify necessary files.' Include test failures, subtask context, and existing code. Use ExecutorFactory to invoke selected executor (claude/codex/gemini). Parse and apply code changes, handling conflicts gracefully.",
+        "testStrategy": "Test prompt generation with various failure scenarios. Mock executor responses and verify code application. Test conflict resolution strategies. Verify minimal change enforcement.",
+        "priority": "high",
+        "dependencies": [
+          11,
+          15
+        ],
+        "status": "pending",
+        "subtasks": []
+      },
+      {
+        "id": 17,
+        "title": "Add Branch and Tag Management Integration",
+        "description": "Connect autopilot to existing tag management for branch-tag mapping",
+        "details": "Integrate with scripts/modules/task-manager/tag-management.js for branch→tag mapping. When creating branch, register mapping in tag system. Explicitly switch active tag to match branch tag. Load task data filtered by active tag. Ensure branch name includes both tag and task ID per spec. Handle tag switching when resuming workflows. Persist tag-branch associations.",
+        "testStrategy": "Test branch-tag registration and retrieval. Verify active tag switching. Test filtered task loading by tag. Validate branch naming includes tag and task ID.",
+        "priority": "medium",
+        "dependencies": [
+          13
+        ],
+        "status": "pending",
+        "subtasks": []
+      },
+      {
+        "id": 18,
+        "title": "Build Run State Persistence System",
+        "description": "Implement checkpoint saving and workflow resumability with detailed logging",
+        "details": "Create run state management in WorkflowOrchestrator. Save checkpoints to .taskmaster/reports/runs/<run-id>/state.json after each phase. Include: current phase, subtask progress, test results, git state, timestamps. Implement JSONL logging for all operations to .taskmaster/reports/runs/<run-id>/log.jsonl. Add resume() method to restore from checkpoint. Handle partial state recovery gracefully.",
+        "testStrategy": "Test checkpoint creation at each phase. Verify JSONL log format and completeness. Test resume from various interruption points. Validate state recovery with corrupted files.",
+        "priority": "medium",
+        "dependencies": [
+          11
+        ],
+        "status": "pending",
+        "subtasks": []
+      },
+      {
+        "id": 19,
+        "title": "Implement Preflight Validation Service",
+        "description": "Create comprehensive pre-execution validation checking git state, tools, and configuration",
+        "details": "Add preflight checks in WorkflowOrchestrator: verify clean working tree (configurable), detect test runner availability, validate git/gh CLI installation, check for required API keys/executors, verify task has subtasks (auto-expand if not), ensure not on default branch. Return structured validation report with errors/warnings. Allow --force to bypass non-critical checks.",
+        "testStrategy": "Mock various environment states for validation. Test clean/dirty working tree detection. Verify tool availability checks. Test auto-expansion trigger when no subtasks.",
+        "priority": "medium",
+        "dependencies": [
+          11,
+          12,
+          13
+        ],
+        "status": "pending",
+        "subtasks": []
+      },
+      {
+        "id": 20,
+        "title": "Create PR Generation Service",
+        "description": "Implement GitHub PR creation with formatted body from run reports",
+        "details": "Extend git-adapter.ts with PR operations using gh CLI. Generate PR title: 'Task #<id> [<tag>]: <title>'. Format PR body with: summary of changes, subtask completion list, test coverage report, run statistics. Include link to full run report. Handle gh unavailability with fallback instructions. Support --no-pr flag to skip. Store PR URL in run state.",
+        "testStrategy": "Mock gh CLI responses for PR creation. Test PR title and body formatting. Verify fallback behavior without gh. Test PR URL persistence in run state.",
+        "priority": "medium",
+        "dependencies": [
+          13,
+          18
+        ],
+        "status": "pending",
+        "subtasks": []
+      },
+      {
+        "id": 21,
+        "title": "Add Subtask Selection Logic",
+        "description": "Implement intelligent subtask selection respecting dependencies and status",
+        "details": "Enhance WorkflowOrchestrator with subtask selection using TaskService.getNextTask(). Filter subtasks by: pending/in-progress status, satisfied dependencies, task ownership. Process in dependency order. Skip already-done subtasks. Handle blocked subtasks gracefully. Update subtask status to in-progress when starting, done when tests pass and committed.",
+        "testStrategy": "Test selection with various dependency graphs. Verify status filtering logic. Test dependency satisfaction checking. Validate status transitions during workflow.",
+        "priority": "high",
+        "dependencies": [
+          11
+        ],
+        "status": "pending",
+        "subtasks": []
+      },
+      {
+        "id": 22,
+        "title": "Implement Test-Driven Commit Gating",
+        "description": "Enforce commit-only-on-green policy with configurable coverage thresholds",
+        "details": "Add commit gating logic in WorkflowOrchestrator. After code generation, run tests and check: all tests pass, coverage meets thresholds (default 80% for lines/branches/functions/statements). Only commit if both conditions met. Support --force-commit override. Implement retry logic with backoff for flaky tests. Log all attempts and results.",
+        "testStrategy": "Test gating with various test results and coverage levels. Verify threshold enforcement. Test override flag behavior. Validate retry logic with intermittent failures.",
+        "priority": "high",
+        "dependencies": [
+          11,
+          12,
+          16
+        ],
+        "status": "pending",
+        "subtasks": []
+      },
+      {
+        "id": 23,
+        "title": "Build Progress Event System",
+        "description": "Create event-driven progress reporting for CLI rendering and future integrations",
+        "details": "Implement EventEmitter-based progress system in WorkflowOrchestrator. Emit events: workflow:start, phase:change, subtask:start/complete, test:run/pass/fail, commit:created, pr:created, workflow:complete/error. Include detailed payloads with timestamps, durations, results. Create event aggregator for summary statistics. Support event filtering and buffering.",
+        "testStrategy": "Test event emission at each workflow step. Verify event payload completeness. Test event aggregation logic. Validate buffering and filtering mechanisms.",
+        "priority": "medium",
+        "dependencies": [
+          11
+        ],
+        "status": "pending",
+        "subtasks": []
+      },
+      {
+        "id": 24,
+        "title": "Create Autopilot Configuration Schema",
+        "description": "Extend taskmaster config with autopilot-specific settings and validation",
+        "details": "Add autopilot section to .taskmaster/config.json schema: autopilot: { enabled, requireCleanWorkingTree, commitTemplate, defaultCommitType }, test: { runner, coverageThresholds }, git: { branchPattern, pr: { enabled, base } }. Create validation with Zod schema. Add config migration for existing projects. Provide sensible defaults. Support environment variable overrides.",
+        "testStrategy": "Test schema validation with various configurations. Verify migration from old configs. Test default value application. Validate environment override behavior.",
+        "priority": "medium",
+        "dependencies": [],
+        "status": "pending",
+        "subtasks": []
+      },
+      {
+        "id": 25,
+        "title": "Implement Dry Run Mode",
+        "description": "Add simulation mode showing planned operations without execution",
+        "details": "Add --dry-run support throughout workflow. In dry-run: show planned git operations, display test commands without running, preview commit messages, show PR body without creating. Format output clearly indicating simulated vs actual. Still perform validation and planning phases. Useful for debugging and verification.",
+        "testStrategy": "Test dry-run flag propagation to all adapters. Verify no side effects occur. Test output formatting for clarity. Validate planning phases still execute.",
+        "priority": "low",
+        "dependencies": [
+          14,
+          19
+        ],
+        "status": "pending",
+        "subtasks": []
+      },
+      {
+        "id": 26,
+        "title": "Add tmux Integration Support",
+        "description": "Create tmux pane management for split-view executor terminal",
+        "details": "Create apps/cli/src/ui/tui/tmux-manager.ts for pane control. Detect tmux availability. Support: split-window for executor pane, send-keys for command execution, capture-pane for output, kill-pane for cleanup. Left pane shows autopilot progress, right pane runs executor. Handle non-tmux fallback gracefully. Preserve pane on interrupt for debugging.",
+        "testStrategy": "Mock tmux commands and verify invocations. Test pane creation and command sending. Verify fallback behavior without tmux. Test cleanup on exit.",
+        "priority": "low",
+        "dependencies": [
+          14
+        ],
+        "status": "pending",
+        "subtasks": []
+      },
+      {
+        "id": 27,
+        "title": "Build Run Report Generator",
+        "description": "Create comprehensive markdown and JSON reports for completed workflows",
+        "details": "Generate reports in .taskmaster/reports/runs/<run-id>/: summary.md with task details, subtask results, test coverage, commit list, duration stats. Full log.jsonl with all operations. coverage.json with detailed metrics. state.json for resumability. Include charts/tables for readability. Generate PR-ready summary section. Archive old runs automatically.",
+        "testStrategy": "Test report generation with various workflow outcomes. Verify markdown formatting and readability. Test JSON structure validity. Validate archival logic for old runs.",
+        "priority": "medium",
+        "dependencies": [
+          18
+        ],
+        "status": "pending",
+        "subtasks": [
+          {
+            "id": 1,
+            "title": "Create Report Generator Service Core",
+            "description": "Implement the core WorkflowReportGenerator service that orchestrates report generation for completed workflow runs",
+            "dependencies": [],
+            "details": "Create packages/tm-core/src/services/report-generator.service.ts with WorkflowReportGenerator class. Implement methods: generateRunReport(runId, workflowState), generateSummaryMarkdown(state), generateJSONLogs(operations), generateCoverageMetrics(testResults), archiveOldRuns(threshold). Use EventEmitter for progress updates. Store reports in .taskmaster/reports/runs/<run-id>/ directory structure. Integrate with existing ConfigManager for paths and FileStorage for persistence.",
+            "status": "pending",
+            "testStrategy": "Unit tests for report generation methods, markdown formatting validation, JSON structure tests, archive logic tests"
+          },
+          {
+            "id": 2,
+            "title": "Build Markdown Summary Generator",
+            "description": "Create comprehensive markdown report generation with tables, charts, and PR-ready sections",
+            "dependencies": [
+              "27.1"
+            ],
+            "details": "Implement markdown generation in packages/tm-core/src/services/report-generators/markdown-generator.ts. Create formatted sections: Executive Summary (task completion stats, duration, test coverage), Task Details Table (ID, title, status, duration), Subtask Results (grouped by parent, with test outcomes), Test Coverage Charts (using ASCII art or markdown badges), Commit History (list with links), Performance Metrics (timings per phase). Include generatePRBody() method for GitHub-ready summaries. Use markdown tables and proper formatting for readability.",
+            "status": "pending",
+            "testStrategy": "Test markdown output formatting, table generation, special character escaping, PR body validation"
+          },
+          {
+            "id": 3,
+            "title": "Implement JSONL Operation Logger",
+            "description": "Build detailed operation logging system that captures all workflow operations in JSONL format",
+            "dependencies": [
+              "27.1"
+            ],
+            "details": "Create packages/tm-core/src/services/report-generators/jsonl-logger.ts with JSONLOperationLogger class. Implement streaming JSONL writer for log.jsonl file. Capture operations: task starts/completions, test executions, git operations, phase transitions, errors/retries. Each line contains: timestamp, operation type, phase, task/subtask ID, duration, result, metadata. Implement buffered writing for performance. Include log rotation when file exceeds size limit.",
+            "status": "pending",
+            "testStrategy": "Test JSONL format validity, streaming performance, log rotation, operation capture completeness"
+          },
+          {
+            "id": 4,
+            "title": "Create Coverage and Metrics Collectors",
+            "description": "Build test coverage collection and performance metrics aggregation components",
+            "dependencies": [
+              "27.1"
+            ],
+            "details": "Create packages/tm-core/src/services/report-generators/metrics-collector.ts. Implement CoverageCollector to parse test runner outputs (Jest, Vitest, etc.), aggregate line/branch/function/statement coverage, generate coverage.json with detailed metrics per file/module. Implement PerformanceCollector to track phase durations, operation timings, resource usage. Create state.json generator for workflow resumability with checkpoints, completed operations, pending tasks.",
+            "status": "pending",
+            "testStrategy": "Mock various test runner outputs, verify coverage parsing accuracy, test metric aggregation logic"
+          },
+          {
+            "id": 5,
+            "title": "Build Report Archival and Management System",
+            "description": "Implement automatic archival of old run reports and report lifecycle management",
+            "dependencies": [
+              "27.1",
+              "27.2",
+              "27.3",
+              "27.4"
+            ],
+            "details": "Create packages/tm-core/src/services/report-generators/archive-manager.ts. Implement automatic archival: move reports older than 30 days to .taskmaster/reports/archived/, compress old reports to .tar.gz, maintain index of archived reports. Add report management CLI commands in apps/cli/src/commands/reports.command.ts: list-reports, view-report <run-id>, archive-reports, clean-reports. Integrate with WorkflowOrchestrator to trigger report generation on workflow completion.",
+            "status": "pending",
+            "testStrategy": "Test archival thresholds, compression functionality, index maintenance, CLI command integration"
+          }
+        ]
+      },
+      {
+        "id": 28,
+        "title": "Add MCP Tools Integration",
+        "description": "Integrate with MCP server for structured task operations during autopilot",
+        "details": "Use MCP tools where available: get_tasks for task loading, set_task_status for status updates, update_subtask for progress notes, expand_task if subtasks needed. Fallback to direct service calls if MCP unavailable. Improve context passing to executors via MCP. Support MCP-based shell/test execution where available.",
+        "testStrategy": "Mock MCP tool availability and responses. Test fallback to direct service calls. Verify status updates through MCP. Test context enhancement via MCP.",
+        "priority": "low",
+        "dependencies": [
+          14,
+          21
+        ],
+        "status": "pending",
+        "subtasks": []
+      },
+      {
+        "id": 29,
+        "title": "Implement Retry and Backoff Logic",
+        "description": "Add intelligent retry mechanisms for flaky tests and transient failures",
+        "details": "Implement exponential backoff in WorkflowOrchestrator for: test execution (max 3 retries), executor calls (max 2 retries), git operations (max 2 retries). Detect flaky test patterns. Add --max-attempts flag (default 3). Track retry attempts in run state. Implement circuit breaker for repeated failures. Provide clear failure reasons.",
+        "testStrategy": "Test retry logic with simulated failures. Verify exponential backoff timing. Test max attempts enforcement. Validate circuit breaker activation.",
+        "priority": "medium",
+        "dependencies": [
+          11,
+          22
+        ],
+        "status": "pending",
+        "subtasks": []
+      },
+      {
+        "id": 30,
+        "title": "Create End-to-End Integration Tests",
+        "description": "Build comprehensive test suite validating full autopilot workflow",
+        "details": "Create test/integration/autopilot.test.ts with scenarios: happy path (all tests pass first try), retry scenarios (flaky tests), resume from interruption, various flag combinations, multi-subtask workflows. Use test fixtures with mock tasks/subtasks. Verify all outputs: commits, branches, reports, PR body. Test with different executors and test runners.",
+        "testStrategy": "Integration tests with mock git repo and task data. Test complete workflow execution. Verify all artifacts created correctly. Validate resume functionality. Performance benchmarks for workflow duration.",
+        "priority": "low",
+        "dependencies": [
+          11,
+          12,
+          13,
+          14,
+          15,
+          16,
+          17,
+          18,
+          19,
+          20,
+          21,
+          22
+        ],
+        "status": "pending",
+        "subtasks": []
+      }
+    ],
+    "metadata": {
+      "created": "2025-09-30T13:32:28.649Z",
+      "updated": "2025-09-30T15:13:53.999Z",
+      "description": "Tasks for autonomous-tdd-git-workflow context"
+    }
  }
 }