chore: keep working on tasks

2025-10-06 20:22:58 +02:00
parent cc3850eccd
commit 1f81077bc9
4 changed files with 854 additions and 368 deletions
--- a/.taskmaster/config.json
+++ b/.taskmaster/config.json
@@ -1,43 +1,44 @@
 {
-	"models": {
-		"main": {
-			"provider": "claude-code",
-			"modelId": "opus",
-			"maxTokens": 32000,
-			"temperature": 0.2
-		},
-		"research": {
-			"provider": "perplexity",
-			"modelId": "sonar",
-			"maxTokens": 8700,
-			"temperature": 0.1
-		},
-		"fallback": {
-			"provider": "anthropic",
-			"modelId": "claude-3-7-sonnet-20250219",
-			"maxTokens": 120000,
-			"temperature": 0.2
-		}
-	},
-	"global": {
-		"logLevel": "info",
-		"debug": false,
-		"defaultNumTasks": 10,
-		"defaultSubtasks": 5,
-		"defaultPriority": "medium",
-		"projectName": "Taskmaster",
-		"ollamaBaseURL": "http://localhost:11434/api",
-		"bedrockBaseURL": "https://bedrock.us-east-1.amazonaws.com",
-		"responseLanguage": "English",
-		"enableCodebaseAnalysis": true,
-		"userId": "1234567890",
-		"azureBaseURL": "https://your-endpoint.azure.com/",
-		"defaultTag": "master"
-	},
-	"claudeCode": {},
-	"grokCli": {
-		"timeout": 120000,
-		"workingDirectory": null,
-		"defaultModel": "grok-4-latest"
-	}
-}
+  "models": {
+    "main": {
+      "provider": "claude-code",
+      "modelId": "sonnet",
+      "maxTokens": 64000,
+      "temperature": 0.2
+    },
+    "research": {
+      "provider": "perplexity",
+      "modelId": "sonar",
+      "maxTokens": 8700,
+      "temperature": 0.1
+    },
+    "fallback": {
+      "provider": "anthropic",
+      "modelId": "claude-3-7-sonnet-20250219",
+      "maxTokens": 120000,
+      "temperature": 0.2
+    }
+  },
+  "global": {
+    "logLevel": "info",
+    "debug": false,
+    "defaultNumTasks": 10,
+    "defaultSubtasks": 5,
+    "defaultPriority": "medium",
+    "projectName": "Taskmaster",
+    "ollamaBaseURL": "http://localhost:11434/api",
+    "bedrockBaseURL": "https://bedrock.us-east-1.amazonaws.com",
+    "responseLanguage": "English",
+    "enableCodebaseAnalysis": true,
+    "userId": "1234567890",
+    "azureBaseURL": "https://your-endpoint.azure.com/",
+    "defaultTag": "master"
+  },
+  "claudeCode": {},
+  "codexCli": {},
+  "grokCli": {
+    "timeout": 120000,
+    "workingDirectory": null,
+    "defaultModel": "grok-4-latest"
+  }
+}
--- a/.taskmaster/docs/autonomous-tdd-git-workflow.md
+++ b/.taskmaster/docs/autonomous-tdd-git-workflow.md
@@ -26,7 +26,7 @@

 ## Success Criteria

- One command can autonomously complete a task’s subtasks via TDD and open a PR when done.
+- One command can autonomously complete a task's subtasks via TDD and open a PR when done.

 - All commits made on a branch that includes the tag and task id (see Branch Naming); no commits to the default branch directly.

@@ -34,6 +34,13 @@

 - End-to-end logs + artifacts stored in .taskmaster/reports/runs/<timestamp-or-id>/.

+## Success Metrics (Phase 1)
+
+- **Adoption**: 80% of tasks in a pilot repo completed via `tm autopilot`
+- **Safety**: 0 commits to default branch; 100% of commits have green tests
+- **Efficiency**: Average time from task start to PR < 30min for simple subtasks
+- **Reliability**: < 5% of runs require manual intervention (timeout/conflicts)
+
 ## User Stories

 - As a developer, I can run tm autopilot <taskId> and watch a structured, safe workflow execute.
@@ -42,6 +49,103 @@

 - As an operator, I can see current step, active subtask, tests status, and logs in a compact CLI view and read a final run report.

+## Example Workflow Traces
+
+### Happy Path: Complete a 3-subtask feature
+
+```bash
+# Developer starts
+$ tm autopilot 42
+→ Checks preflight: ✓ clean tree, ✓ npm test detected
+→ Creates branch: analytics/task-42-user-metrics
+→ Subtask 42.1: "Add metrics schema"
+  RED: generates test_metrics_schema.test.js → 3 failures
+  GREEN: implements schema.js → all pass
+  COMMIT: "feat(metrics): add metrics schema (task 42.1)"
+→ Subtask 42.2: "Add collection endpoint"
+  RED: generates test_metrics_endpoint.test.js → 5 failures
+  GREEN: implements api/metrics.js → all pass
+  COMMIT: "feat(metrics): add collection endpoint (task 42.2)"
+→ Subtask 42.3: "Add dashboard widget"
+  RED: generates test_metrics_widget.test.js → 4 failures
+  GREEN: implements components/MetricsWidget.jsx → all pass
+  COMMIT: "feat(metrics): add dashboard widget (task 42.3)"
+→ Final: all 3 subtasks complete
+  ✓ Run full test suite → all pass
+  ✓ Coverage check → 85% (meets 80% threshold)
+  PUSH: confirms with user → pushed to origin
+  PR: opens #123 "Task #42 [analytics]: User metrics tracking"
+
+✓ Task 42 complete. PR: https://github.com/org/repo/pull/123
+  Run report: .taskmaster/reports/runs/2025-01-15-142033/
+```
+
+### Error Recovery: Tests still failing after max attempts
+
+```bash
+$ tm autopilot 42
+→ Subtask 42.2 GREEN phase: attempt 1 fails (2 tests still red)
+→ Subtask 42.2 GREEN phase: attempt 2 fails (1 test still red)
+→ Subtask 42.2 GREEN phase: attempt 3 fails (1 test still red)
+
+⚠️  Paused: Could not achieve green state after 3 attempts
+📋 State saved to: .taskmaster/reports/runs/2025-01-15-142033/
+    Last error: "POST /api/metrics returns 500 instead of 201"
+
+Next steps:
+  - Review diff: git diff HEAD
+  - Inspect logs: cat .taskmaster/reports/runs/2025-01-15-142033/log.jsonl
+  - Check test output: cat .taskmaster/reports/runs/2025-01-15-142033/test-results/subtask-42.2-green-attempt3.json
+  - Resume after manual fix: tm autopilot --resume
+
+# Developer manually fixes the issue, then:
+$ tm autopilot --resume
+→ Resuming subtask 42.2 GREEN phase
+  GREEN: all tests pass
+  COMMIT: "feat(metrics): add collection endpoint (task 42.2)"
+→ Continuing to subtask 42.3...
+```
+
+### Dry Run: Preview before execution
+
+```bash
+$ tm autopilot 42 --dry-run
+Autopilot Plan for Task #42 [analytics]: User metrics tracking
+─────────────────────────────────────────────────────────────
+Preflight:
+  ✓ Working tree is clean
+  ✓ Test command detected: npm test
+  ✓ Tools available: git, gh, node, npm
+  ✓ Current branch: main (will create new branch)
+
+Branch & Tag:
+  → Create branch: analytics/task-42-user-metrics
+  → Set active tag: analytics
+
+Subtasks (3 pending):
+  1. 42.1: Add metrics schema
+     - RED: generate tests in src/__tests__/schema.test.js
+     - GREEN: implement src/schema.js
+     - COMMIT: "feat(metrics): add metrics schema (task 42.1)"
+
+  2. 42.2: Add collection endpoint [depends on 42.1]
+     - RED: generate tests in src/api/__tests__/metrics.test.js
+     - GREEN: implement src/api/metrics.js
+     - COMMIT: "feat(metrics): add collection endpoint (task 42.2)"
+
+  3. 42.3: Add dashboard widget [depends on 42.2]
+     - RED: generate tests in src/components/__tests__/MetricsWidget.test.jsx
+     - GREEN: implement src/components/MetricsWidget.jsx
+     - COMMIT: "feat(metrics): add dashboard widget (task 42.3)"
+
+Finalization:
+  → Run full test suite with coverage
+  → Push branch to origin (will confirm)
+  → Create PR targeting main
+
+Run without --dry-run to execute.
+```
+
 ## High‑Level Workflow

 1) Pre‑flight
@@ -180,31 +284,420 @@

 ### TUI with tmux (Linear Execution)

- Left pane: Tag selector, task list (status/priority), start/expand shortcuts; “Start” triggers the next task or a selected task.
+- Left pane: Tag selector, task list (status/priority), start/expand shortcuts; "Start" triggers the next task or a selected task.

 - Right pane: Executor terminal (tmux split) that runs the coding agent (claude-code/codex). Autopilot can hand over to the right pane during green.

 - MCP integration: use MCP tools for task queries/updates and for shell/test invocations where available.

-## Prompts (Initial Direction)
+## TUI Layout (tmux-based)

- Red phase prompt skeleton (tests):
+### Pane Structure

-  - Use .claude/agents/surgical-test-generator.md as the system prompt to generate high-signal failing tests tailored to the project’s language and conventions. Keep scope minimal and deterministic; no code changes yet.
+```
+┌─────────────────────────────────────┬──────────────────────────────────┐
+│ Task Navigator (left)               │ Executor Terminal (right)        │
+│                                     │                                  │
+│ Project: my-app                     │ $ tm autopilot --executor-mode   │
+│ Branch: analytics/task-42           │ > Running subtask 42.2 GREEN...  │
+│ Tag: analytics                      │ > Implementing endpoint...       │
+│                                     │ > Tests: 3 passed, 0 failed      │
+│ Tasks:                              │ > Ready to commit                │
+│ → 42 [in-progress] User metrics     │                                  │
+│   → 42.1 [done] Schema              │ [Live output from Claude Code]   │
+│   → 42.2 [active] Endpoint ◀        │                                  │
+│   → 42.3 [pending] Dashboard        │                                  │
+│                                     │                                  │
+│ [s] start  [p] pause  [q] quit      │                                  │
+└─────────────────────────────────────┴──────────────────────────────────┘
+```

- Green phase prompt skeleton (code):
+### Implementation Notes

-  - “Make the tests pass by changing the smallest amount of code, following project patterns. Only modify necessary files. Keep commits focused to this subtask.”
+- **Left pane**: `apps/cli/src/ui/tui/navigator.ts` (new, uses `blessed` or `ink`)
+- **Right pane**: spawned via `tmux split-window -h` running `tm autopilot --executor-mode`
+- **Communication**: shared state file `.taskmaster/state/current-run.json` + file watching or event stream
+- **Keybindings**:
+  - `s` - Start selected task
+  - `p` - Pause/resume current run
+  - `q` - Quit (with confirmation if run active)
+  - `↑/↓` - Navigate task list
+  - `Enter` - Expand/collapse subtasks

-## Configuration
+## Prompt Composition (Detailed)

- .taskmaster/config.json additions
+### System Prompt Assembly

-  - autopilot: { enabled: true, requireCleanWorkingTree: true, commitTemplate: "{type}({scope}): {msg}", defaultCommitType: "feat" }
+Prompts are composed in three layers:

-  - test: { runner: "auto", coverageThresholds: { lines: 80, branches: 80, functions: 80, statements: 80 } }
+1. **Base rules** (loaded in order from `.cursor/rules/` and `.claude/agents/`):
+   - `git_workflow.mdc` → git commit conventions, branch policy, PR guidelines
+   - `test_workflow.mdc` → TDD loop requirements, coverage thresholds, test structure
+   - `surgical-test-generator.md` → test generation methodology, project-specific test patterns

-  - git: { branchPattern: "{tag}/task-{id}-{slug}", pr: { enabled: true, base: "default" } }
+2. **Task context injection**:
+   ```
+   You are implementing:
+   Task #42 [analytics]: User metrics tracking
+   Subtask 42.2: Add collection endpoint
+
+   Description:
+   Implement POST /api/metrics endpoint to collect user metrics events
+
+   Acceptance criteria:
+   - POST /api/metrics accepts { userId, eventType, timestamp }
+   - Validates input schema (reject missing/invalid fields)
+   - Persists to database
+   - Returns 201 on success with created record
+   - Returns 400 on validation errors
+
+   Dependencies:
+   - Subtask 42.1 (metrics schema) is complete
+
+   Current phase: RED (generate failing tests)
+   Test command: npm test
+   Test file convention: src/**/*.test.js (vitest framework detected)
+   Branch: analytics/task-42-user-metrics
+   Project language: JavaScript (Node.js)
+   ```
+
+3. **Phase-specific instructions**:
+   - **RED phase**: "Generate minimal failing tests for this subtask. Do NOT implement any production code. Only create test files. Confirm tests fail with clear error messages indicating missing implementation."
+   - **GREEN phase**: "Implement minimal code to pass the failing tests. Follow existing project patterns in `src/`. Only modify files necessary for this subtask. Keep changes focused and reviewable."
+
+### Example Full Prompt (RED Phase)
+
+```markdown
+<SYSTEM PROMPT>
+[Contents of .cursor/rules/git_workflow.mdc]
+[Contents of .cursor/rules/test_workflow.mdc]
+[Contents of .claude/agents/surgical-test-generator.md]
+
+<TASK CONTEXT>
+You are implementing:
+Task #42.2: Add collection endpoint
+
+Description:
+Implement POST /api/metrics endpoint to collect user metrics events
+
+Acceptance criteria:
+- POST /api/metrics accepts { userId, eventType, timestamp }
+- Validates input schema (reject missing/invalid fields)
+- Persists to database using MetricsSchema from subtask 42.1
+- Returns 201 on success with created record
+- Returns 400 on validation errors with details
+
+Dependencies: Subtask 42.1 (metrics schema) is complete
+
+<INSTRUCTION>
+Generate failing tests for this subtask. Follow project conventions:
+- Test file: src/api/__tests__/metrics.test.js
+- Framework: vitest (detected from package.json)
+- Test cases to cover:
+  * POST /api/metrics with valid payload → should return 201 (will fail: endpoint not implemented)
+  * POST /api/metrics with missing userId → should return 400 (will fail: validation not implemented)
+  * POST /api/metrics with invalid timestamp → should return 400 (will fail: validation not implemented)
+  * POST /api/metrics should persist to database → should save record (will fail: persistence not implemented)
+
+Do NOT implement the endpoint code yet. Only create test file(s).
+Confirm tests fail with messages like "Cannot POST /api/metrics" or "endpoint not defined".
+
+Output format:
+1. File path to create: src/api/__tests__/metrics.test.js
+2. Complete test code
+3. Command to run: npm test src/api/__tests__/metrics.test.js
+```
+
+### Example Full Prompt (GREEN Phase)
+
+```markdown
+<SYSTEM PROMPT>
+[Contents of .cursor/rules/git_workflow.mdc]
+[Contents of .cursor/rules/test_workflow.mdc]
+
+<TASK CONTEXT>
+Task #42.2: Add collection endpoint
+[same context as RED phase]
+
+<CURRENT STATE>
+Tests created in RED phase:
+- src/api/__tests__/metrics.test.js
+- 5 tests written, all failing as expected
+
+Test output:
+~~~
+FAIL src/api/__tests__/metrics.test.js
+  POST /api/metrics
+    ✗ should return 201 with valid payload (endpoint not found)
+    ✗ should return 400 with missing userId (endpoint not found)
+    ✗ should return 400 with invalid timestamp (endpoint not found)
+    ✗ should persist to database (endpoint not found)
+~~~
+
+<INSTRUCTION>
+Implement minimal code to make all tests pass.
+
+Guidelines:
+- Create/modify file: src/api/metrics.js
+- Use existing patterns from src/api/ (e.g., src/api/users.js for reference)
+- Import MetricsSchema from subtask 42.1 (src/schema.js)
+- Implement validation, persistence, and response handling
+- Follow project error handling conventions
+- Keep implementation focused on this subtask only
+
+After implementation:
+1. Run tests: npm test src/api/__tests__/metrics.test.js
+2. Confirm all 5 tests pass
+3. Report results
+
+Output format:
+1. File(s) created/modified
+2. Implementation code
+3. Test command and results
+```
+
+### Prompt Loading Configuration
+
+See `.taskmaster/config.json` → `prompts` section for paths and load order.
+
+## Configuration Schema
+
+### .taskmaster/config.json
+
+```json
+{
+  "autopilot": {
+    "enabled": true,
+    "requireCleanWorkingTree": true,
+    "commitTemplate": "{type}({scope}): {msg}",
+    "defaultCommitType": "feat",
+    "maxGreenAttempts": 3,
+    "testTimeout": 300000
+  },
+  "test": {
+    "runner": "auto",
+    "coverageThresholds": {
+      "lines": 80,
+      "branches": 80,
+      "functions": 80,
+      "statements": 80
+    },
+    "targetedRunPattern": "**/*.test.js"
+  },
+  "git": {
+    "branchPattern": "{tag}/task-{id}-{slug}",
+    "pr": {
+      "enabled": true,
+      "base": "default",
+      "bodyTemplate": ".taskmaster/templates/pr-body.md"
+    }
+  },
+  "prompts": {
+    "rulesPath": ".cursor/rules",
+    "testGeneratorPath": ".claude/agents/surgical-test-generator.md",
+    "loadOrder": ["git_workflow.mdc", "test_workflow.mdc"]
+  }
+}
+```
+
+### Configuration Fields
+
+#### autopilot
+- `enabled` (boolean): Enable/disable autopilot functionality
+- `requireCleanWorkingTree` (boolean): Require clean git state before starting
+- `commitTemplate` (string): Template for commit messages (tokens: `{type}`, `{scope}`, `{msg}`)
+- `defaultCommitType` (string): Default commit type (feat, fix, chore, etc.)
+- `maxGreenAttempts` (number): Maximum retry attempts to achieve green tests (default: 3)
+- `testTimeout` (number): Timeout in milliseconds per test run (default: 300000 = 5min)
+
+#### test
+- `runner` (string): Test runner detection mode (`"auto"` or explicit command like `"npm test"`)
+- `coverageThresholds` (object): Minimum coverage percentages required
+  - `lines`, `branches`, `functions`, `statements` (number): Threshold percentages (0-100)
+- `targetedRunPattern` (string): Glob pattern for targeted subtask test runs
+
+#### git
+- `branchPattern` (string): Branch naming pattern (tokens: `{tag}`, `{id}`, `{slug}`)
+- `pr.enabled` (boolean): Enable automatic PR creation
+- `pr.base` (string): Target branch for PRs (`"default"` uses repo default, or specify like `"main"`)
+- `pr.bodyTemplate` (string): Path to PR body template file (optional)
+
+#### prompts
+- `rulesPath` (string): Directory containing rule files (e.g., `.cursor/rules`)
+- `testGeneratorPath` (string): Path to test generator prompt file
+- `loadOrder` (array): Order to load rule files from `rulesPath`
+
+### Environment Variables
+
+```bash
+# Required for executor
+ANTHROPIC_API_KEY=sk-ant-...          # Claude API key
+
+# Optional: for PR creation
+GITHUB_TOKEN=ghp_...                  # GitHub personal access token
+
+# Optional: for other executors (future)
+OPENAI_API_KEY=sk-...
+GOOGLE_API_KEY=...
+```
+
+## Run Artifacts & Observability
+
+### Per-Run Artifact Structure
+
+Each autopilot run creates a timestamped directory with complete traceability:
+
+```
+.taskmaster/reports/runs/2025-01-15-142033/
+├── manifest.json          # run metadata (task id, start/end time, status)
+├── log.jsonl              # timestamped event stream
+├── commits.txt            # list of commit SHAs made during run
+├── test-results/
+│   ├── subtask-42.1-red.json
+│   ├── subtask-42.1-green.json
+│   ├── subtask-42.2-red.json
+│   ├── subtask-42.2-green-attempt1.json
+│   ├── subtask-42.2-green-attempt2.json
+│   ├── subtask-42.2-green-attempt3.json
+│   └── final-suite.json
+└── pr.md                  # generated PR body
+```
+
+### manifest.json Format
+
+```json
+{
+  "runId": "2025-01-15-142033",
+  "taskId": "42",
+  "tag": "analytics",
+  "branch": "analytics/task-42-user-metrics",
+  "startTime": "2025-01-15T14:20:33Z",
+  "endTime": "2025-01-15T14:45:12Z",
+  "status": "completed",
+  "subtasksCompleted": ["42.1", "42.2", "42.3"],
+  "subtasksFailed": [],
+  "totalCommits": 3,
+  "prUrl": "https://github.com/org/repo/pull/123",
+  "finalCoverage": {
+    "lines": 85.3,
+    "branches": 82.1,
+    "functions": 88.9,
+    "statements": 85.0
+  }
+}
+```
+
+### log.jsonl Format
+
+Event stream in JSON Lines format for easy parsing and debugging:
+
+```jsonl
+{"ts":"2025-01-15T14:20:33Z","phase":"preflight","status":"ok","details":{"testCmd":"npm test","gitClean":true}}
+{"ts":"2025-01-15T14:20:45Z","phase":"branch","status":"ok","branch":"analytics/task-42-user-metrics"}
+{"ts":"2025-01-15T14:21:00Z","phase":"red","subtask":"42.1","status":"ok","tests":{"failed":3,"passed":0}}
+{"ts":"2025-01-15T14:22:15Z","phase":"green","subtask":"42.1","status":"ok","tests":{"passed":3,"failed":0},"attempts":2}
+{"ts":"2025-01-15T14:22:20Z","phase":"commit","subtask":"42.1","status":"ok","sha":"a1b2c3d","message":"feat(metrics): add metrics schema (task 42.1)"}
+{"ts":"2025-01-15T14:23:00Z","phase":"red","subtask":"42.2","status":"ok","tests":{"failed":5,"passed":0}}
+{"ts":"2025-01-15T14:25:30Z","phase":"green","subtask":"42.2","status":"error","tests":{"passed":3,"failed":2},"attempts":3,"error":"Max attempts reached"}
+{"ts":"2025-01-15T14:25:35Z","phase":"pause","reason":"max_attempts","nextAction":"manual_review"}
+```
+
+### Test Results Format
+
+Each test run stores detailed results:
+
+```json
+{
+  "subtask": "42.2",
+  "phase": "green",
+  "attempt": 3,
+  "timestamp": "2025-01-15T14:25:30Z",
+  "command": "npm test src/api/__tests__/metrics.test.js",
+  "exitCode": 1,
+  "duration": 2340,
+  "summary": {
+    "total": 5,
+    "passed": 3,
+    "failed": 2,
+    "skipped": 0
+  },
+  "failures": [
+    {
+      "test": "POST /api/metrics should return 201 with valid payload",
+      "error": "Expected status 201, got 500",
+      "stack": "..."
+    }
+  ],
+  "coverage": {
+    "lines": 78.5,
+    "branches": 75.0,
+    "functions": 80.0,
+    "statements": 78.5
+  }
+}
+```
+
+## Design Decisions
+
+### Why commit per subtask instead of per task?
+
+**Decision**: Commit after each subtask's green state, not after the entire task.
+
+**Rationale**:
+- Atomic commits make code review easier (reviewers can see logical progression)
+- Easier to revert a single subtask if it causes issues downstream
+- Matches the TDD loop's natural checkpoint and cognitive boundary
+- Provides resumability points if the run is interrupted
+
+**Trade-off**: More commits per task (can use squash-merge in PRs if desired)
+
+### Why not support parallel subtask execution?
+
+**Decision**: Sequential subtask execution in Phase 1; parallel execution deferred to Phase 3.
+
+**Rationale**:
+- Subtasks often have implicit dependencies (e.g., schema before endpoint, endpoint before UI)
+- Simpler orchestrator state machine (less complexity = faster to ship)
+- Parallel execution requires explicit dependency DAG and conflict resolution
+- Can be added in Phase 3 once core workflow is proven stable
+
+**Trade-off**: Slower for truly independent subtasks (mitigated by keeping subtasks small and focused)
+
+### Why require 80% coverage by default?
+
+**Decision**: Enforce 80% coverage threshold (lines/branches/functions/statements) before allowing commits.
+
+**Rationale**:
+- Industry standard baseline for production code quality
+- Forces test generation to be comprehensive, not superficial
+- Configurable per project via `.taskmaster/config.json` if too strict
+- Prevents "green tests" that only test happy paths
+
+**Trade-off**: May require more test generation iterations; can be lowered per project
+
+### Why use tmux instead of a rich GUI?
+
+**Decision**: MVP uses tmux split panes for TUI, not Electron/web-based GUI.
+
+**Rationale**:
+- Tmux is universally available on dev machines; no installation burden
+- Terminal-first workflows match developer mental model (no context switching)
+- Simpler to implement and maintain; can add GUI later via extensions
+- State stored in files allows IDE/extension integration without coupling
+
+**Trade-off**: Less visual polish than GUI; requires tmux familiarity
+
+### Why not support multiple executors (codex/gemini/claude) in Phase 1?
+
+**Decision**: Start with Claude executor only; add others in Phase 2+.
+
+**Rationale**:
+- Reduces scope and complexity for initial delivery
+- Claude Code already integrated with existing executor service
+- Executor abstraction already exists; adding more is straightforward later
+- Different executors may need different prompt strategies (requires experimentation)
+
+**Trade-off**: Users locked to Claude initially; can work around with manual executor selection

 ## Risks and Mitigations

--- a/.taskmaster/reports/task-complexity-report_autonomous-tdd-git-workflow.json
+++ b/.taskmaster/reports/task-complexity-report_autonomous-tdd-git-workflow.json
@@ -1,6 +1,6 @@
 {
 	"meta": {
-		"generatedAt": "2025-10-03T09:04:22.505Z",
+		"generatedAt": "2025-10-06T16:13:59.317Z",
 		"tasksAnalyzed": 20,
 		"totalTasks": 20,
 		"analysisCount": 20,
@@ -12,162 +12,162 @@
 		{
 			"taskId": 11,
 			"taskTitle": "Create WorkflowOrchestrator Core Service",
-			"complexityScore": 8,
-			"recommendedSubtasks": 6,
-			"expansionPrompt": "Break down the WorkflowOrchestrator implementation into: 1) Core state machine with phase transitions and event emission, 2) Workflow state persistence and checkpoint system, 3) Resume/pause functionality with state restoration, 4) Integration points for adapters (test runner, git, executors), 5) Progress event system with EventEmitter, 6) Error handling and recovery mechanisms. Each subtask should focus on a specific aspect of the orchestrator.",
-			"reasoning": "High complexity due to state machine implementation, event-driven architecture, checkpoint persistence, and multiple integration points. Requires EventEmitter setup (not currently in codebase), state persistence to JSON files, and complex phase transition logic."
+			"complexityScore": 9,
+			"recommendedSubtasks": 8,
+			"expansionPrompt": "Break down the WorkflowOrchestrator into discrete components: 1) State machine core with phase definitions, 2) EventEmitter integration, 3) Persistence layer with checkpoint saving, 4) State recovery and resume functionality, 5) Phase transition logic, 6) Error handling and rollback, 7) Configuration management, 8) Integration with existing services",
+			"reasoning": "This is the most complex task requiring a sophisticated state machine, event system, persistence, and integration with multiple adapters. It's the foundation component that orchestrates the entire workflow."
 		},
 		{
 			"taskId": 12,
 			"taskTitle": "Implement Test Runner Adapter Service",
 			"complexityScore": 7,
-			"recommendedSubtasks": 5,
-			"expansionPrompt": "Divide test runner adapter into: 1) Test runner detection from package.json scripts, 2) Command execution wrapper with output capture, 3) Test output parser for various formats (Jest, Vitest, etc.), 4) Coverage metrics extraction and reporting, 5) Threshold enforcement and validation logic. Focus on framework-agnostic design with extensible parsers.",
-			"reasoning": "Requires parsing different test output formats, detecting test runners from package.json, implementing coverage threshold logic, and creating a framework-agnostic interface. Vitest is used in tm-core, need to support multiple runners."
+			"recommendedSubtasks": 6,
+			"expansionPrompt": "Split into: 1) Package.json parser for test script detection, 2) npm/pnpm/yarn runner detection logic, 3) Test execution wrapper with structured output parsing, 4) Coverage report parsing and threshold enforcement, 5) TestResult interface implementation, 6) Integration with different test frameworks (Jest, Vitest, etc.)",
+			"reasoning": "Moderate-high complexity due to need to support multiple package managers, parse various test output formats, and handle coverage reporting. Framework detection adds complexity."
 		},
 		{
 			"taskId": 13,
 			"taskTitle": "Build Git Operations Adapter",
 			"complexityScore": 6,
-			"recommendedSubtasks": 4,
-			"expansionPrompt": "Split git adapter into: 1) Core git command wrapper using child_process, 2) Branch naming pattern system with template support, 3) Confirmation gates and default branch protection, 4) Push and commit operations with safety checks. Ensure proper error handling for git command failures.",
-			"reasoning": "Moderate complexity for git operations wrapper. No git library currently in use, will need child_process implementation. Includes branch naming patterns, confirmation prompts, and safety checks for default branch protection."
+			"recommendedSubtasks": 5,
+			"expansionPrompt": "Organize into: 1) Core git command wrapper using simple-git or child_process, 2) Branch management with naming pattern support, 3) Confirmation gate system for destructive operations, 4) Default branch protection logic, 5) Error handling and command validation",
+			"reasoning": "Medium complexity - wraps existing git operations but needs careful handling of destructive operations, branch naming patterns, and error scenarios. Existing git utilities can be leveraged."
 		},
 		{
 			"taskId": 14,
 			"taskTitle": "Create Autopilot CLI Command",
 			"complexityScore": 5,
-			"recommendedSubtasks": 3,
-			"expansionPrompt": "Implement autopilot command in three parts: 1) Command setup with Commander.js and flag parsing following existing patterns in apps/cli/src/commands/, 2) WorkflowOrchestrator initialization and event subscription, 3) Progress UI rendering using existing dashboard components and graceful shutdown handling. Follow patterns from list.command.ts and start.command.ts.",
-			"reasoning": "Straightforward CLI command implementation following existing patterns. Commander.js is already used, UI components exist in apps/cli/src/ui/. Main complexity is in orchestrator integration and event handling."
+			"recommendedSubtasks": 4,
+			"expansionPrompt": "Structure as: 1) Commander.js argument parsing and validation, 2) WorkflowOrchestrator initialization and configuration, 3) Event subscription and UI progress rendering, 4) Signal handling for graceful shutdown and resumability",
+			"reasoning": "Medium complexity - primarily integration work using existing CLI patterns and orchestrator. Signal handling and event subscription add some complexity but build on established patterns."
 		},
 		{
 			"taskId": 15,
 			"taskTitle": "Integrate Surgical Test Generator",
 			"complexityScore": 6,
-			"recommendedSubtasks": 4,
-			"expansionPrompt": "Break down test generator integration: 1) Agent prompt loader from .claude/agents/surgical-test-generator.md, 2) Context formatter for subtask details and existing code, 3) Executor service integration using existing ExecutorFactory, 4) Test code parser and file writer with project convention detection. Leverage existing executor infrastructure.",
-			"reasoning": "Requires loading agent prompts, formatting context, integrating with existing ExecutorFactory and executor-service.ts, and parsing/writing generated test code. Builds on existing executor infrastructure."
+			"recommendedSubtasks": 5,
+			"expansionPrompt": "Break into: 1) Agent prompt adapter loading from .claude/agents/, 2) Context formatting for subtask requirements, 3) Executor service integration, 4) Test file writing with project convention detection, 5) Test syntax validation before proceeding",
+			"reasoning": "Medium-high complexity due to prompt engineering, file system integration, and need to understand project conventions. Test validation adds complexity."
 		},
 		{
 			"taskId": 16,
 			"taskTitle": "Implement Code Generation Executor",
-			"complexityScore": 5,
-			"recommendedSubtasks": 3,
-			"expansionPrompt": "Implement code generation in phases: 1) Extend task-execution-service.ts with autopilot-specific prompt generation for making tests pass, 2) Integration with ExecutorFactory for multiple executor support (claude/codex/gemini), 3) Code change parser and conflict resolution handler. Build on existing executor patterns.",
-			"reasoning": "Extends existing task-execution-service.ts and uses ExecutorFactory. Main work is prompt generation for test-driven implementation and handling code application with conflict resolution."
+			"complexityScore": 7,
+			"recommendedSubtasks": 6,
+			"expansionPrompt": "Organize as: 1) Minimal implementation prompt generation, 2) Test failure analysis and context extraction, 3) ExecutorFactory integration for multiple AI providers, 4) Code change parsing and application, 5) Conflict resolution strategies, 6) Change minimization validation",
+			"reasoning": "High complexity due to sophisticated prompt engineering, multi-provider support, code parsing, and conflict resolution. Requires understanding of various executor outputs."
 		},
 		{
 			"taskId": 17,
 			"taskTitle": "Add Branch and Tag Management Integration",
 			"complexityScore": 4,
 			"recommendedSubtasks": 3,
-			"expansionPrompt": "Integrate tag management: 1) Branch-to-tag mapping registration in tag-management.js, 2) Active tag switching when creating branches, 3) Tag-filtered task loading and branch naming with tag prefixes. Use existing tag management infrastructure.",
-			"reasoning": "Relatively simple integration with existing tag-management.js. Mainly involves calling existing functions for tag registration, switching, and filtering. Infrastructure already exists."
+			"expansionPrompt": "Split into: 1) Integration with existing tag-management.js for branch-tag mapping, 2) Active tag switching and task filtering logic, 3) Branch naming with tag and task ID inclusion",
+			"reasoning": "Lower complexity as it builds on existing tag management infrastructure. Primarily integration work with established patterns."
 		},
 		{
 			"taskId": 18,
 			"taskTitle": "Build Run State Persistence System",
 			"complexityScore": 6,
-			"recommendedSubtasks": 4,
-			"expansionPrompt": "Implement state persistence: 1) Checkpoint serialization to JSON after each phase, 2) JSONL logger for operation history, 3) State restoration logic for workflow resume, 4) Graceful handling of corrupted or partial state files. Use FileStorage from tm-core for consistency.",
-			"reasoning": "Requires implementing checkpoint system, JSONL logging, state restoration, and error recovery. Builds on existing FileStorage patterns in packages/tm-core/src/storage/."
+			"recommendedSubtasks": 5,
+			"expansionPrompt": "Structure as: 1) Checkpoint schema design and validation, 2) JSONL logging implementation, 3) State serialization and deserialization, 4) Partial state recovery with graceful degradation, 5) File system operations with error handling",
+			"reasoning": "Medium-high complexity due to data persistence, state management, and recovery scenarios. File I/O and state consistency add complexity."
 		},
 		{
 			"taskId": 19,
 			"taskTitle": "Implement Preflight Validation Service",
 			"complexityScore": 5,
-			"recommendedSubtasks": 3,
-			"expansionPrompt": "Create preflight validation: 1) Environment checks for git state, test runner, and CLI tools availability, 2) Task validation with auto-expansion trigger when no subtasks exist, 3) Structured validation report with errors/warnings and --force override support. Integrate with existing services.",
-			"reasoning": "Moderate complexity for various validation checks. Integrates with existing services for task expansion and test runner detection. Main work is aggregating checks and reporting."
+			"recommendedSubtasks": 4,
+			"expansionPrompt": "Organize into: 1) Git state validation (clean working tree, branch checks), 2) Tool availability detection (test runner, git, gh CLI), 3) API key and executor validation, 4) Task validation with auto-expansion trigger",
+			"reasoning": "Medium complexity - involves multiple validation checks across different systems. Auto-expansion logic and --force override handling add some complexity."
 		},
 		{
 			"taskId": 20,
 			"taskTitle": "Create PR Generation Service",
 			"complexityScore": 4,
 			"recommendedSubtasks": 3,
-			"expansionPrompt": "Implement PR creation: 1) Extend git-adapter.ts with gh CLI wrapper for PR operations, 2) PR body formatter using run reports and task completion data, 3) Fallback instructions when gh is unavailable and PR URL persistence. Build on git adapter foundation.",
-			"reasoning": "Straightforward gh CLI integration extending git-adapter. Main work is formatting PR body from run reports. Relatively simple with clear requirements."
+			"expansionPrompt": "Break into: 1) gh CLI integration for PR operations, 2) PR title and body formatting with run report integration, 3) Fallback handling when gh unavailable",
+			"reasoning": "Lower-medium complexity as it primarily formats data and calls external gh CLI. Builds on existing git adapter patterns."
 		},
 		{
 			"taskId": 21,
 			"taskTitle": "Add Subtask Selection Logic",
-			"complexityScore": 5,
-			"recommendedSubtasks": 3,
-			"expansionPrompt": "Implement subtask selection: 1) Integration with existing find-next-task.js for dependency-aware selection, 2) Status filtering and update logic for in-progress/done transitions, 3) Blocked subtask handling and skip logic. Leverage existing task service methods.",
-			"reasoning": "Builds on existing find-next-task.js logic. Main complexity is in dependency resolution and status management. Most infrastructure exists in task-service.ts."
+			"complexityScore": 6,
+			"recommendedSubtasks": 5,
+			"expansionPrompt": "Structure as: 1) Dependency resolution algorithm, 2) Status filtering and task ownership logic, 3) Subtask ordering with dependency satisfaction, 4) Status transition management during workflow, 5) Blocked task handling with graceful degradation",
+			"reasoning": "Medium-high complexity due to dependency graph traversal and status management. The logic builds on existing TaskService.getNextTask() but requires workflow-specific enhancements."
 		},
 		{
 			"taskId": 22,
 			"taskTitle": "Implement Test-Driven Commit Gating",
-			"complexityScore": 5,
-			"recommendedSubtasks": 3,
-			"expansionPrompt": "Create commit gating: 1) Test result and coverage validation against thresholds, 2) Retry logic with exponential backoff for flaky tests, 3) Commit creation only when tests pass with --force-commit override. Integrate with test runner adapter.",
-			"reasoning": "Moderate complexity for test validation, retry logic, and threshold enforcement. Builds on test runner adapter output. Main work is retry mechanism and gating logic."
+			"complexityScore": 7,
+			"recommendedSubtasks": 6,
+			"expansionPrompt": "Organize into: 1) Test execution and result evaluation, 2) Coverage threshold enforcement logic, 3) Commit gating decision logic, 4) Retry mechanism with exponential backoff for flaky tests, 5) Force override functionality, 6) Comprehensive logging of all attempts",
+			"reasoning": "High complexity due to sophisticated gating logic, coverage analysis, retry mechanisms, and integration with test runner. Critical for TDD workflow integrity."
 		},
 		{
 			"taskId": 23,
 			"taskTitle": "Build Progress Event System",
-			"complexityScore": 4,
-			"recommendedSubtasks": 3,
-			"expansionPrompt": "Implement event system: 1) EventEmitter setup with typed events for workflow phases, 2) Event aggregator for statistics collection, 3) Event filtering and buffering mechanisms. Create clean event API for UI consumption.",
-			"reasoning": "EventEmitter not currently in codebase but straightforward to add. Main work is defining event types, implementing aggregation, and creating clean API for consumers."
+			"complexityScore": 5,
+			"recommendedSubtasks": 4,
+			"expansionPrompt": "Structure as: 1) EventEmitter-based event system design, 2) Event payload standardization with timestamps and metadata, 3) Event aggregation and summary statistics, 4) Event filtering and buffering mechanisms",
+			"reasoning": "Medium complexity - event system design is well-understood but requires careful payload design and aggregation logic. Builds on Node.js EventEmitter patterns."
 		},
 		{
 			"taskId": 24,
 			"taskTitle": "Create Autopilot Configuration Schema",
-			"complexityScore": 3,
-			"recommendedSubtasks": 2,
-			"expansionPrompt": "Add autopilot config: 1) Extend existing config.json schema with autopilot section using Zod validation, 2) Config migration logic and environment variable overrides. Follow existing config patterns in config-manager.ts.",
-			"reasoning": "Simple schema extension to existing config.json. Config infrastructure exists in packages/tm-core/src/config/. Main work is schema definition and migration."
+			"complexityScore": 4,
+			"recommendedSubtasks": 3,
+			"expansionPrompt": "Break into: 1) Zod schema definition for autopilot configuration, 2) Configuration migration logic for existing projects, 3) Environment variable override support with validation",
+			"reasoning": "Lower-medium complexity - schema design is straightforward using Zod, and builds on existing config patterns. Migration logic adds some complexity."
 		},
 		{
 			"taskId": 25,
 			"taskTitle": "Implement Dry Run Mode",
 			"complexityScore": 3,
 			"recommendedSubtasks": 2,
-			"expansionPrompt": "Add dry-run support: 1) Flag propagation through all adapter methods with simulation output, 2) Clear formatting to distinguish simulated vs actual operations. Ensure validation phases still execute normally.",
-			"reasoning": "Simple flag propagation and output formatting. Most complexity handled by individual adapters. Main work is consistent implementation across all operations."
+			"expansionPrompt": "Split into: 1) Dry run flag propagation throughout workflow components, 2) Output formatting for simulated operations with clear indication",
+			"reasoning": "Lower complexity - primarily involves adding conditional logic and formatting. Most of the underlying functionality already exists."
 		},
 		{
 			"taskId": 26,
 			"taskTitle": "Add tmux Integration Support",
 			"complexityScore": 4,
 			"recommendedSubtasks": 3,
-			"expansionPrompt": "Implement tmux support: 1) Tmux availability detection and pane management commands, 2) Split-window layout with command execution in executor pane, 3) Graceful fallback when tmux unavailable. Handle cleanup and debugging scenarios.",
-			"reasoning": "Moderate complexity for tmux integration. Requires command wrapping, pane management, and fallback handling. Optional enhancement with clear boundaries."
+			"expansionPrompt": "Organize as: 1) tmux availability detection and command wrapper, 2) Pane management (split, send-keys, capture), 3) Fallback handling for non-tmux environments",
+			"reasoning": "Lower-medium complexity - wraps tmux commands with graceful fallback. Terminal management adds some complexity but follows established patterns."
 		},
 		{
 			"taskId": 27,
 			"taskTitle": "Build Run Report Generator",
 			"complexityScore": 5,
-			"recommendedSubtasks": 5,
-			"expansionPrompt": "Already has 5 subtasks defined. Focus on implementing each component: report generator service core, markdown formatter, JSONL logger, metrics collector, and archival system. Each subtask is well-scoped.",
-			"reasoning": "Already expanded with 5 subtasks. Moderate complexity for report generation, formatting, and archival. Clear separation of concerns across subtasks."
+			"recommendedSubtasks": 4,
+			"expansionPrompt": "Structure as: 1) Markdown report generation with formatted tables and summaries, 2) JSON report structure with comprehensive metrics, 3) Chart/table formatting for readability, 4) Report archival and cleanup logic",
+			"reasoning": "Medium complexity due to multiple output formats, data aggregation, and file management. Report formatting and archival add complexity."
 		},
 		{
 			"taskId": 28,
 			"taskTitle": "Add MCP Tools Integration",
 			"complexityScore": 3,
 			"recommendedSubtasks": 2,
-			"expansionPrompt": "Integrate MCP tools: 1) MCP tool availability detection and wrapper functions, 2) Fallback to direct service calls when MCP unavailable. Use existing MCP infrastructure in mcp-server/src/.",
-			"reasoning": "Simple integration with existing MCP infrastructure. Main work is detection and fallback logic. MCP server already implemented with all needed tools."
+			"expansionPrompt": "Split into: 1) MCP tool availability detection and integration, 2) Fallback to direct service calls when MCP unavailable",
+			"reasoning": "Lower complexity - primarily integration work with existing MCP infrastructure. Fallback logic is straightforward."
 		},
 		{
 			"taskId": 29,
 			"taskTitle": "Implement Retry and Backoff Logic",
-			"complexityScore": 4,
-			"recommendedSubtasks": 3,
-			"expansionPrompt": "Add retry mechanisms: 1) Exponential backoff calculator with configurable limits, 2) Retry wrapper for test execution, executor calls, and git operations, 3) Circuit breaker pattern for repeated failures. Track attempts in run state.",
-			"reasoning": "Moderate complexity for retry patterns, backoff calculation, and circuit breaker. Generic retry wrapper can be reused across different operations."
+			"complexityScore": 6,
+			"recommendedSubtasks": 5,
+			"expansionPrompt": "Organize into: 1) Exponential backoff algorithm implementation, 2) Flaky test pattern detection, 3) Circuit breaker for repeated failures, 4) Retry attempt tracking in state, 5) Configurable retry limits and failure categorization",
+			"reasoning": "Medium-high complexity due to sophisticated retry algorithms, pattern detection, and state management. Circuit breaker pattern adds complexity."
 		},
 		{
 			"taskId": 30,
 			"taskTitle": "Create End-to-End Integration Tests",
-			"complexityScore": 7,
-			"recommendedSubtasks": 5,
-			"expansionPrompt": "Build comprehensive test suite: 1) Test fixtures with mock git repo and task data setup, 2) Happy path scenario with all tests passing, 3) Retry and failure scenarios with flaky tests, 4) Resume from interruption testing, 5) Flag combination testing and artifact verification. Use Vitest for consistency.",
-			"reasoning": "High complexity for comprehensive integration testing. Requires extensive mocking, multiple scenarios, and artifact verification. Critical for validating entire workflow."
+			"complexityScore": 8,
+			"recommendedSubtasks": 7,
+			"expansionPrompt": "Structure as: 1) Test fixture creation with mock tasks and repositories, 2) Happy path workflow testing, 3) Retry and failure scenario testing, 4) Resume functionality validation, 5) Multi-executor testing scenarios, 6) Performance benchmarking, 7) Artifact verification (commits, branches, reports)",
+			"reasoning": "High complexity due to comprehensive test coverage requirements, multiple scenarios, mock data setup, and integration with many components. Requires testing the entire workflow end-to-end."
 		}
 	]
 }
--- a/.taskmaster/tasks/tasks.json
+++ b/.taskmaster/tasks/tasks.json
@@ -7905,331 +7905,323 @@
  "autonomous-tdd-git-workflow": {
    "tasks": [
      {
-        "id": 11,
-        "title": "Create WorkflowOrchestrator Core Service",
-        "description": "Implement the core orchestration service that drives the autonomous TDD workflow with state machine phases",
-        "details": "Create packages/tm-core/src/services/workflow-orchestrator.ts implementing a state machine with phases: Preflight → Branch/Tag → SubtaskIter (Red/Green/Commit) → Finalize → PR. Use EventEmitter for progress events. Include methods: startWorkflow(taskId, options), resumeWorkflow(runId), pauseWorkflow(), getWorkflowState(). Store state in memory with persistence to .taskmaster/reports/runs/<run-id>/state.json. Implement checkpoint saving after each phase transition.",
-        "testStrategy": "Unit tests for state transitions, event emission, checkpoint persistence. Integration tests for full workflow lifecycle with mock adapters. Test resume capability from various checkpoints.",
+        "id": 31,
+        "title": "Create WorkflowOrchestrator service foundation",
+        "description": "Implement the core WorkflowOrchestrator class in tm-core to manage the autonomous TDD workflow state machine",
+        "details": "Create packages/tm-core/src/services/workflow-orchestrator.ts with phases enum (Preflight, Branch, SubtaskLoop, Finalization), event emitter for progress tracking, and basic state management. Include interfaces for WorkflowConfig, WorkflowState, and WorkflowEvent. Implement constructor, start/pause/resume methods, and phase transition logic. Use existing TaskService and ConfigManager dependencies.",
+        "testStrategy": "Unit tests for state transitions, event emission, phase management, error handling, and integration with existing services. Mock TaskService and ConfigManager dependencies.",
        "priority": "high",
        "dependencies": [],
        "status": "pending",
        "subtasks": []
      },
      {
-        "id": 12,
-        "title": "Implement Test Runner Adapter Service",
-        "description": "Create framework-agnostic test runner adapter that detects and executes project test commands",
-        "details": "Create packages/tm-core/src/services/test-runner-adapter.ts with methods: detectRunner() (checks package.json for test scripts), runTargeted(files/pattern), runAll(), getCoverageReport(), enforceCoverageThresholds(thresholds). Support npm/pnpm/yarn test detection. Parse test output for pass/fail counts and coverage metrics. Return structured TestResult interface with failures, duration, coverage data. Default 80% coverage thresholds.",
-        "testStrategy": "Mock different package.json configurations for runner detection. Test parsing of various test output formats. Verify coverage threshold enforcement logic. Integration test with actual npm test execution.",
-        "priority": "high",
-        "dependencies": [],
-        "status": "pending",
-        "subtasks": []
-      },
-      {
-        "id": 13,
-        "title": "Build Git Operations Adapter",
-        "description": "Encapsulate all git operations with confirmation gates and branch naming patterns",
-        "details": "Create packages/tm-core/src/services/git-adapter.ts wrapping git commands: createBranch(pattern, tag, taskId), checkout(branch), add(files), commit(message, scope), push(options), getCurrentBranch(), getDefaultBranch(). Implement branch naming with configurable pattern support ({tag}/task-{id}[-slug]). Add confirmation prompts for destructive operations unless --no-confirm. Never allow commits to default branch. Use simple-git library or child_process for git commands.",
-        "testStrategy": "Mock git commands and verify correct invocations. Test branch naming pattern generation. Verify default branch protection. Test confirmation gate behavior with different flags.",
-        "priority": "high",
-        "dependencies": [],
-        "status": "pending",
-        "subtasks": []
-      },
-      {
-        "id": 14,
-        "title": "Create Autopilot CLI Command",
-        "description": "Implement the main autopilot command with argument parsing and orchestrator invocation",
-        "details": "Create apps/cli/src/commands/autopilot.command.ts using Commander.js. Accept taskId argument and flags: --dry-run, --no-push, --no-pr, --no-confirm, --force, --max-attempts <n>, --resume. Initialize WorkflowOrchestrator with options. Subscribe to orchestrator events and render progress using existing UI components from apps/cli/src/ui/components/. Handle interrupt signals gracefully for resumability.",
-        "testStrategy": "Test command parsing with various flag combinations. Mock orchestrator and verify correct initialization. Test event subscription and UI rendering. Verify graceful shutdown on SIGINT.",
+        "id": 32,
+        "title": "Implement GitAdapter for repository operations",
+        "description": "Create git operations adapter that wraps existing git-utils.js functionality for WorkflowOrchestrator",
+        "details": "Create packages/tm-core/src/services/git-adapter.ts that provides TypeScript interface over scripts/modules/utils/git-utils.js. Include methods: isGitRepository, getCurrentBranch, createBranch, checkoutBranch, isWorkingTreeClean, commitChanges, pushBranch, getDefaultBranch. Implement branch naming pattern support using config.git.branchPattern with {tag}, {id}, {slug} tokens. Add confirmation gates for destructive operations.",
+        "testStrategy": "Unit tests with mocked git commands, integration tests with temporary git repositories. Test branch naming patterns, confirmation flows, and error handling for git failures.",
        "priority": "high",
        "dependencies": [
-          11
+          31
        ],
        "status": "pending",
        "subtasks": []
      },
      {
-        "id": 15,
-        "title": "Integrate Surgical Test Generator",
-        "description": "Connect the existing surgical test generator agent to the autopilot workflow for red phase",
-        "details": "Create test generation prompt adapter in packages/tm-core/src/services/test-generator.ts. Load .claude/agents/surgical-test-generator.md as system prompt. Format subtask context into user prompt with file paths, existing code, and requirements. Use existing executor service to invoke claude with the prompt. Parse generated test code and write to appropriate test files following project conventions. Validate tests compile/parse before proceeding.",
-        "testStrategy": "Mock executor responses with sample test generation. Verify prompt formatting includes all context. Test file writing to correct locations. Validate test syntax checking logic.",
+        "id": 33,
+        "title": "Create TestRunnerAdapter for framework detection and execution",
+        "description": "Implement test runner adapter that detects project test framework and executes tests with coverage",
+        "details": "Create packages/tm-core/src/services/test-runner-adapter.ts that detects test commands from package.json scripts (npm test, pnpm test, etc.), executes targeted and full test runs, parses test results and coverage reports. Support Jest, Vitest, and generic npm scripts. Implement TestResult interface with pass/fail counts, coverage percentages, and detailed failure information. Include timeout handling and retry logic.",
+        "testStrategy": "Unit tests for framework detection logic, test execution with mocked child_process, coverage parsing. Integration tests with actual test frameworks in fixture projects.",
        "priority": "high",
        "dependencies": [
-          11,
-          12
+          31
        ],
        "status": "pending",
        "subtasks": []
      },
      {
-        "id": 16,
-        "title": "Implement Code Generation Executor",
-        "description": "Create green phase code implementation using focused prompts to make tests pass",
-        "details": "Extend packages/tm-core/src/services/task-execution-service.ts with autopilot-specific prompt generation. Create minimal implementation prompt: 'Make these failing tests pass with the smallest code changes following project patterns. Only modify necessary files.' Include test failures, subtask context, and existing code. Use ExecutorFactory to invoke selected executor (claude/codex/gemini). Parse and apply code changes, handling conflicts gracefully.",
-        "testStrategy": "Test prompt generation with various failure scenarios. Mock executor responses and verify code application. Test conflict resolution strategies. Verify minimal change enforcement.",
-        "priority": "high",
-        "dependencies": [
-          11,
-          15
-        ],
-        "status": "pending",
-        "subtasks": []
-      },
-      {
-        "id": 17,
-        "title": "Add Branch and Tag Management Integration",
-        "description": "Connect autopilot to existing tag management for branch-tag mapping",
-        "details": "Integrate with scripts/modules/task-manager/tag-management.js for branch→tag mapping. When creating branch, register mapping in tag system. Explicitly switch active tag to match branch tag. Load task data filtered by active tag. Ensure branch name includes both tag and task ID per spec. Handle tag switching when resuming workflows. Persist tag-branch associations.",
-        "testStrategy": "Test branch-tag registration and retrieval. Verify active tag switching. Test filtered task loading by tag. Validate branch naming includes tag and task ID.",
+        "id": 34,
+        "title": "Implement autopilot CLI command structure",
+        "description": "Create the main autopilot command in CLI with dry-run, configuration, and basic orchestration",
+        "details": "Create apps/cli/src/commands/autopilot.command.ts using Commander.js. Implement flags: --dry-run, --no-push, --no-pr, --no-confirm, --force, --max-attempts, --resume. Add preflight checks (clean working tree, test command detection, tool availability). Integrate with WorkflowOrchestrator and existing UI components from apps/cli/src/ui/components/. Show detailed execution plan in dry-run mode.",
+        "testStrategy": "Unit tests for command parsing, flag handling, dry-run output formatting. Integration tests with mock WorkflowOrchestrator to verify command flow without actual git operations.",
        "priority": "medium",
        "dependencies": [
-          13
+          31,
+          32,
+          33
        ],
        "status": "pending",
        "subtasks": []
      },
      {
-        "id": 18,
-        "title": "Build Run State Persistence System",
-        "description": "Implement checkpoint saving and workflow resumability with detailed logging",
-        "details": "Create run state management in WorkflowOrchestrator. Save checkpoints to .taskmaster/reports/runs/<timestamp>/state.json after each phase. Include: current phase, subtask progress, test results, git state, timestamps. Implement JSONL logging for all operations to .taskmaster/reports/runs/<timestamp>/log.jsonl. Add resume() method to restore from checkpoint. Handle partial state recovery gracefully.",
-        "testStrategy": "Test checkpoint creation at each phase. Verify JSONL log format and completeness. Test resume from various interruption points. Validate state recovery with corrupted files.",
+        "id": 35,
+        "title": "Integrate surgical test generator with WorkflowOrchestrator",
+        "description": "Connect existing test generation capabilities with the TDD red phase of the workflow",
+        "details": "Enhance packages/tm-core/src/services/task-execution-service.ts to support test generation mode. Create TestGenerationService that uses existing executor framework with surgical-test-generator prompts. Implement prompt composition system that loads rules from .cursor/rules/ and .claude/agents/, combines with task context, and generates focused failing tests. Support framework-specific test patterns (Jest, Vitest).",
+        "testStrategy": "Unit tests for prompt composition, test generation calls with mocked executors. Integration tests generating actual test files and verifying they fail appropriately.",
        "priority": "medium",
        "dependencies": [
-          11
+          31,
+          33
        ],
        "status": "pending",
        "subtasks": []
      },
      {
-        "id": 19,
-        "title": "Implement Preflight Validation Service",
-        "description": "Create comprehensive pre-execution validation checking git state, tools, and configuration",
-        "details": "Add preflight checks in WorkflowOrchestrator: verify clean working tree (configurable), detect test runner availability, validate git/gh CLI installation, check for required API keys/executors, verify task has subtasks (auto-expand if not), ensure not on default branch. Return structured validation report with errors/warnings. Allow --force to bypass non-critical checks.",
-        "testStrategy": "Mock various environment states for validation. Test clean/dirty working tree detection. Verify tool availability checks. Test auto-expansion trigger when no subtasks.",
-        "priority": "medium",
-        "dependencies": [
-          11,
-          12,
-          13
-        ],
-        "status": "pending",
-        "subtasks": []
-      },
-      {
-        "id": 20,
-        "title": "Create PR Generation Service",
-        "description": "Implement GitHub PR creation with formatted body from run reports",
-        "details": "Extend git-adapter.ts with PR operations using gh CLI. Generate PR title: 'Task #<id> [<tag>]: <title>'. Format PR body with: summary of changes, subtask completion list, test coverage report, run statistics. Include link to full run report. Handle gh unavailability with fallback instructions. Support --no-pr flag to skip. Store PR URL in run state.",
-        "testStrategy": "Mock gh CLI responses for PR creation. Test PR title and body formatting. Verify fallback behavior without gh. Test PR URL persistence in run state.",
-        "priority": "medium",
-        "dependencies": [
-          13,
-          18
-        ],
-        "status": "pending",
-        "subtasks": []
-      },
-      {
-        "id": 21,
-        "title": "Add Subtask Selection Logic",
-        "description": "Implement intelligent subtask selection respecting dependencies and status",
-        "details": "Enhance WorkflowOrchestrator with subtask selection using TaskService.getNextTask(). Filter subtasks by: pending/in-progress status, satisfied dependencies, task ownership. Process in dependency order. Skip already-done subtasks. Handle blocked subtasks gracefully. Update subtask status to in-progress when starting, done when tests pass and committed.",
-        "testStrategy": "Test selection with various dependency graphs. Verify status filtering logic. Test dependency satisfaction checking. Validate status transitions during workflow.",
+        "id": 36,
+        "title": "Implement subtask TDD loop execution",
+        "description": "Create the core Red-Green-Commit cycle execution logic for individual subtasks",
+        "details": "Extend WorkflowOrchestrator with SubtaskExecutor class that implements the TDD loop: RED phase (generate failing tests), GREEN phase (implement code to pass tests), COMMIT phase (git add, commit with conventional commit message). Include retry logic for GREEN phase with configurable max attempts. Integrate with existing TaskService for subtask status updates. Support timeout and backoff policies.",
+        "testStrategy": "Unit tests for each phase execution, retry logic, timeout handling. Integration tests with actual test files and git operations in isolated test repositories.",
        "priority": "high",
        "dependencies": [
-          11
+          31,
+          32,
+          33,
+          35
        ],
        "status": "pending",
        "subtasks": []
      },
      {
-        "id": 22,
-        "title": "Implement Test-Driven Commit Gating",
-        "description": "Enforce commit-only-on-green policy with configurable coverage thresholds",
-        "details": "Add commit gating logic in WorkflowOrchestrator. After code generation, run tests and check: all tests pass, coverage meets thresholds (default 80% for lines/branches/functions/statements). Only commit if both conditions met. Support --force-commit override. Implement retry logic with backoff for flaky tests. Log all attempts and results.",
-        "testStrategy": "Test gating with various test results and coverage levels. Verify threshold enforcement. Test override flag behavior. Validate retry logic with intermittent failures.",
-        "priority": "high",
-        "dependencies": [
-          11,
-          12,
-          16
-        ],
-        "status": "pending",
-        "subtasks": []
-      },
-      {
-        "id": 23,
-        "title": "Build Progress Event System",
-        "description": "Create event-driven progress reporting for CLI rendering and future integrations",
-        "details": "Implement EventEmitter-based progress system in WorkflowOrchestrator. Emit events: workflow:start, phase:change, subtask:start/complete, test:run/pass/fail, commit:created, pr:created, workflow:complete/error. Include detailed payloads with timestamps, durations, results. Create event aggregator for summary statistics. Support event filtering and buffering.",
-        "testStrategy": "Test event emission at each workflow step. Verify event payload completeness. Test event aggregation logic. Validate buffering and filtering mechanisms.",
+        "id": 37,
+        "title": "Add configuration schema for autopilot settings",
+        "description": "Extend .taskmaster/config.json schema to support autopilot configuration options",
+        "details": "Update packages/tm-core/src/interfaces/configuration.interface.ts to include autopilot section with: enabled, requireCleanWorkingTree, commitTemplate, defaultCommitType, maxGreenAttempts, testTimeout. Add test section with runner, coverageThresholds, targetedRunPattern. Include git section with branchPattern, pr settings. Update ConfigManager to validate and provide defaults for new settings.",
+        "testStrategy": "Unit tests for config validation, default value application, schema validation. Integration tests loading config from actual .taskmaster/config.json files.",
        "priority": "medium",
        "dependencies": [
-          11
+          31
        ],
        "status": "pending",
        "subtasks": []
      },
      {
-        "id": 24,
-        "title": "Create Autopilot Configuration Schema",
-        "description": "Extend taskmaster config with autopilot-specific settings and validation",
-        "details": "Add autopilot section to .taskmaster/config.json schema: autopilot: { enabled, requireCleanWorkingTree, commitTemplate, defaultCommitType }, test: { runner, coverageThresholds }, git: { branchPattern, pr: { enabled, base } }. Create validation with Zod schema. Add config migration for existing projects. Provide sensible defaults. Support environment variable overrides.",
-        "testStrategy": "Test schema validation with various configurations. Verify migration from old configs. Test default value application. Validate environment override behavior.",
+        "id": 38,
+        "title": "Implement run state persistence and logging",
+        "description": "Create run artifact storage system for traceability and resume functionality",
+        "details": "Create packages/tm-core/src/services/run-state-manager.ts that persists run state to .taskmaster/reports/runs/<timestamp>/. Include manifest.json (run metadata), log.jsonl (event stream), test-results/ (per-phase test outputs), commits.txt (commit SHAs). Implement JSONL event logging format and structured test result storage. Support state checkpointing for resume functionality.",
+        "testStrategy": "Unit tests for file operations, JSON serialization, log formatting. Integration tests creating actual run directories and verifying persistence across WorkflowOrchestrator restarts.",
        "priority": "medium",
-        "dependencies": [],
+        "dependencies": [
+          31,
+          33,
+          36
+        ],
        "status": "pending",
        "subtasks": []
      },
      {
-        "id": 25,
-        "title": "Implement Dry Run Mode",
-        "description": "Add simulation mode showing planned operations without execution",
-        "details": "Add --dry-run support throughout workflow. In dry-run: show planned git operations, display test commands without running, preview commit messages, show PR body without creating. Format output clearly indicating simulated vs actual. Still perform validation and planning phases. Useful for debugging and verification.",
-        "testStrategy": "Test dry-run flag propagation to all adapters. Verify no side effects occur. Test output formatting for clarity. Validate planning phases still execute.",
+        "id": 39,
+        "title": "Add GitHub PR creation with run reports",
+        "description": "Implement automatic PR creation using gh CLI with detailed run reports",
+        "details": "Create packages/tm-core/src/services/pr-adapter.ts that uses gh CLI to create pull requests. Generate PR body from run manifest and test results, include task/subtask completion summary, coverage metrics, commit links. Implement PR title format: 'Task #{id} [{tag}]: {title}'. Add fallback instructions when gh CLI unavailable. Support custom PR body templates.",
+        "testStrategy": "Unit tests for PR body generation, gh CLI command construction, error handling. Integration tests with mocked gh CLI to verify PR creation flow and body formatting.",
+        "priority": "medium",
+        "dependencies": [
+          31,
+          32,
+          38
+        ],
+        "status": "pending",
+        "subtasks": []
+      },
+      {
+        "id": 40,
+        "title": "Implement task dependency resolution for subtask ordering",
+        "description": "Add intelligent subtask ordering based on dependencies and readiness",
+        "details": "Extend packages/tm-core/src/services/task-service.ts with getNextEligibleSubtask method that considers subtask dependencies, status, and priority. Implement topological sorting for subtask execution order. Handle blocked subtasks and dependency validation. Integrate with existing dependency management and task status systems.",
+        "testStrategy": "Unit tests for dependency resolution algorithms, edge cases with circular dependencies, priority handling. Integration tests with complex task hierarchies and dependency chains.",
+        "priority": "medium",
+        "dependencies": [
+          31,
+          36
+        ],
+        "status": "pending",
+        "subtasks": []
+      },
+      {
+        "id": 41,
+        "title": "Create resume functionality for interrupted runs",
+        "description": "Implement checkpoint/resume system for autopilot workflow interruptions",
+        "details": "Enhance RunStateManager with checkpoint creation and restoration. Add --resume flag to autopilot command that reconstructs WorkflowOrchestrator state from persisted run data. Implement state validation to ensure safe resume (git state, file changes, test status). Support partial phase resume (e.g., retry GREEN phase after manual fixes).",
+        "testStrategy": "Unit tests for state serialization/deserialization, validation logic. Integration tests interrupting and resuming workflows at different phases, verifying state consistency.",
+        "priority": "medium",
+        "dependencies": [
+          34,
+          36,
+          38
+        ],
+        "status": "pending",
+        "subtasks": []
+      },
+      {
+        "id": 42,
+        "title": "Add coverage threshold enforcement",
+        "description": "Implement code coverage validation before allowing commits and finalization",
+        "details": "Enhance TestRunnerAdapter to parse coverage reports from Jest/Vitest and enforce configurable thresholds (lines, branches, functions, statements). Default to 80% across all metrics. Add coverage gates in the GREEN phase before commit and on the final test suite run before PR creation. Provide detailed coverage failure reporting with suggestions for improvement.",
+        "testStrategy": "Unit tests for coverage report parsing from different formats (lcov, json), threshold validation logic. Integration tests with actual test runs generating coverage data.",
+        "priority": "medium",
+        "dependencies": [
+          33,
+          36
+        ],
+        "status": "pending",
+        "subtasks": []
+      },
+      {
+        "id": 43,
+        "title": "Implement tmux-based TUI navigator",
+        "description": "Create terminal user interface for interactive task selection and workflow monitoring",
+        "details": "Create apps/cli/src/ui/tui/navigator.ts using blessed or ink for terminal UI. Left pane shows project info, active tag, task list with status indicators. Right pane coordinates with tmux to spawn executor terminal. Implement keybindings for navigation (↑/↓), task selection (Enter), workflow control (s/p/q). Provide real-time status updates via file watching or event streams.",
+        "testStrategy": "Unit tests for UI component rendering, key handling, state updates. Integration tests with tmux session management and terminal interaction simulation.",
        "priority": "low",
        "dependencies": [
-          14,
-          19
+          31,
+          34
        ],
        "status": "pending",
        "subtasks": []
      },
      {
-        "id": 26,
-        "title": "Add tmux Integration Support",
-        "description": "Create tmux pane management for split-view executor terminal",
-        "details": "Create apps/cli/src/ui/tui/tmux-manager.ts for pane control. Detect tmux availability. Support: split-window for executor pane, send-keys for command execution, capture-pane for output, kill-pane for cleanup. Left pane shows autopilot progress, right pane runs executor. Handle non-tmux fallback gracefully. Preserve pane on interrupt for debugging.",
-        "testStrategy": "Mock tmux commands and verify invocations. Test pane creation and command sending. Verify fallback behavior without tmux. Test cleanup on exit.",
-        "priority": "low",
-        "dependencies": [
-          14
-        ],
-        "status": "pending",
-        "subtasks": []
-      },
-      {
-        "id": 27,
-        "title": "Build Run Report Generator",
-        "description": "Create comprehensive markdown and JSON reports for completed workflows",
-        "details": "Generate reports in .taskmaster/reports/runs/<run-id>/: summary.md with task details, subtask results, test coverage, commit list, duration stats. Full log.jsonl with all operations. coverage.json with detailed metrics. state.json for resumability. Include charts/tables for readability. Generate PR-ready summary section. Archive old runs automatically.",
-        "testStrategy": "Test report generation with various workflow outcomes. Verify markdown formatting and readability. Test JSON structure validity. Validate archival logic for old runs.",
+        "id": 44,
+        "title": "Add prompt composition system for context-aware test generation",
+        "description": "Create sophisticated prompt assembly system combining rules, task context, and phase instructions",
+        "details": "Create packages/tm-core/src/services/prompt-composer.ts that loads and combines prompt fragments from .cursor/rules/, task context, and phase-specific instructions. Implement template system with token replacement ({task}, {subtask}, {framework}). Support rule precedence and conditional inclusion based on project type. Generate targeted prompts for RED (test generation) and GREEN (implementation) phases.",
+        "testStrategy": "Unit tests for template processing, rule loading, context injection. Integration tests generating complete prompts and validating content relevance and accuracy.",
        "priority": "medium",
        "dependencies": [
-          18
-        ],
-        "status": "pending",
-        "subtasks": [
-          {
-            "id": 1,
-            "title": "Create Report Generator Service Core",
-            "description": "Implement the core WorkflowReportGenerator service that orchestrates report generation for completed workflow runs",
-            "dependencies": [],
-            "details": "Create packages/tm-core/src/services/report-generator.service.ts with WorkflowReportGenerator class. Implement methods: generateRunReport(runId, workflowState), generateSummaryMarkdown(state), generateJSONLogs(operations), generateCoverageMetrics(testResults), archiveOldRuns(threshold). Use EventEmitter for progress updates. Store reports in .taskmaster/reports/runs/<run-id>/ directory structure. Integrate with existing ConfigManager for paths and FileStorage for persistence.",
-            "status": "pending",
-            "testStrategy": "Unit tests for report generation methods, markdown formatting validation, JSON structure tests, archive logic tests"
-          },
-          {
-            "id": 2,
-            "title": "Build Markdown Summary Generator",
-            "description": "Create comprehensive markdown report generation with tables, charts, and PR-ready sections",
-            "dependencies": [
-              "27.1"
-            ],
-            "details": "Implement markdown generation in packages/tm-core/src/services/report-generators/markdown-generator.ts. Create formatted sections: Executive Summary (task completion stats, duration, test coverage), Task Details Table (ID, title, status, duration), Subtask Results (grouped by parent, with test outcomes), Test Coverage Charts (using ASCII art or markdown badges), Commit History (list with links), Performance Metrics (timings per phase). Include generatePRBody() method for GitHub-ready summaries. Use markdown tables and proper formatting for readability.",
-            "status": "pending",
-            "testStrategy": "Test markdown output formatting, table generation, special character escaping, PR body validation"
-          },
-          {
-            "id": 3,
-            "title": "Implement JSONL Operation Logger",
-            "description": "Build detailed operation logging system that captures all workflow operations in JSONL format",
-            "dependencies": [
-              "27.1"
-            ],
-            "details": "Create packages/tm-core/src/services/report-generators/jsonl-logger.ts with JSONLOperationLogger class. Implement streaming JSONL writer for log.jsonl file. Capture operations: task starts/completions, test executions, git operations, phase transitions, errors/retries. Each line contains: timestamp, operation type, phase, task/subtask ID, duration, result, metadata. Implement buffered writing for performance. Include log rotation when file exceeds size limit.",
-            "status": "pending",
-            "testStrategy": "Test JSONL format validity, streaming performance, log rotation, operation capture completeness"
-          },
-          {
-            "id": 4,
-            "title": "Create Coverage and Metrics Collectors",
-            "description": "Build test coverage collection and performance metrics aggregation components",
-            "dependencies": [
-              "27.1"
-            ],
-            "details": "Create packages/tm-core/src/services/report-generators/metrics-collector.ts. Implement CoverageCollector to parse test runner outputs (Jest, Vitest, etc.), aggregate line/branch/function coverage, generate coverage.json with detailed metrics per file/module. Implement PerformanceCollector to track phase durations, operation timings, resource usage. Create state.json generator for workflow resumability with checkpoints, completed operations, pending tasks.",
-            "status": "pending",
-            "testStrategy": "Mock various test runner outputs, verify coverage parsing accuracy, test metric aggregation logic"
-          },
-          {
-            "id": 5,
-            "title": "Build Report Archival and Management System",
-            "description": "Implement automatic archival of old run reports and report lifecycle management",
-            "dependencies": [
-              "27.1",
-              "27.2",
-              "27.3",
-              "27.4"
-            ],
-            "details": "Create packages/tm-core/src/services/report-generators/archive-manager.ts. Implement automatic archival: move reports older than 30 days to .taskmaster/reports/archived/, compress old reports to .tar.gz, maintain index of archived reports. Add report management CLI commands in apps/cli/src/commands/reports.command.ts: list-reports, view-report <run-id>, archive-reports, clean-reports. Integrate with WorkflowOrchestrator to trigger report generation on workflow completion.",
-            "status": "pending",
-            "testStrategy": "Test archival thresholds, compression functionality, index maintenance, CLI command integration"
-          }
-        ]
-      },
-      {
-        "id": 28,
-        "title": "Add MCP Tools Integration",
-        "description": "Integrate with MCP server for structured task operations during autopilot",
-        "details": "Use MCP tools where available: get_tasks for task loading, set_task_status for status updates, update_subtask for progress notes, expand_task if subtasks needed. Fallback to direct service calls if MCP unavailable. Improve context passing to executors via MCP. Support MCP-based shell/test execution where available.",
-        "testStrategy": "Mock MCP tool availability and responses. Test fallback to direct service calls. Verify status updates through MCP. Test context enhancement via MCP.",
-        "priority": "low",
-        "dependencies": [
-          14,
-          21
+          35
        ],
        "status": "pending",
        "subtasks": []
      },
      {
-        "id": 29,
-        "title": "Implement Retry and Backoff Logic",
-        "description": "Add intelligent retry mechanisms for flaky tests and transient failures",
-        "details": "Implement exponential backoff in WorkflowOrchestrator for: test execution (max 3 retries), executor calls (max 2 retries), git operations (max 2 retries). Detect flaky test patterns. Add --max-attempts flag (default 3). Track retry attempts in run state. Implement circuit breaker for repeated failures. Provide clear failure reasons.",
-        "testStrategy": "Test retry logic with simulated failures. Verify exponential backoff timing. Test max attempts enforcement. Validate circuit breaker activation.",
+        "id": 45,
+        "title": "Implement tag-branch mapping and automatic tag switching",
+        "description": "Create automatic tag management that maps branches to tags and switches context",
+        "details": "Enhance GitAdapter to automatically set active tag based on branch name using existing tag-management.js functionality. Extract tag from branch name using configured pattern, validate tag exists, and switch to tag context before workflow execution. Implement branch-to-tag mapping persistence and validation. Support tag creation if branch tag doesn't exist.",
+        "testStrategy": "Unit tests for tag extraction from branch names, tag switching logic, mapping persistence. Integration tests with actual git branches and tag management operations.",
        "priority": "medium",
        "dependencies": [
-          11,
-          22
+          32,
+          40
        ],
        "status": "pending",
        "subtasks": []
      },
      {
-        "id": 30,
-        "title": "Create End-to-End Integration Tests",
-        "description": "Build comprehensive test suite validating full autopilot workflow",
-        "details": "Create test/integration/autopilot.test.ts with scenarios: happy path (all tests pass first try), retry scenarios (flaky tests), resume from interruption, various flag combinations, multi-subtask workflows. Use test fixtures with mock tasks/subtasks. Verify all outputs: commits, branches, reports, PR body. Test with different executors and test runners.",
-        "testStrategy": "Integration tests with mock git repo and task data. Test complete workflow execution. Verify all artifacts created correctly. Validate resume functionality. Performance benchmarks for workflow duration.",
+        "id": 46,
+        "title": "Add comprehensive error handling and recovery",
+        "description": "Implement robust error handling with actionable recovery suggestions",
+        "details": "Add comprehensive error handling throughout WorkflowOrchestrator with specific error types: GitError, TestError, ConfigError, DependencyError. Implement recovery suggestions for common failures (merge conflicts, test timeouts, missing dependencies). Add --force flag to bypass certain validations. Include error context in run reports and logs.",
+        "testStrategy": "Unit tests for error classification, recovery suggestion generation, force flag behavior. Integration tests simulating various failure scenarios and verifying appropriate error handling.",
+        "priority": "medium",
+        "dependencies": [
+          31,
+          32,
+          33,
+          38
+        ],
+        "status": "pending",
+        "subtasks": []
+      },
+      {
+        "id": 47,
+        "title": "Implement conventional commit message generation",
+        "description": "Create intelligent commit message generation based on task context and changes",
+        "details": "Enhance GitAdapter with commit message generation using configurable templates. Support the Conventional Commits format with task context: '{type}({scope}): {message} (task {id}.{subtask})'. Auto-detect the commit type (feat, fix, chore) based on task content and file changes. Include commit message templates and validation against the Conventional Commits specification.",
+        "testStrategy": "Unit tests for message template processing, type detection, validation logic. Integration tests generating commit messages for various task types and validating format compliance.",
        "priority": "low",
        "dependencies": [
-          11,
-          12,
-          13,
-          14,
-          15,
-          16,
-          17,
-          18,
-          19,
-          20,
-          21,
-          22
+          32,
+          36
+        ],
+        "status": "pending",
+        "subtasks": []
+      },
+      {
+        "id": 48,
+        "title": "Add multi-framework test execution support",
+        "description": "Extend TestRunnerAdapter to support multiple testing frameworks beyond Jest/Vitest",
+        "details": "Enhance TestRunnerAdapter with framework-specific adapters for pytest (Python), go test (Go), cargo test (Rust). Implement common interface for test execution, result parsing, and coverage reporting across frameworks. Add framework detection based on project files (requirements.txt, go.mod, Cargo.toml). Maintain backward compatibility with existing JavaScript/TypeScript support.",
+        "testStrategy": "Unit tests for framework detection, adapter interface implementation. Integration tests with fixture projects for each supported framework, verifying test execution and result parsing.",
+        "priority": "low",
+        "dependencies": [
+          33
+        ],
+        "status": "pending",
+        "subtasks": []
+      },
+      {
+        "id": 49,
+        "title": "Implement workflow event streaming for real-time monitoring",
+        "description": "Create event streaming system for real-time workflow progress monitoring",
+        "details": "Enhance WorkflowOrchestrator with EventEmitter-based streaming of workflow events (phase changes, test results, commit creation). Implement structured event format with timestamps, phase info, and progress data. Support event persistence to run logs and optional WebSocket streaming for external monitoring. Include progress percentage calculation and time estimates.",
+        "testStrategy": "Unit tests for event emission, formatting, persistence. Integration tests monitoring complete workflow execution with event verification and progress tracking accuracy.",
+        "priority": "low",
+        "dependencies": [
+          31,
+          38
+        ],
+        "status": "pending",
+        "subtasks": []
+      },
+      {
+        "id": 50,
+        "title": "Add intelligent test targeting for faster feedback",
+        "description": "Implement smart test selection that runs only relevant tests during GREEN phase",
+        "details": "Enhance TestRunnerAdapter with test targeting based on file changes and test dependencies. Implement test impact analysis to identify which tests are affected by implementation changes. Support framework-specific targeting (Jest --findRelatedTests, Vitest --changed). Fall back to the full test suite if targeting fails or for final validation.",
+        "testStrategy": "Unit tests for change detection, test dependency analysis, targeting logic. Integration tests with various project structures verifying targeted test selection accuracy and performance improvements.",
+        "priority": "low",
+        "dependencies": [
+          33,
+          36
+        ],
+        "status": "pending",
+        "subtasks": []
+      },
+      {
+        "id": 51,
+        "title": "Implement dry-run visualization with execution timeline",
+        "description": "Create detailed dry-run output showing complete execution plan with time estimates",
+        "details": "Enhance the autopilot command's dry-run mode with a detailed execution timeline showing all phases, subtasks, estimated durations, and dependencies. Include preflight check results, branch operations, test generation/execution plans, and finalization steps. Add an ASCII art progress visualization and list the required external tools (git, gh, test tools).",
+        "testStrategy": "Unit tests for timeline calculation, duration estimation, visualization formatting. Integration tests generating dry-run output for various task complexities and verifying accuracy of plans.",
+        "priority": "low",
+        "dependencies": [
+          34,
+          40
+        ],
+        "status": "pending",
+        "subtasks": []
+      },
+      {
+        "id": 52,
+        "title": "Add autopilot workflow integration tests",
+        "description": "Create comprehensive end-to-end integration tests for complete autopilot workflows",
+        "details": "Create tests/integration/autopilot/ with full workflow tests using temporary git repositories, mock task data, and isolated test environments. Test complete red-green-commit cycles, error recovery, resume functionality, and PR creation. Include performance benchmarks and resource usage validation. Support both Jest and Vitest test execution.",
+        "testStrategy": "Integration tests with isolated environments, git repository fixtures, mock GitHub API responses. Performance tests measuring workflow execution times and resource consumption across different project sizes.",
+        "priority": "medium",
+        "dependencies": [
+          36,
+          39,
+          41
+        ],
+        "status": "pending",
+        "subtasks": []
+      },
+      {
+        "id": 53,
+        "title": "Finalize autopilot documentation and examples",
+        "description": "Create comprehensive documentation for autopilot workflow with examples and troubleshooting",
+        "details": "Create detailed documentation covering autopilot setup, configuration options, workflow phases, error handling, and best practices. Include example PRD files that demonstrate autopilot-compatible task structure. Add troubleshooting guide for common issues (git conflicts, test failures, dependency problems). Create demo video or GIF showing complete workflow execution.",
+        "testStrategy": "Documentation validation through user testing, example verification by running actual autopilot workflows, link checking and format validation. Accessibility and clarity review for technical documentation.",
+        "priority": "low",
+        "dependencies": [
+          52
        ],
        "status": "pending",
        "subtasks": []
@@ -8237,7 +8229,7 @@
    ],
    "metadata": {
      "created": "2025-09-30T13:32:28.649Z",
-      "updated": "2025-09-30T15:13:53.999Z",
+      "updated": "2025-10-06T17:44:07.207Z",
      "description": "Tasks for autonomous-tdd-git-workflow context"
    }
  }