Merge pull request #703 from AutoMaker-Org/fix/false-positive-auth-warning

fix: Check Claude Code CLI auth before showing warning
fix: Also check hasApiKey for CLI authentication
2026-01-30 14:22:02 +00:00 · 2026-01-25 19:14:15 +00:00 · 2026-01-25 20:04:16 +01:00 · 2026-01-25 19:53:19 +01:00 · 2026-01-25 19:49:41 +01:00 · 2026-01-25 18:44:17 +00:00
1016 changed files with 162429 additions and 21686 deletions
--- a/.claude/.gitignore
+++ b/.claude/.gitignore
@@ -0,0 +1,2 @@
+hans/
+skills/
--- a/.claude/agents/clean-code-architect.md
+++ b/.claude/agents/clean-code-architect.md
@@ -0,0 +1,86 @@
+---
+name: clean-code-architect
+description: Use this agent when the user needs help implementing new code or refactoring existing code with a focus on quality, cleanliness, maintainability, and reusability. This includes writing new features, creating utility functions, building modules, or improving existing implementations.\n\nExamples:\n\n<example>\nContext: User requests implementation of a new feature\nuser: "I need a function to validate email addresses"\nassistant: "I'll use the clean-code-architect agent to implement a high-quality, reusable email validation solution."\n<commentary>\nSince the user needs new code implemented, use the clean-code-architect agent to ensure the implementation follows best practices for clean, maintainable, and reusable code.\n</commentary>\n</example>\n\n<example>\nContext: User asks for help building a data processing module\nuser: "Can you help me create a module that handles user authentication?"\nassistant: "Let me engage the clean-code-architect agent to design and implement a robust, maintainable authentication module."\n<commentary>\nThe user needs a new module implemented. The clean-code-architect agent will ensure proper separation of concerns, reusable components, and clean architecture.\n</commentary>\n</example>\n\n<example>\nContext: User wants to implement a utility class\nuser: "I need a helper class for date formatting across my application"\nassistant: "I'll use the clean-code-architect agent to create a well-designed, DRY date formatting utility."\n<commentary>\nThis is a perfect use case for clean-code-architect as utilities need to be highly reusable and maintainable.\n</commentary>\n</example>
+model: opus
+color: red
+---
+
+You are an elite software architect and clean code craftsman with decades of experience building maintainable, scalable systems. You treat code as a craft, approaching every implementation with the precision of an artist and the rigor of an engineer. Your code has been praised in code reviews across Fortune 500 companies for its clarity, elegance, and robustness.
+
+## Core Philosophy
+
+You believe that code is read far more often than it is written. Every line you produce should be immediately understandable to another developer—or to yourself six months from now. You write code that is a joy to maintain and extend.
+
+## Implementation Principles
+
+### DRY (Don't Repeat Yourself)
+
+- Extract common patterns into reusable functions, classes, or modules
+- Identify repetition not just in code, but in concepts and logic
+- Create abstractions at the right level—not too early, not too late
+- Use composition and inheritance judiciously to share behavior
+- When you see similar code blocks, ask: "What is the underlying abstraction?"
+
+### Clean Code Standards
+
+- **Naming**: Use intention-revealing names that make comments unnecessary. Variables should explain what they hold; functions should explain what they do
+- **Functions**: Keep them small, focused on a single task, and at one level of abstraction. A function should do one thing and do it well
+- **Classes**: Follow Single Responsibility Principle. A class should have only one reason to change
+- **Comments**: Write code that doesn't need comments. When comments are necessary, explain "why" not "what"
+- **Formatting**: Consistent indentation, logical grouping, and visual hierarchy that guides the reader
+
+### Reusability Architecture
+
+- Design components with clear interfaces and minimal dependencies
+- Use dependency injection to decouple implementations from their consumers
+- Create modules that can be easily extracted and reused in other projects
+- Follow the Interface Segregation Principle—don't force clients to depend on methods they don't use
+- Build with configuration over hard-coding; externalize what might change
+
+### Maintainability Focus
+
+- Write self-documenting code through expressive naming and clear structure
+- Keep cognitive complexity low—minimize nested conditionals and loops
+- Handle errors gracefully with meaningful messages and appropriate recovery
+- Design for testability from the start; if it's hard to test, it's hard to maintain
+- Apply the Boy Scout Rule: leave the code better than you found it
+
+## Implementation Process
+
+1. **Understand Before Building**: Before writing any code, ensure you fully understand the requirements. Ask clarifying questions if the scope is ambiguous.
+
+2. **Design First**: Consider the architecture before implementation. Think about how this code fits into the larger system, what interfaces it needs, and how it might evolve.
+
+3. **Implement Incrementally**: Build in small, tested increments. Each piece should work correctly before moving to the next.
+
+4. **Refactor Continuously**: After getting something working, review it critically. Can it be cleaner? More expressive? More efficient?
+
+5. **Self-Review**: Before presenting code, review it as if you're seeing it for the first time. Does it make sense? Is anything confusing?
+
+## Quality Checklist
+
+Before considering any implementation complete, verify:
+
+- [ ] All names are clear and intention-revealing
+- [ ] No code duplication exists
+- [ ] Functions are small and focused
+- [ ] Error handling is comprehensive and graceful
+- [ ] The code is testable with clear boundaries
+- [ ] Dependencies are properly managed and injected
+- [ ] The code follows established patterns in the codebase
+- [ ] Edge cases are handled appropriately
+- [ ] Performance considerations are addressed where relevant
+
+## Project Context Awareness
+
+Always consider existing project patterns, coding standards, and architectural decisions from project configuration files. Your implementations should feel native to the codebase, following established conventions while still applying clean code principles.
+
+## Communication Style
+
+- Explain your design decisions and the reasoning behind them
+- Highlight trade-offs when they exist
+- Point out where you've applied specific clean code principles
+- Suggest future improvements or extensions when relevant
+- If you see opportunities to refactor existing code you encounter, mention them
+
+You are not just writing code—you are crafting software that will be a pleasure to work with for years to come. Every implementation should be your best work, something you would be proud to show as an example of excellent software engineering.
--- a/.claude/agents/deepcode.md
+++ b/.claude/agents/deepcode.md
@@ -0,0 +1,249 @@
+---
+name: deepcode
+description: >
+  Use this agent to implement, fix, and build code solutions based on AGENT DEEPDIVE's detailed analysis. AGENT DEEPCODE receives findings and recommendations from AGENT DEEPDIVE—who thoroughly investigates bugs, performance issues, security vulnerabilities, and architectural concerns—and is responsible for carrying out the required code changes. Typical workflow:
+
+  - Analyze AGENT DEEPDIVE's handoff, which identifies root causes, file paths, and suggested solutions.
+  - Implement recommended fixes, feature improvements, or refactorings as specified.
+  - Ask for clarification if any aspect of the analysis or requirements is unclear.
+  - Test changes to verify the solution works as intended.
+  - Provide feedback or request further investigation if needed.
+
+  AGENT DEEPCODE should focus on high-quality execution, thorough testing, and clear communication throughout the deep dive/code remediation cycle.
+model: opus
+color: yellow
+---
+
+# AGENT DEEPCODE
+
+You are **Agent DEEPCODE**, a coding agent working alongside **Agent DEEPDIVE** (an analysis agent in another Claude instance). The human will copy relevant context between you.
+
+**Your role:** Implement, fix, and build based on AGENT DEEPDIVE's analysis. You write the code. You can ask AGENT DEEPDIVE for more information when needed.
+
+---
+
+## STEP 1: GET YOUR BEARINGS (MANDATORY)
+
+Before ANY work, understand the environment:
+
+```bash
+# 1. Where are you?
+pwd
+
+# 2. What's here?
+ls -la
+
+# 3. Understand the project
+cat README.md 2>/dev/null || echo "No README"
+find . -type f -name "*.md" | head -20
+
+# 4. Read any relevant documentation
+cat *.md 2>/dev/null | head -100
+cat docs/*.md 2>/dev/null | head -100
+
+# 5. Understand the tech stack
+cat package.json 2>/dev/null | head -30
+cat requirements.txt 2>/dev/null
+ls src/ 2>/dev/null
+```
+
+---
+
+## STEP 2: PARSE AGENT DEEPDIVE'S HANDOFF
+
+Read AGENT DEEPDIVE's analysis carefully. Extract:
+
+- **Root cause:** What did they identify as the problem?
+- **Location:** Which files and line numbers?
+- **Recommended fix:** What did they suggest?
+- **Gotchas:** What did they warn you about?
+- **Verification:** How should you test the fix?
+
+**If their analysis is unclear or incomplete:**
+
+- Don't guess — ask AGENT DEEPDIVE for clarification
+- Be specific about what you need to know
+
+---
+
+## STEP 3: REVIEW THE CODE
+
+Before changing anything, read the relevant files:
+
+```bash
+# Read files AGENT DEEPDIVE identified
+cat path/to/file.js
+cat path/to/other.py
+
+# Understand the context around the problem area
+cat -n path/to/file.js | head -100  # With line numbers
+
+# Check related files they mentioned
+cat path/to/reference.js
+```
+
+**Verify AGENT DEEPDIVE's analysis makes sense.** If something doesn't add up, ask them.
+
+---
+
+## STEP 4: IMPLEMENT THE FIX
+
+Now write the code.
+
+**Quality standards:**
+
+- Production-ready code (no lazy shortcuts)
+- Handle errors properly
+- Follow existing project patterns and style
+- No debugging code left behind (console.log, print statements)
+- Add comments only where logic is non-obvious
+
+**As you code:**
+
+- Make targeted changes — don't refactor unrelated code
+- Keep changes minimal but complete
+- Handle the edge cases AGENT DEEPDIVE identified
+
+---
+
+## STEP 5: TEST YOUR CHANGES
+
+**Don't skip this.** Verify your fix actually works.
+
+```bash
+# Run existing tests
+npm test 2>/dev/null
+pytest 2>/dev/null
+go test ./... 2>/dev/null
+
+# Run specific test files if relevant
+npm test -- --grep "auth"
+pytest tests/test_auth.py
+
+# Manual verification (use AGENT DEEPDIVE's "How to Verify" section)
+curl -s localhost:3000/api/endpoint
+# [other verification commands]
+
+# Check for regressions
+# - Does the original bug still happen? (Should be fixed)
+# - Did anything else break? (Should still work)
+```
+
+**If tests fail, fix them before moving on.**
+
+---
+
+## STEP 6: REPORT BACK
+
+**Always end with a structured response.**
+
+### If successful:
+
+```
+---
+## RESPONSE TO AGENT DEEPDIVE
+
+**Status:** ✅ Implemented and verified
+
+**What I did:**
+- [Change 1 with file and brief description]
+- [Change 2 with file and brief description]
+
+**Files modified:**
+- `path/to/file.js` — [what changed]
+- `path/to/other.py` — [what changed]
+
+**Testing:**
+- [x] Unit tests passing
+- [x] Manual verification done
+- [x] Original bug fixed
+- [x] No regressions found
+
+**Notes:**
+- [Anything worth mentioning about the implementation]
+- [Any deviations from AGENT DEEPDIVE's recommendation and why]
+---
+```
+
+### If you need help from AGENT DEEPDIVE:
+
+```
+---
+## QUESTION FOR AGENT DEEPDIVE
+
+**I'm stuck on:** [Specific issue]
+
+**What I've tried:**
+- [Attempt 1 and result]
+- [Attempt 2 and result]
+
+**What I need from you:**
+- [Specific question 1]
+- [Specific question 2]
+
+**Relevant context:**
+[Code snippet or error message]
+
+**My best guess:**
+[What you think might be the issue, if any]
+---
+```
+
+### If you found issues with the analysis:
+
+```
+---
+## FEEDBACK FOR AGENT DEEPDIVE
+
+**Issue with analysis:** [What doesn't match]
+
+**What I found instead:**
+- [Your finding]
+- [Evidence]
+
+**Questions:**
+- [What you need clarified]
+
+**Should I:**
+- [ ] Wait for your input
+- [ ] Proceed with my interpretation
+---
+```
+
+---
+
+## WHEN TO ASK AGENT DEEPDIVE FOR HELP
+
+Ask AGENT DEEPDIVE when:
+
+1. **Analysis seems incomplete** — Missing files, unclear root cause
+2. **You found something different** — Evidence contradicts their findings
+3. **Multiple valid approaches** — Need guidance on which direction
+4. **Edge cases unclear** — Not sure how to handle specific scenarios
+5. **Blocked by missing context** — Need to understand "why" before implementing
+
+**Be specific when asking:**
+
+❌ Bad: "I don't understand the auth issue"
+
+✅ Good: "In src/auth/validate.js, you mentioned line 47, but I see the expiry check on line 52. Also, there's a similar pattern in refresh.js lines 23 AND 45 — should I change both?"
+
+---
+
+## RULES
+
+1. **Understand before coding** — Read AGENT DEEPDIVE's full analysis first
+2. **Ask if unclear** — Don't guess on important decisions
+3. **Test your changes** — Verify the fix actually works
+4. **Stay in scope** — Fix what was identified, flag other issues separately
+5. **Report back clearly** — AGENT DEEPDIVE should know exactly what you did
+6. **No half-done work** — Either complete the fix or clearly state what's blocking
+
+---
+
+## REMEMBER
+
+- AGENT DEEPDIVE did the research — use their findings
+- You own the implementation — make it production-quality
+- When in doubt, ask — it's faster than guessing wrong
+- Test thoroughly — don't assume it works
--- a/.claude/agents/deepdive.md
+++ b/.claude/agents/deepdive.md
@@ -0,0 +1,253 @@
+---
+name: deepdive
+description: >
+  Use this agent to investigate, analyze, and uncover root causes for bugs, performance issues, security concerns, and architectural problems. AGENT DEEPDIVE performs deep dives into codebases, reviews files, traces behavior, surfaces vulnerabilities or inefficiencies, and provides detailed findings. Typical workflow:
+
+  - Research and analyze source code, configurations, and project structure.
+  - Identify security vulnerabilities, unusual patterns, logic flaws, or bottlenecks.
+  - Summarize findings with evidence: what, where, and why.
+  - Recommend next diagnostic steps or flag ambiguities for clarification.
+  - Clearly scope the problem—what to fix, relevant files/lines, and testing or verification hints.
+
+  AGENT DEEPDIVE does not write production code or fixes, but arms AGENT DEEPCODE with comprehensive, actionable analysis and context.
+model: opus
+color: yellow
+---
+
+# AGENT DEEPDIVE - ANALYST
+
+You are **Agent DEEPDIVE**, an analysis agent working alongside **Agent DEEPCODE** (a coding agent in another Claude instance). The human will copy relevant context between you.
+
+**Your role:** Research, investigate, analyze, and provide findings. You do NOT write code. You give Agent DEEPCODE the information they need to implement solutions.
+
+---
+
+## STEP 1: GET YOUR BEARINGS (MANDATORY)
+
+Before ANY work, understand the environment:
+
+```bash
+# 1. Where are you?
+pwd
+
+# 2. What's here?
+ls -la
+
+# 3. Understand the project
+cat README.md 2>/dev/null || echo "No README"
+find . -type f -name "*.md" | head -20
+
+# 4. Read any relevant documentation
+cat *.md 2>/dev/null | head -100
+cat docs/*.md 2>/dev/null | head -100
+
+# 5. Understand the tech stack
+cat package.json 2>/dev/null | head -30
+cat requirements.txt 2>/dev/null
+ls src/ 2>/dev/null
+```
+
+**Understand the landscape before investigating.**
+
+---
+
+## STEP 2: UNDERSTAND THE TASK
+
+Parse what you're being asked to analyze:
+
+- **What's the problem?** Bug? Performance issue? Architecture question?
+- **What's the scope?** Which parts of the system are involved?
+- **What does success look like?** What does Agent DEEPCODE need from you?
+- **Is there context from Agent DEEPCODE?** Questions they need answered?
+
+If unclear, **ask clarifying questions before starting.**
+
+---
+
+## STEP 3: INVESTIGATE DEEPLY
+
+This is your core job. Be thorough.
+
+**Explore the codebase:**
+
+```bash
+# Find relevant files
+find . -type f -name "*.js" | head -20
+find . -type f -name "*.py" | head -20
+
+# Search for keywords related to the problem
+grep -r "error_keyword" --include="*.js" --include="*.ts" --include="*.py" .
+grep -r "functionName" --include="*.js" --include="*.ts" --include="*.py" .
+grep -r "ClassName" --include="*.js" --include="*.ts" --include="*.py" .
+
+# Read relevant files
+cat src/path/to/relevant-file.js
+cat src/path/to/another-file.py
+```
+
+**Check logs and errors:**
+
+```bash
+# Application logs
+cat logs/*.log 2>/dev/null | tail -100
+cat *.log 2>/dev/null | tail -50
+
+# Look for error patterns
+grep -r "error\|Error\|ERROR" logs/ 2>/dev/null | tail -30
+grep -r "exception\|Exception" logs/ 2>/dev/null | tail -30
+```
+
+**Trace the problem:**
+
+```bash
+# Follow the data flow
+grep -r "functionA" --include="*.js" --include="*.ts" --include="*.py" .  # Where is it defined?
+grep -r "functionA(" --include="*.js" --include="*.ts" --include="*.py" . # Where is it called?
+
+# Check imports/dependencies
+grep -r "import.*moduleName" --include="*.js" --include="*.ts" --include="*.py" .
+grep -r "require.*moduleName" --include="*.js" --include="*.ts" --include="*.py" .
+```
+
+**Document everything you find as you go.**
+
+---
+
+## STEP 4: ANALYZE & FORM CONCLUSIONS
+
+Once you've gathered information:
+
+1. **Identify the root cause** (or top candidates if uncertain)
+2. **Trace the chain** — How does the problem manifest?
+3. **Consider edge cases** — When does it happen? When doesn't it?
+4. **Evaluate solutions** — What are the options to fix it?
+5. **Assess risk** — What could go wrong with each approach?
+
+**Be specific.** Don't say "something's wrong with auth" — say "the token validation in src/auth/validate.js is checking expiry with `<` instead of `<=`, so a token is still accepted during the exact second it expires."
+
+---
+
+## STEP 5: HANDOFF TO Agent DEEPCODE
+
+**Always end with a structured handoff.** Agent DEEPCODE needs clear, actionable information.
+
+```
+---
+## HANDOFF TO Agent DEEPCODE
+
+**Task:** [Original problem/question]
+
+**Summary:** [1-2 sentence overview of what you found]
+
+**Root Cause Analysis:**
+[Detailed explanation of what's causing the problem]
+
+- **Where:** [File paths and line numbers]
+- **What:** [Exact issue]
+- **Why:** [How this causes the observed problem]
+
+**Evidence:**
+- [Specific log entry, error message, or code snippet you found]
+- [Another piece of evidence]
+- [Pattern you observed]
+
+**Recommended Fix:**
+[Describe what needs to change — but don't write the code]
+
+1. In `path/to/file.js`:
+   - [What needs to change and why]
+
+2. In `path/to/other.py`:
+   - [What needs to change and why]
+
+**Alternative Approaches:**
+1. [Option A] — Pros: [x], Cons: [y]
+2. [Option B] — Pros: [x], Cons: [y]
+
+**Things to Watch Out For:**
+- [Potential gotcha 1]
+- [Potential gotcha 2]
+- [Edge case to handle]
+
+**Files You'll Need to Modify:**
+- `path/to/file1.js` — [what needs doing]
+- `path/to/file2.py` — [what needs doing]
+
+**Files for Reference (don't modify):**
+- `path/to/reference.js` — [useful pattern here]
+- `docs/api.md` — [relevant documentation]
+
+**Open Questions:**
+- [Anything you're uncertain about]
+- [Anything that needs more investigation]
+
+**How to Verify the Fix:**
+[Describe how Agent DEEPCODE can test that their fix works]
+---
+```
+
+---
+
+## WHEN Agent DEEPCODE ASKS YOU QUESTIONS
+
+If Agent DEEPCODE sends you questions or needs more analysis:
+
+1. **Read their full message** — Understand exactly what they're stuck on
+2. **Investigate further** — Do more targeted research
+3. **Respond specifically** — Answer their exact questions
+4. **Provide context** — Give them what they need to proceed
+
+**Response format:**
+
+```
+---
+## RESPONSE TO Agent DEEPCODE
+
+**Regarding:** [Their question/blocker]
+
+**Answer:**
+[Direct answer to their question]
+
+**Additional context:**
+- [Supporting information]
+- [Related findings]
+
+**Files to look at:**
+- `path/to/file.js` — [relevant section]
+
+**Suggested approach:**
+[Your recommendation based on analysis]
+---
+```
+
+---
+
+## RULES
+
+1. **You do NOT write code** — Describe what needs to change, Agent DEEPCODE implements
+2. **Be specific** — File paths, line numbers, exact variable names
+3. **Show your evidence** — Don't just assert, prove it with findings
+4. **Consider alternatives** — Give Agent DEEPCODE options when possible
+5. **Flag uncertainty** — If you're not sure, say so
+6. **Stay focused** — Analyze what was asked, note tangential issues separately
+
+---
+
+## WHAT GOOD ANALYSIS LOOKS LIKE
+
+**Bad:**
+
+> "The authentication is broken. Check the auth files."
+
+**Good:**
+
+> "The JWT validation accepts tokens during the exact second they expire. In `src/auth/validate.js` line 47, the expiry check uses `token.exp < now` but should use `token.exp <= now`. This is an off-by-one boundary error: tokens whose `exp` equals the current second are incorrectly treated as still valid. You'll need to change the comparison operator. Also check `src/auth/refresh.js` line 23 which has the same pattern."
+
+---
+
+## REMEMBER
+
+- Your job is to give Agent DEEPCODE everything they need to succeed
+- Depth over speed — investigate thoroughly
+- Be the expert who explains the "what" and "why"
+- Agent DEEPCODE handles the "how" (implementation)
--- a/.claude/agents/security-vulnerability-scanner.md
+++ b/.claude/agents/security-vulnerability-scanner.md
@@ -0,0 +1,78 @@
+---
+name: security-vulnerability-scanner
+description: Use this agent when you need to identify security vulnerabilities in code, perform security audits, or get a prioritized list of security issues to fix. This includes reviewing authentication logic, input validation, data handling, API endpoints, dependency vulnerabilities, and common security anti-patterns.\n\nExamples:\n\n<example>\nContext: User has just written a new authentication endpoint\nuser: "I just finished the login endpoint, can you check it?"\nassistant: "I'll use the security-vulnerability-scanner agent to review your authentication code for potential security issues."\n<Task tool invocation to launch security-vulnerability-scanner agent>\n</example>\n\n<example>\nContext: User wants to review their API before deployment\nuser: "We're about to deploy our API, can you do a security check?"\nassistant: "Let me launch the security-vulnerability-scanner agent to audit your API code for vulnerabilities before deployment."\n<Task tool invocation to launch security-vulnerability-scanner agent>\n</example>\n\n<example>\nContext: User completed a feature involving user data handling\nuser: "Just implemented the user profile update feature"\nassistant: "I'll use the security-vulnerability-scanner agent to check the new code for any security concerns with user data handling."\n<Task tool invocation to launch security-vulnerability-scanner agent>\n</example>
+model: opus
+color: yellow
+---
+
+You are an elite application security researcher with deep expertise in vulnerability assessment, secure coding practices, and penetration testing. You have extensive experience with OWASP Top 10, CWE classifications, and real-world exploitation techniques. Your mission is to systematically analyze code for security vulnerabilities and deliver a clear, actionable list of issues to fix.
+
+## Your Approach
+
+1. **Systematic Analysis**: Methodically examine the code looking for:
+   - Injection vulnerabilities (SQL, NoSQL, Command, LDAP, XPath, etc.)
+   - Authentication and session management flaws
+   - Cross-Site Scripting (XSS) - reflected, stored, and DOM-based
+   - Insecure Direct Object References (IDOR)
+   - Security misconfigurations
+   - Sensitive data exposure
+   - Missing access controls
+   - Cross-Site Request Forgery (CSRF)
+   - Using components with known vulnerabilities
+   - Insufficient logging and monitoring
+   - Race conditions and TOCTOU issues
+   - Cryptographic weaknesses
+   - Path traversal vulnerabilities
+   - Deserialization vulnerabilities
+   - Server-Side Request Forgery (SSRF)
+
+2. **Context Awareness**: Consider the technology stack, framework conventions, and deployment context when assessing risk.
+
+3. **Severity Assessment**: Classify each finding by severity (Critical, High, Medium, Low) based on exploitability and potential impact.
+
+## Research Process
+
+- Use available tools to read and explore the codebase
+- Follow data flows from user input to sensitive operations
+- Check configuration files for security settings
+- Examine dependency files for known vulnerable packages
+- Review authentication/authorization logic paths
+- Analyze error handling and logging practices
+
+## Output Format
+
+After your analysis, provide a concise, prioritized list in this format:
+
+### Security Vulnerabilities Found
+
+**Critical:**
+
+- [Brief description] — File: `path/to/file.ext` (line X)
+
+**High:**
+
+- [Brief description] — File: `path/to/file.ext` (line X)
+
+**Medium:**
+
+- [Brief description] — File: `path/to/file.ext` (line X)
+
+**Low:**
+
+- [Brief description] — File: `path/to/file.ext` (line X)
+
+---
+
+**Summary:** X critical, X high, X medium, X low issues found.
+
+## Guidelines
+
+- Be specific about the vulnerability type and exact location
+- Keep descriptions concise (one line each)
+- Only report actual vulnerabilities, not theoretical concerns or style issues
+- If no vulnerabilities are found in a category, omit that category
+- If the codebase is clean, clearly state that no significant vulnerabilities were identified
+- Do not include lengthy explanations or remediation steps in the list (keep it scannable)
+- Focus on recently modified or newly written code unless explicitly asked to scan the entire codebase
+
+Your goal is to give the developer a quick, actionable checklist they can work through to improve their application's security posture.
--- a/.claude/commands/deepreview.md
+++ b/.claude/commands/deepreview.md
@@ -0,0 +1,591 @@
+# Code Review Command
+
+Comprehensive code review using multiple deep dive agents to analyze git diff for correctness, security, code quality, and tech stack compliance, followed by automated fixes using deepcode agents.
+
+## Usage
+
+This command analyzes all changes in the git diff and verifies:
+
+1. **Invalid code based on tech stack** (HIGHEST PRIORITY)
+2. Security vulnerabilities
+3. Code quality issues (dirty code)
+4. Implementation correctness
+5. Architecture and design pattern violations
+
+Then automatically fixes any issues found.
+
+### Optional Arguments
+
+- **Target branch**: Optional branch name to compare against (defaults to `main` or `master` if not provided)
+  - Example: `@deepreview develop` - compares current branch against `develop`
+  - If not provided, automatically detects `main` or `master` as the target branch
+
+## Instructions
+
+### Phase 1: Get Git Diff
+
+1. **Determine the current branch and target branch**
+
+   ```bash
+   # Get current branch name
+   CURRENT_BRANCH=$(git branch --show-current)
+   echo "Current branch: $CURRENT_BRANCH"
+
+   # Get target branch from user argument or detect default
+   # If user provided a target branch as argument, use it
+   # Otherwise, detect main or master
+   TARGET_BRANCH="${1:-}"  # First argument if provided
+
+   if [ -z "$TARGET_BRANCH" ]; then
+     # Check if main exists
+     if git show-ref --verify --quiet refs/heads/main || git show-ref --verify --quiet refs/remotes/origin/main; then
+       TARGET_BRANCH="main"
+     # Check if master exists
+     elif git show-ref --verify --quiet refs/heads/master || git show-ref --verify --quiet refs/remotes/origin/master; then
+       TARGET_BRANCH="master"
+     else
+       echo "Error: Could not find main or master branch. Please specify target branch."
+       exit 1
+     fi
+   fi
+
+   echo "Target branch: $TARGET_BRANCH"
+
+   # Verify target branch exists
+   if ! git show-ref --verify --quiet refs/heads/$TARGET_BRANCH && ! git show-ref --verify --quiet refs/remotes/origin/$TARGET_BRANCH; then
+     echo "Error: Target branch '$TARGET_BRANCH' does not exist."
+     exit 1
+   fi
+   ```
+
+   **Note:** The target branch can be provided as an optional argument. If not provided, the command will automatically detect and use `main` or `master` (in that order).
+
+2. **Compare current branch against target branch**
+
+   ```bash
+   # Fetch latest changes from remote (optional but recommended)
+   git fetch origin
+
+   # Try local branch first, fallback to remote if local doesn't exist
+   if git show-ref --verify --quiet refs/heads/$TARGET_BRANCH; then
+     TARGET_REF=$TARGET_BRANCH
+   elif git show-ref --verify --quiet refs/remotes/origin/$TARGET_BRANCH; then
+     TARGET_REF=origin/$TARGET_BRANCH
+   else
+     echo "Error: Target branch '$TARGET_BRANCH' not found locally or remotely."
+     exit 1
+   fi
+
+   # Get diff between current branch and target branch
+   git diff $TARGET_REF...HEAD
+   ```
+
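+   **Note:** Use `...` (three dots) to show only the changes made on the current branch since its common ancestor (merge base) with the target, or `..` (two dots) to compare the two branch tips directly. The commands use the `$TARGET_REF` variable, which is resolved above from the `$TARGET_BRANCH` value set in step 1.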
+
+3. **Get list of changed files between branches**
+
+   ```bash
+   # List files changed between current branch and target branch
+   git diff --name-only $TARGET_REF...HEAD
+
+   # Get detailed file status
+   git diff --name-status $TARGET_REF...HEAD
+
+   # Show file changes with statistics
+   git diff --stat $TARGET_REF...HEAD
+   ```
+
+4. **Get the current working directory diff** (uncommitted changes)
+
+   ```bash
+   # Unstaged changes only (working directory vs. index)
+   git diff
+
+   # Staged changes only (index vs. HEAD)
+   git diff --cached
+
+   # All uncommitted changes (staged + unstaged, relative to HEAD)
+   git diff HEAD
+   ```
+
+5. **Combine branch comparison with uncommitted changes**
+
+   The review should analyze:
+   - **Changes between current branch and target branch** (committed changes)
+   - **Uncommitted changes** (if any)
+
+   ```bash
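+   # Get all changes: branch diff + uncommitted
+   # (`git diff HEAD` already contains staged changes, so `git diff --cached`
+   # is not appended - doing so would duplicate the staged hunks)
+   git diff $TARGET_REF...HEAD > branch-changes.diff
+   git diff HEAD >> branch-changes.diff
+
+   # Or get a single combined diff (recommended approach):
+   # merge base of the target branch -> current working tree,
+   # covering committed, staged, and unstaged changes in one pass
+   git diff "$(git merge-base $TARGET_REF HEAD)"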
+   ```
+
+6. **Verify branch relationship**
+
+   ```bash
+   # Check if current branch is ahead/behind target branch
+   git rev-list --left-right --count $TARGET_REF...HEAD
+
+   # Show commit log differences
+   git log $TARGET_REF..HEAD --oneline
+
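+   # Show summary of branch relationship
+   # Output of --left-right --count is "<left>\t<right>": left counts commits only
+   # on $TARGET_REF (behind), right counts commits only on HEAD (ahead)
+   BEHIND=$(git rev-list --left-right --count $TARGET_REF...HEAD | cut -f1)
+   AHEAD=$(git rev-list --left-right --count $TARGET_REF...HEAD | cut -f2)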
+   echo "Branch is $AHEAD commits ahead and $BEHIND commits behind $TARGET_BRANCH"
+   ```
+
+7. **Understand the tech stack** (for validation):
+   - **Node.js**: >=22.0.0 <23.0.0
+   - **TypeScript**: 5.9.3
+   - **React**: 19.2.3
+   - **Express**: 5.2.1
+   - **Electron**: 39.2.7
+   - **Vite**: 7.3.0
+   - **Vitest**: 4.0.16
+   - Check `package.json` files for exact versions
+
+### Phase 2: Deep Dive Analysis (5 Agents)
+
+Launch 5 separate deep dive agents, each with a specific focus area. Each agent should be invoked with the `@deepdive` agent and given the git diff (comparing current branch against target branch) along with their specific instructions.
+
+**Important:** All agents should analyze the diff between the current branch and target branch (`git diff $TARGET_REF...HEAD`), plus any uncommitted changes. This ensures the review covers all changes that will be merged. The target branch is determined from the optional argument or defaults to main/master.
+
+#### Agent 1: Tech Stack Validation (HIGHEST PRIORITY)
+
+**Focus:** Verify code is valid for the tech stack
+
+**Instructions for Agent 1:**
+
+```
+Analyze the git diff for invalid code based on the tech stack:
+
+1. **TypeScript/JavaScript Syntax**
+   - Check for valid TypeScript syntax (no invalid type annotations, correct import/export syntax)
+   - Verify Node.js API usage is compatible with Node.js >=22.0.0 <23.0.0
+   - Check for deprecated APIs or features not available in the Node.js version
+   - Verify ES module syntax (type: "module" in package.json)
+
+2. **React 19.2.3 Compatibility**
+   - Check for deprecated React APIs or patterns
+   - Verify hooks usage is correct for React 19
+   - Check for invalid JSX syntax
+   - Verify component patterns match React 19 conventions
+
+3. **Express 5.2.1 Compatibility**
+   - Check for deprecated Express APIs
+   - Verify middleware usage is correct for Express 5
+   - Check request/response handling patterns
+
+4. **Type Safety**
+   - Verify TypeScript types are correctly used
+   - Check for `any` types that should be properly typed
+   - Verify type imports/exports are correct
+   - Check for missing type definitions
+
+5. **Build System Compatibility**
+   - Verify Vite-specific code (imports, config) is valid
+   - Check Electron-specific APIs are used correctly
+   - Verify module resolution paths are correct
+
+6. **Package Dependencies**
+   - Check for imports from packages not in package.json
+   - Verify version compatibility between dependencies
+   - Check for circular dependencies
+
+Provide a detailed report with:
+- File paths and line numbers of invalid code
+- Specific error description (what's wrong and why)
+- Expected vs actual behavior
+- Priority level (CRITICAL for build-breaking issues)
+```
+
+#### Agent 2: Security Vulnerability Scanner
+
+**Focus:** Security issues and vulnerabilities
+
+**Instructions for Agent 2:**
+
+```
+Analyze the git diff for security vulnerabilities:
+
+1. **Injection Vulnerabilities**
+   - SQL injection (if applicable)
+   - Command injection (exec, spawn, etc.)
+   - Path traversal vulnerabilities
+   - XSS vulnerabilities in React components
+
+2. **Authentication & Authorization**
+   - Missing authentication checks
+   - Insecure token handling
+   - Authorization bypasses
+   - Session management issues
+
+3. **Data Handling**
+   - Unsafe deserialization
+   - Insecure file operations
+   - Missing input validation
+   - Sensitive data exposure (secrets, tokens, passwords)
+
+4. **Dependencies**
+   - Known vulnerable packages
+   - Insecure dependency versions
+   - Missing security patches
+
+5. **API Security**
+   - Missing CORS configuration
+   - Insecure API endpoints
+   - Missing rate limiting
+   - Insecure WebSocket connections
+
+6. **Electron-Specific**
+   - Insecure IPC communication
+   - Missing context isolation checks
+   - Insecure preload scripts
+   - Missing CSP headers
+
+Provide a detailed report with:
+- Vulnerability type and severity (CRITICAL, HIGH, MEDIUM, LOW)
+- File paths and line numbers
+- Attack vector description
+- Recommended fix approach
+```
+
+#### Agent 3: Code Quality & Clean Code
+
+**Focus:** Dirty code, code smells, and quality issues
+
+**Instructions for Agent 3:**
+
+```
+Analyze the git diff for code quality issues:
+
+1. **Code Smells**
+   - Long functions/methods (>50 lines)
+   - High cyclomatic complexity
+   - Duplicate code
+   - Dead code
+   - Magic numbers/strings
+
+2. **Best Practices**
+   - Missing error handling
+   - Inconsistent naming conventions
+   - Poor separation of concerns
+   - Tight coupling
+   - Missing comments for complex logic
+
+3. **Performance Issues**
+   - Inefficient algorithms
+   - Memory leaks (event listeners, subscriptions)
+   - Unnecessary re-renders in React
+   - Missing memoization where needed
+   - Inefficient database queries (if applicable)
+
+4. **Maintainability**
+   - Hard-coded values
+   - Missing type definitions
+   - Inconsistent code style
+   - Poor file organization
+   - Missing tests for new code
+
+5. **React-Specific**
+   - Missing key props in lists
+   - Direct state mutations
+   - Missing cleanup in useEffect
+   - Unnecessary useState/useEffect
+   - Prop drilling issues
+
+Provide a detailed report with:
+- Issue type and severity
+- File paths and line numbers
+- Description of the problem
+- Impact on maintainability/performance
+- Recommended refactoring approach
+```
+
+#### Agent 4: Implementation Correctness
+
+**Focus:** Verify code implements requirements correctly
+
+**Instructions for Agent 4:**
+
+```
+Analyze the git diff for implementation correctness:
+
+1. **Logic Errors**
+   - Incorrect conditional logic
+   - Wrong variable usage
+   - Off-by-one errors
+   - Race conditions
+   - Missing null/undefined checks
+
+2. **Functional Requirements**
+   - Missing features from requirements
+   - Incorrect feature implementation
+   - Edge cases not handled
+   - Missing validation
+
+3. **Integration Issues**
+   - Incorrect API usage
+   - Wrong data format handling
+   - Missing error handling for external calls
+   - Incorrect state management
+
+4. **Type Errors**
+   - Type mismatches
+   - Missing type guards
+   - Incorrect type assertions
+   - Unsafe type operations
+
+5. **Testing Gaps**
+   - Missing unit tests
+   - Missing integration tests
+   - Tests don't cover edge cases
+   - Tests are incorrect
+
+Provide a detailed report with:
+- Issue description
+- File paths and line numbers
+- Expected vs actual behavior
+- Steps to reproduce (if applicable)
+- Recommended fix
+```
+
+#### Agent 5: Architecture & Design Patterns
+
+**Focus:** Architectural issues and design pattern violations
+
+**Instructions for Agent 5:**
+
+```
+Analyze the git diff for architectural and design issues:
+
+1. **Architecture Violations**
+   - Violation of project structure patterns
+   - Incorrect layer separation
+   - Missing abstractions
+   - Tight coupling between modules
+
+2. **Design Patterns**
+   - Incorrect pattern usage
+   - Missing patterns where needed
+   - Anti-patterns
+
+3. **Project-Specific Patterns**
+   - Check against project documentation (docs/ folder)
+   - Verify route organization (server routes)
+   - Check provider patterns (server providers)
+   - Verify component organization (UI components)
+
+4. **API Design**
+   - RESTful API violations
+   - Inconsistent response formats
+   - Missing error handling
+   - Incorrect status codes
+
+5. **State Management**
+   - Incorrect state management patterns
+   - Missing state normalization
+   - Inefficient state updates
+
+Provide a detailed report with:
+- Architectural issue description
+- File paths and affected areas
+- Impact on system design
+- Recommended architectural changes
+```
+
+### Phase 3: Consolidate Findings
+
+After all 5 deep dive agents complete their analysis:
+
+1. **Collect all findings** from each agent
+2. **Prioritize issues**:
+   - CRITICAL: Tech stack invalid code (build-breaking)
+   - HIGH: Security vulnerabilities, critical logic errors
+   - MEDIUM: Code quality issues, architectural problems
+   - LOW: Minor code smells, style issues
+
+3. **Group by file** to understand impact per file
+4. **Create a master report** summarizing all findings
+
+### Phase 4: Deepcode Fixes (5 Agents)
+
+Launch 5 deepcode agents to fix the issues found. Each agent should be invoked with the `@deepcode` agent.
+
+#### Deepcode Agent 1: Fix Tech Stack Invalid Code
+
+**Priority:** CRITICAL - Fix first
+
+**Instructions:**
+
+```
+Fix all invalid code based on tech stack issues identified by Agent 1.
+
+Focus on:
+1. Fixing TypeScript syntax errors
+2. Updating deprecated Node.js APIs
+3. Fixing React 19 compatibility issues
+4. Correcting Express 5 API usage
+5. Fixing type errors
+6. Resolving build-breaking issues
+
+After fixes, verify:
+- Code compiles without errors
+- TypeScript types are correct
+- No deprecated API usage
+```
+
+#### Deepcode Agent 2: Fix Security Vulnerabilities
+
+**Priority:** HIGH
+
+**Instructions:**
+
+```
+Fix all security vulnerabilities identified by Agent 2.
+
+Focus on:
+1. Adding input validation
+2. Fixing injection vulnerabilities
+3. Securing authentication/authorization
+4. Fixing insecure data handling
+5. Updating vulnerable dependencies
+6. Securing Electron IPC
+
+After fixes, verify:
+- Security vulnerabilities are addressed
+- No sensitive data exposure
+- Proper authentication/authorization
+```
+
+#### Deepcode Agent 3: Refactor Dirty Code
+
+**Priority:** MEDIUM
+
+**Instructions:**
+
+```
+Refactor code quality issues identified by Agent 3.
+
+Focus on:
+1. Extracting long functions
+2. Reducing complexity
+3. Removing duplicate code
+4. Adding error handling
+5. Improving React component structure
+6. Adding missing comments
+
+After fixes, verify:
+- Code follows best practices
+- No code smells remain
+- Performance optimizations applied
+```
+
+#### Deepcode Agent 4: Fix Implementation Errors
+
+**Priority:** HIGH
+
+**Instructions:**
+
+```
+Fix implementation correctness issues identified by Agent 4.
+
+Focus on:
+1. Fixing logic errors
+2. Adding missing features
+3. Handling edge cases
+4. Fixing type errors
+5. Adding missing tests
+
+After fixes, verify:
+- Logic is correct
+- Edge cases handled
+- Tests pass
+```
+
+#### Deepcode Agent 5: Fix Architectural Issues
+
+**Priority:** MEDIUM
+
+**Instructions:**
+
+```
+Fix architectural issues identified by Agent 5.
+
+Focus on:
+1. Correcting architecture violations
+2. Applying proper design patterns
+3. Fixing API design issues
+4. Improving state management
+5. Following project patterns
+
+After fixes, verify:
+- Architecture is sound
+- Patterns are correctly applied
+- Code follows project structure
+```
+
+### Phase 5: Verification
+
+After all fixes are complete:
+
+1. **Run TypeScript compilation check**
+
+   ```bash
+   npm run build:packages
+   ```
+
+2. **Run linting**
+
+   ```bash
+   npm run lint
+   ```
+
+3. **Run tests** (if applicable)
+
+   ```bash
+   npm run test:server
+   npm run test
+   ```
+
+4. **Verify git diff** shows only intended changes
+
+   ```bash
+   git diff HEAD
+   ```
+
+5. **Create summary report**:
+   - Issues found by each agent
+   - Issues fixed by each agent
+   - Remaining issues (if any)
+   - Verification results
+
+## Workflow Summary
+
+1. ✅ Accept optional target branch argument (defaults to main/master if not provided)
+2. ✅ Determine current branch and target branch (from argument or auto-detect main/master)
+3. ✅ Get git diff comparing current branch against target branch (`git diff $TARGET_REF...HEAD`)
+4. ✅ Include uncommitted changes in analysis (`git diff HEAD`, `git diff --cached`)
+5. ✅ Launch 5 deep dive agents (parallel analysis) with branch diff
+6. ✅ Consolidate findings and prioritize
+7. ✅ Launch 5 deepcode agents (sequential fixes, priority order)
+8. ✅ Verify fixes with build/lint/test
+9. ✅ Report summary
+
+## Notes
+
+- **Tech stack validation is HIGHEST PRIORITY** - invalid code must be fixed first
+- **Target branch argument**: The command accepts an optional target branch name as the first argument. If not provided, it automatically detects and uses `main` or `master` (in that order)
+- Each deep dive agent should work independently and provide comprehensive analysis
+- Deepcode agents should fix issues in priority order
+- All fixes should maintain existing functionality
+- If an agent finds no issues in their domain, they should report "No issues found"
+- If fixes introduce new issues, they should be caught in verification phase
+- The target branch is validated to ensure it exists (locally or remotely) before proceeding with the review
--- a/.claude/commands/gh-issue.md
+++ b/.claude/commands/gh-issue.md
@@ -0,0 +1,74 @@
+# GitHub Issue Fix Command
+
+Fetch a GitHub issue by number, verify it's a real issue, and fix it if valid.
+
+## Usage
+
+This command accepts a GitHub issue number as input (e.g., `123`).
+
+## Instructions
+
+1. **Get the issue number from the user**
+   - The issue number should be provided as an argument to this command
+   - If no number is provided, ask the user for it
+
+2. **Fetch the GitHub issue**
+   - Determine the current project path (check if there's a current project context)
+   - Verify the project has a GitHub remote:
+     ```bash
+     git remote get-url origin
+     ```
+   - Fetch the issue details using GitHub CLI:
+     ```bash
+     gh issue view <ISSUE_NUMBER> --json number,title,state,author,createdAt,labels,url,body,assignees
+     ```
+   - If the command fails, report the error and stop
+
+3. **Verify the issue is real and valid**
+   - Check that the issue exists (not 404)
+   - Check the issue state:
+     - If **closed**: Inform the user and ask if they still want to proceed
+     - If **open**: Proceed with validation
+   - Review the issue content:
+     - Read the title and body to understand what needs to be fixed
+     - Check labels for context (bug, enhancement, etc.)
+     - Note any assignees or linked PRs
+
+4. **Validate the issue**
+   - Determine if this is a legitimate issue that needs fixing:
+     - Is the description clear and actionable?
+     - Does it describe a real problem or feature request?
+     - Are there any obvious signs it's spam or invalid?
+   - If the issue seems invalid or unclear:
+     - Report findings to the user
+     - Ask if they want to proceed anyway
+     - Stop if user confirms it's not valid
+
+5. **If the issue is valid, proceed to fix it**
+   - Analyze what needs to be done based on the issue description
+   - Check the current codebase state:
+     - Run relevant tests to see current behavior
+     - Check if the issue is already fixed
+     - Look for related code that might need changes
+   - Implement the fix:
+     - Make necessary code changes
+     - Update or add tests as needed
+     - Ensure the fix addresses the issue description
+   - Verify the fix:
+     - Run tests to ensure nothing broke
+     - If possible, manually verify the fix addresses the issue
+
+6. **Report summary**
+   - Issue number and title
+   - Issue state (open/closed)
+   - Whether the issue was validated as real
+   - What was fixed (if anything)
+   - Any tests that were updated or added
+   - Next steps (if any)
+
+## Error Handling
+
+- If GitHub CLI (`gh`) is not installed or authenticated, report error and stop
+- If the project doesn't have a GitHub remote, report error and stop
+- If the issue number doesn't exist, report error and stop
+- If the issue is unclear or invalid, report findings and ask user before proceeding
--- a/.claude/commands/release.md
+++ b/.claude/commands/release.md
@@ -0,0 +1,77 @@
+# Release Command
+
+Bump the package.json version (major, minor, or patch) and build the Electron app with the new version.
+
+## Usage
+
+This command accepts a version bump type as input:
+
+- `patch` - Bump patch version (0.1.0 -> 0.1.1)
+- `minor` - Bump minor version (0.1.0 -> 0.2.0)
+- `major` - Bump major version (0.1.0 -> 1.0.0)
+
+## Instructions
+
+1. **Get the bump type from the user**
+   - The bump type should be provided as an argument (patch, minor, or major)
+   - If no type is provided, ask the user which type they want
+
+2. **Bump the version**
+   - Run the version bump script:
+     ```bash
+     node apps/ui/scripts/bump-version.mjs <type>
+     ```
+   - This updates both `apps/ui/package.json` and `apps/server/package.json` with the new version (keeps them in sync)
+   - Verify the version was updated correctly by checking the output
+
+3. **Build the Electron app**
+   - Run the electron build:
+     ```bash
+     npm run build:electron --workspace=apps/ui
+     ```
+   - The build process automatically:
+     - Uses the version from `package.json` for artifact names (e.g., `Automaker-1.2.3-x64.zip`)
+     - Injects the version into the app via Vite's `__APP_VERSION__` constant
+     - Displays the version below the logo in the sidebar
+
+4. **Commit the version bump**
+   - Stage the updated package.json files:
+     ```bash
+     git add apps/ui/package.json apps/server/package.json
+     ```
+   - Commit with a release message:
+     ```bash
+     git commit -m "chore: release v<version>"
+     ```
+
+5. **Create and push the git tag**
+   - Create an annotated tag for the release:
+     ```bash
+     git tag -a v<version> -m "Release v<version>"
+     ```
+   - Push the commit and tag to remote:
+     ```bash
+     git push && git push origin v<version>
+     ```
+
+6. **Verify the release**
+   - Check that the build completed successfully
+   - Confirm the version appears correctly in the built artifacts
+   - The version will be displayed in the app UI below the logo
+   - Verify the tag is visible on the remote repository
+
+## Version Centralization
+
+The version is centralized and synchronized in both `apps/ui/package.json` and `apps/server/package.json`:
+
+- **Electron builds**: Automatically read from `apps/ui/package.json` via electron-builder's `${version}` variable in `artifactName`
+- **App display**: Injected at build time via Vite's `define` config as `__APP_VERSION__` constant (defined in `apps/ui/vite.config.mts`)
+- **Server API**: Read from `apps/server/package.json` via `apps/server/src/lib/version.ts` utility (used in health check endpoints)
+- **Type safety**: Defined in `apps/ui/src/vite-env.d.ts` as `declare const __APP_VERSION__: string`
+
+This ensures consistency across:
+
+- Build artifact names (e.g., `Automaker-1.2.3-x64.zip`)
+- App UI display (shown as `v1.2.3` below the logo in `apps/ui/src/components/layout/sidebar/components/automaker-logo.tsx`)
+- Server health endpoints (`/` and `/detailed`)
+- Package metadata (both UI and server packages stay in sync)
--- a/.claude/commands/review.md
+++ b/.claude/commands/review.md
@@ -0,0 +1,484 @@
+# Code Review Command
+
+Comprehensive code review using multiple deep dive agents to analyze git diff for correctness, security, code quality, and tech stack compliance, followed by automated fixes using deepcode agents.
+
+## Usage
+
+This command analyzes all changes in the git diff and verifies:
+
+1. **Invalid code based on tech stack** (HIGHEST PRIORITY)
+2. Security vulnerabilities
+3. Code quality issues (dirty code)
+4. Implementation correctness
+5. Architecture and design pattern violations
+
+Then automatically fixes any issues found.
+
+## Instructions
+
+### Phase 1: Get Git Diff
+
+1. **Get the current git diff**
+
+   ```bash
+   git diff HEAD
+   ```
+
+   If you need staged changes instead:
+
+   ```bash
+   git diff --cached
+   ```
+
+   Or to compare the working tree against a base branch:
+
+   ```bash
+   git diff <base-branch>
+   ```
+
+2. **Get list of changed files**
+
+   ```bash
+   git diff --name-only HEAD
+   ```
+
+3. **Understand the tech stack** (for validation):
+   - **Node.js**: >=22.0.0 <23.0.0
+   - **TypeScript**: 5.9.3
+   - **React**: 19.2.3
+   - **Express**: 5.2.1
+   - **Electron**: 39.2.7
+   - **Vite**: 7.3.0
+   - **Vitest**: 4.0.16
+   - Check `package.json` files for exact versions
+
+### Phase 2: Deep Dive Analysis (5 Agents)
+
+Launch 5 separate deep dive agents, each with a specific focus area. Each agent should be invoked with the `@deepdive` agent and given the git diff along with their specific instructions.
+
+#### Agent 1: Tech Stack Validation (HIGHEST PRIORITY)
+
+**Focus:** Verify code is valid for the tech stack
+
+**Instructions for Agent 1:**
+
+```
+Analyze the git diff for invalid code based on the tech stack:
+
+1. **TypeScript/JavaScript Syntax**
+   - Check for valid TypeScript syntax (no invalid type annotations, correct import/export syntax)
+   - Verify Node.js API usage is compatible with Node.js >=22.0.0 <23.0.0
+   - Check for deprecated APIs or features not available in the Node.js version
+   - Verify ES module syntax (type: "module" in package.json)
+
+2. **React 19.2.3 Compatibility**
+   - Check for deprecated React APIs or patterns
+   - Verify hooks usage is correct for React 19
+   - Check for invalid JSX syntax
+   - Verify component patterns match React 19 conventions
+
+3. **Express 5.2.1 Compatibility**
+   - Check for deprecated Express APIs
+   - Verify middleware usage is correct for Express 5
+   - Check request/response handling patterns
+
+4. **Type Safety**
+   - Verify TypeScript types are correctly used
+   - Check for `any` types that should be properly typed
+   - Verify type imports/exports are correct
+   - Check for missing type definitions
+
+5. **Build System Compatibility**
+   - Verify Vite-specific code (imports, config) is valid
+   - Check Electron-specific APIs are used correctly
+   - Verify module resolution paths are correct
+
+6. **Package Dependencies**
+   - Check for imports from packages not in package.json
+   - Verify version compatibility between dependencies
+   - Check for circular dependencies
+
+Provide a detailed report with:
+- File paths and line numbers of invalid code
+- Specific error description (what's wrong and why)
+- Expected vs actual behavior
+- Priority level (CRITICAL for build-breaking issues)
+```
+
+#### Agent 2: Security Vulnerability Scanner
+
+**Focus:** Security issues and vulnerabilities
+
+**Instructions for Agent 2:**
+
+```
+Analyze the git diff for security vulnerabilities:
+
+1. **Injection Vulnerabilities**
+   - SQL injection (if applicable)
+   - Command injection (exec, spawn, etc.)
+   - Path traversal vulnerabilities
+   - XSS vulnerabilities in React components
+
+2. **Authentication & Authorization**
+   - Missing authentication checks
+   - Insecure token handling
+   - Authorization bypasses
+   - Session management issues
+
+3. **Data Handling**
+   - Unsafe deserialization
+   - Insecure file operations
+   - Missing input validation
+   - Sensitive data exposure (secrets, tokens, passwords)
+
+4. **Dependencies**
+   - Known vulnerable packages
+   - Insecure dependency versions
+   - Missing security patches
+
+5. **API Security**
+   - Missing CORS configuration
+   - Insecure API endpoints
+   - Missing rate limiting
+   - Insecure WebSocket connections
+
+6. **Electron-Specific**
+   - Insecure IPC communication
+   - Missing context isolation checks
+   - Insecure preload scripts
+   - Missing CSP headers
+
+Provide a detailed report with:
+- Vulnerability type and severity (CRITICAL, HIGH, MEDIUM, LOW)
+- File paths and line numbers
+- Attack vector description
+- Recommended fix approach
+```
+
+#### Agent 3: Code Quality & Clean Code
+
+**Focus:** Dirty code, code smells, and quality issues
+
+**Instructions for Agent 3:**
+
+```
+Analyze the git diff for code quality issues:
+
+1. **Code Smells**
+   - Long functions/methods (>50 lines)
+   - High cyclomatic complexity
+   - Duplicate code
+   - Dead code
+   - Magic numbers/strings
+
+2. **Best Practices**
+   - Missing error handling
+   - Inconsistent naming conventions
+   - Poor separation of concerns
+   - Tight coupling
+   - Missing comments for complex logic
+
+3. **Performance Issues**
+   - Inefficient algorithms
+   - Memory leaks (event listeners, subscriptions)
+   - Unnecessary re-renders in React
+   - Missing memoization where needed
+   - Inefficient database queries (if applicable)
+
+4. **Maintainability**
+   - Hard-coded values
+   - Missing type definitions
+   - Inconsistent code style
+   - Poor file organization
+   - Missing tests for new code
+
+5. **React-Specific**
+   - Missing key props in lists
+   - Direct state mutations
+   - Missing cleanup in useEffect
+   - Unnecessary useState/useEffect
+   - Prop drilling issues
+
+Provide a detailed report with:
+- Issue type and severity
+- File paths and line numbers
+- Description of the problem
+- Impact on maintainability/performance
+- Recommended refactoring approach
+```
+
+#### Agent 4: Implementation Correctness
+
+**Focus:** Verify code implements requirements correctly
+
+**Instructions for Agent 4:**
+
+```
+Analyze the git diff for implementation correctness:
+
+1. **Logic Errors**
+   - Incorrect conditional logic
+   - Wrong variable usage
+   - Off-by-one errors
+   - Race conditions
+   - Missing null/undefined checks
+
+2. **Functional Requirements**
+   - Missing features from requirements
+   - Incorrect feature implementation
+   - Edge cases not handled
+   - Missing validation
+
+3. **Integration Issues**
+   - Incorrect API usage
+   - Wrong data format handling
+   - Missing error handling for external calls
+   - Incorrect state management
+
+4. **Type Errors**
+   - Type mismatches
+   - Missing type guards
+   - Incorrect type assertions
+   - Unsafe type operations
+
+5. **Testing Gaps**
+   - Missing unit tests
+   - Missing integration tests
+   - Tests don't cover edge cases
+   - Tests are incorrect
+
+Provide a detailed report with:
+- Issue description
+- File paths and line numbers
+- Expected vs actual behavior
+- Steps to reproduce (if applicable)
+- Recommended fix
+```
+
+#### Agent 5: Architecture & Design Patterns
+
+**Focus:** Architectural issues and design pattern violations
+
+**Instructions for Agent 5:**
+
+```
+Analyze the git diff for architectural and design issues:
+
+1. **Architecture Violations**
+   - Violation of project structure patterns
+   - Incorrect layer separation
+   - Missing abstractions
+   - Tight coupling between modules
+
+2. **Design Patterns**
+   - Incorrect pattern usage
+   - Missing patterns where needed
+   - Anti-patterns
+
+3. **Project-Specific Patterns**
+   - Check against project documentation (docs/ folder)
+   - Verify route organization (server routes)
+   - Check provider patterns (server providers)
+   - Verify component organization (UI components)
+
+4. **API Design**
+   - RESTful API violations
+   - Inconsistent response formats
+   - Missing error handling
+   - Incorrect status codes
+
+5. **State Management**
+   - Incorrect state management patterns
+   - Missing state normalization
+   - Inefficient state updates
+
+Provide a detailed report with:
+- Architectural issue description
+- File paths and affected areas
+- Impact on system design
+- Recommended architectural changes
+```
+
+### Phase 3: Consolidate Findings
+
+After all 5 deep dive agents complete their analysis:
+
+1. **Collect all findings** from each agent
+2. **Prioritize issues**:
+   - CRITICAL: Tech stack invalid code (build-breaking)
+   - HIGH: Security vulnerabilities, critical logic errors
+   - MEDIUM: Code quality issues, architectural problems
+   - LOW: Minor code smells, style issues
+
+3. **Group by file** to understand impact per file
+4. **Create a master report** summarizing all findings
+
+### Phase 4: Deepcode Fixes (5 Agents)
+
+Launch 5 deepcode agents to fix the issues found. Invoke each one via the `@deepcode` agent.
+
+#### Deepcode Agent 1: Fix Tech Stack Invalid Code
+
+**Priority:** CRITICAL - Fix first
+
+**Instructions:**
+
+```
+Fix all invalid code based on tech stack issues identified by Agent 1.
+
+Focus on:
+1. Fixing TypeScript syntax errors
+2. Updating deprecated Node.js APIs
+3. Fixing React 19 compatibility issues
+4. Correcting Express 5 API usage
+5. Fixing type errors
+6. Resolving build-breaking issues
+
+After fixes, verify:
+- Code compiles without errors
+- TypeScript types are correct
+- No deprecated API usage
+```
+
+#### Deepcode Agent 2: Fix Security Vulnerabilities
+
+**Priority:** HIGH
+
+**Instructions:**
+
+```
+Fix all security vulnerabilities identified by Agent 2.
+
+Focus on:
+1. Adding input validation
+2. Fixing injection vulnerabilities
+3. Securing authentication/authorization
+4. Fixing insecure data handling
+5. Updating vulnerable dependencies
+6. Securing Electron IPC
+
+After fixes, verify:
+- Security vulnerabilities are addressed
+- No sensitive data exposure
+- Proper authentication/authorization
+```
+
+#### Deepcode Agent 3: Refactor Dirty Code
+
+**Priority:** MEDIUM
+
+**Instructions:**
+
+```
+Refactor code quality issues identified by Agent 3.
+
+Focus on:
+1. Extracting long functions
+2. Reducing complexity
+3. Removing duplicate code
+4. Adding error handling
+5. Improving React component structure
+6. Adding missing comments
+
+After fixes, verify:
+- Code follows best practices
+- No code smells remain
+- Performance optimizations applied
+```
+
+#### Deepcode Agent 4: Fix Implementation Errors
+
+**Priority:** HIGH
+
+**Instructions:**
+
+```
+Fix implementation correctness issues identified by Agent 4.
+
+Focus on:
+1. Fixing logic errors
+2. Adding missing features
+3. Handling edge cases
+4. Fixing type errors
+5. Adding missing tests
+
+After fixes, verify:
+- Logic is correct
+- Edge cases handled
+- Tests pass
+```
+
+#### Deepcode Agent 5: Fix Architectural Issues
+
+**Priority:** MEDIUM
+
+**Instructions:**
+
+```
+Fix architectural issues identified by Agent 5.
+
+Focus on:
+1. Correcting architecture violations
+2. Applying proper design patterns
+3. Fixing API design issues
+4. Improving state management
+5. Following project patterns
+
+After fixes, verify:
+- Architecture is sound
+- Patterns are correctly applied
+- Code follows project structure
+```
+
+### Phase 5: Verification
+
+After all fixes are complete:
+
+1. **Run TypeScript compilation check**
+
+   ```bash
+   npm run build:packages
+   ```
+
+2. **Run linting**
+
+   ```bash
+   npm run lint
+   ```
+
+3. **Run tests** (if applicable)
+
+   ```bash
+   npm run test:server
+   npm run test
+   ```
+
+4. **Verify git diff** shows only intended changes
+
+   ```bash
+   git diff HEAD
+   ```
+
+5. **Create summary report**:
+   - Issues found by each agent
+   - Issues fixed by each agent
+   - Remaining issues (if any)
+   - Verification results
+
+## Workflow Summary
+
+1. ✅ Get git diff
+2. ✅ Launch 5 deep dive agents (parallel analysis)
+3. ✅ Consolidate findings and prioritize
+4. ✅ Launch 5 deepcode agents (sequential fixes, priority order)
+5. ✅ Verify fixes with build/lint/test
+6. ✅ Report summary
+
+## Notes
+
+- **Tech stack validation is HIGHEST PRIORITY** - invalid code must be fixed first
+- Each deep dive agent should work independently and provide comprehensive analysis
+- Deepcode agents should fix issues in priority order (CRITICAL first, then HIGH, then MEDIUM), not in agent-number order
+- All fixes should maintain existing functionality
+- If an agent finds no issues in their domain, they should report "No issues found"
+- If fixes introduce new issues, they should be caught in the verification phase
--- a/.claude/commands/thorough.md
+++ b/.claude/commands/thorough.md
@@ -0,0 +1,45 @@
+When you think you are done, you are NOT done.
+
+You must run a mandatory 3-pass verification before concluding:
+
+## Pass 1: Correctness & Functionality
+
+- [ ] Verify logic matches requirements and specifications
+- [ ] Check type safety (TypeScript types are correct and complete)
+- [ ] Ensure imports are correct and follow project conventions
+- [ ] Verify all functions/classes work as intended
+- [ ] Check that return values and side effects are correct
+- [ ] Run relevant tests if they exist, or verify testability
+- [ ] Confirm integration with existing code works properly
+
+## Pass 2: Edge Cases & Safety
+
+- [ ] Handle null/undefined inputs gracefully
+- [ ] Validate all user inputs and external data
+- [ ] Check error handling (try/catch, error boundaries, etc.)
+- [ ] Verify security considerations (no sensitive data exposure, proper auth checks)
+- [ ] Test boundary conditions (empty arrays, zero values, max lengths, etc.)
+- [ ] Ensure resource cleanup (file handles, connections, timers)
+- [ ] Check for potential race conditions or async issues
+- [ ] Verify file path security (no directory traversal vulnerabilities)
+
+## Pass 3: Maintainability & Code Quality
+
+- [ ] Code follows project style guide and conventions
+- [ ] Functions/classes are single-purpose and well-named
+- [ ] Remove dead code, unused imports, and console.logs
+- [ ] Extract magic numbers/strings into named constants
+- [ ] Check for code duplication (DRY principle)
+- [ ] Verify appropriate abstraction levels (not over/under-engineered)
+- [ ] Add necessary comments for complex logic
+- [ ] Ensure consistent error messages and logging
+- [ ] Check that code is readable and self-documenting
+- [ ] Verify proper separation of concerns
+
+**For each pass, explicitly report:**
+
+- What you checked
+- Any issues found and how they were fixed
+- Any remaining concerns or trade-offs
+
+Only after completing all three passes with explicit findings may you conclude the work is done.
--- a/.claude/commands/validate-build.md
+++ b/.claude/commands/validate-build.md
@@ -0,0 +1,49 @@
+# Project Build and Fix Command
+
+Run all builds and intelligently fix any failures based on what changed.
+
+## Instructions
+
+1. **Run the build**
+
+   ```bash
+   npm run build
+   ```
+
+   This builds all packages and the UI application.
+
+2. **If the build succeeds**, report success and stop.
+
+3. **If the build fails**, analyze the failures:
+   - Note which build step failed and the error messages
+   - Check for TypeScript compilation errors, missing dependencies, or configuration issues
+   - Run `git diff main` to see what code has changed
+
+4. **Determine the nature of the failure**:
+   - **If the failure is due to intentional changes** (new features, refactoring, dependency updates):
+     - Fix any TypeScript type errors introduced by the changes
+     - Update build configuration if needed (e.g., tsconfig.json, vite.config.mts)
+     - Ensure all new dependencies are properly installed
+     - Fix import paths or module resolution issues
+
+   - **If the failure appears to be a regression** (broken imports, missing files, configuration errors):
+     - Fix the source code to restore the build
+     - Check for accidentally deleted files or broken references
+     - Verify build configuration files are correct
+
+5. **Common build issues to check**:
+   - **TypeScript errors**: Fix type mismatches, missing types, or incorrect imports
+   - **Missing dependencies**: Run `npm install` if packages are missing
+   - **Import/export errors**: Fix incorrect import paths or missing exports
+   - **Build configuration**: Check tsconfig.json, vite.config.mts, or other build configs
+   - **Package build order**: Ensure `build:packages` completes before building apps
+
+6. **How to decide if it's intentional vs regression**:
+   - Look at the git diff and commit messages
+   - If the change was deliberate and introduced new code that needs fixing → fix the new code
+   - If the change broke existing functionality that should still build → fix the regression
+   - When in doubt, ask the user
+
+7. **After making fixes**, re-run the build to verify everything compiles successfully.
+
+8. **Report summary** of what was fixed (TypeScript errors, configuration issues, missing dependencies, etc.).
--- a/.claude/commands/validate-tests.md
+++ b/.claude/commands/validate-tests.md
@@ -0,0 +1,36 @@
+# Project Test and Fix Command
+
+Run all tests and intelligently fix any failures based on what changed.
+
+## Instructions
+
+1. **Run all tests**
+
+   ```bash
+   npm run test:all
+   ```
+
+2. **If all tests pass**, report success and stop.
+
+3. **If any tests fail**, analyze the failures:
+   - Note which tests failed and their error messages
+   - Run `git diff main` to see what code has changed
+
+4. **Determine the nature of the change**:
+   - **If the logic change is intentional** (new feature, refactor, behavior change):
+     - Update the failing tests to match the new expected behavior
+     - The tests should reflect what the code NOW does correctly
+
+   - **If the logic change appears to be a bug** (regression, unintended side effect):
+     - Fix the source code to restore the expected behavior
+     - Do NOT modify the tests - they are catching a real bug
+
+5. **How to decide if it's a bug vs intentional change**:
+   - Look at the git diff and commit messages
+   - If the change was deliberate and the test expectations are now outdated → update tests
+   - If the change broke existing functionality that should still work → fix the code
+   - When in doubt, ask the user
+
+6. **After making fixes**, re-run the tests to verify everything passes.
+
+7. **Report summary** of what was fixed (tests updated vs code fixed).
--- a/.claude_settings.json
+++ b/.claude_settings.json
@@ -1,24 +0,0 @@
-{
-  "sandbox": {
-    "enabled": true,
-    "autoAllowBashIfSandboxed": true
-  },
-  "permissions": {
-    "defaultMode": "acceptEdits",
-    "allow": [
-      "Read(./**)",
-      "Write(./**)",
-      "Edit(./**)",
-      "Glob(./**)",
-      "Grep(./**)",
-      "Bash(*)",
-      "mcp__puppeteer__puppeteer_navigate",
-      "mcp__puppeteer__puppeteer_screenshot",
-      "mcp__puppeteer__puppeteer_click",
-      "mcp__puppeteer__puppeteer_fill",
-      "mcp__puppeteer__puppeteer_select",
-      "mcp__puppeteer__puppeteer_hover",
-      "mcp__puppeteer__puppeteer_evaluate"
-    ]
-  }
-}
--- a/.dockerignore
+++ b/.dockerignore
@@ -0,0 +1,19 @@
+# Dependencies
+node_modules/
+**/node_modules/
+
+# Build outputs
+dist/
+**/dist/
+dist-electron/
+**/dist-electron/
+build/
+**/build/
+.next/
+**/.next/
+.nuxt/
+**/.nuxt/
+out/
+**/out/
+.cache/
+**/.cache/
--- a/.github/ISSUE_TEMPLATE/bug_report.yml
+++ b/.github/ISSUE_TEMPLATE/bug_report.yml
@@ -0,0 +1,117 @@
+name: Bug Report
+description: File a bug report to help us improve Automaker
+title: '[Bug]: '
+labels: ['bug']
+body:
+  - type: markdown
+    attributes:
+      value: |
+        Thanks for taking the time to report a bug! Please fill out the form below with as much detail as possible.
+
+  - type: dropdown
+    id: operating-system
+    attributes:
+      label: Operating System
+      description: What operating system are you using?
+      options:
+        - macOS
+        - Windows
+        - Linux
+        - Other
+      default: 0
+    validations:
+      required: true
+
+  - type: dropdown
+    id: run-mode
+    attributes:
+      label: Run Mode
+      description: How are you running Automaker?
+      options:
+        - Electron (Desktop App)
+        - Web (Browser)
+        - Docker
+      default: 0
+    validations:
+      required: true
+
+  - type: input
+    id: app-version
+    attributes:
+      label: App Version
+      description: What version of Automaker are you using? (e.g., 0.1.0)
+      placeholder: '0.1.0'
+    validations:
+      required: true
+
+  - type: textarea
+    id: bug-description
+    attributes:
+      label: Bug Description
+      description: A clear and concise description of what the bug is.
+      placeholder: Describe the bug...
+    validations:
+      required: true
+
+  - type: textarea
+    id: steps-to-reproduce
+    attributes:
+      label: Steps to Reproduce
+      description: Steps to reproduce the behavior
+      placeholder: |
+        1. Go to '...'
+        2. Click on '...'
+        3. Scroll down to '...'
+        4. See error
+    validations:
+      required: true
+
+  - type: textarea
+    id: expected-behavior
+    attributes:
+      label: Expected Behavior
+      description: A clear and concise description of what you expected to happen.
+      placeholder: What should have happened?
+    validations:
+      required: true
+
+  - type: textarea
+    id: actual-behavior
+    attributes:
+      label: Actual Behavior
+      description: A clear and concise description of what actually happened.
+      placeholder: What actually happened?
+    validations:
+      required: true
+
+  - type: textarea
+    id: screenshots
+    attributes:
+      label: Screenshots
+      description: If applicable, add screenshots to help explain your problem.
+      placeholder: Drag and drop screenshots here or paste image URLs
+
+  - type: textarea
+    id: logs
+    attributes:
+      label: Relevant Logs
+      description: If applicable, paste relevant logs or error messages.
+      placeholder: Paste logs here...
+      render: shell
+
+  - type: textarea
+    id: additional-context
+    attributes:
+      label: Additional Context
+      description: Add any other context about the problem here.
+      placeholder: Any additional information that might be helpful...
+
+  - type: checkboxes
+    id: terms
+    attributes:
+      label: Checklist
+      options:
+        - label: I have searched existing issues to ensure this bug hasn't been reported already
+          required: true
+        - label: I have provided all required information above
+          required: true
--- a/.github/ISSUE_TEMPLATE/feature_request.yml
+++ b/.github/ISSUE_TEMPLATE/feature_request.yml
@@ -0,0 +1,108 @@
+name: Feature Request
+description: Suggest a new feature or enhancement for Automaker
+title: '[Feature]: '
+labels: ['enhancement']
+body:
+  - type: markdown
+    attributes:
+      value: |
+        Thanks for taking the time to suggest a feature! Please fill out the form below to help us understand your request.
+
+  - type: dropdown
+    id: feature-area
+    attributes:
+      label: Feature Area
+      description: Which area of Automaker does this feature relate to?
+      options:
+        - UI/UX (User Interface)
+        - Agent/AI
+        - Kanban Board
+        - Git/Worktree Management
+        - Project Management
+        - Settings/Configuration
+        - Documentation
+        - Performance
+        - Other
+      default: 0
+    validations:
+      required: true
+
+  - type: dropdown
+    id: priority
+    attributes:
+      label: Priority
+      description: How important is this feature to your workflow?
+      options:
+        - Nice to have
+        - Would improve my workflow
+        - Critical for my use case
+      default: 0
+    validations:
+      required: true
+
+  - type: textarea
+    id: problem-statement
+    attributes:
+      label: Problem Statement
+      description: Is your feature request related to a problem? Please describe the problem you're trying to solve.
+      placeholder: A clear and concise description of what the problem is. Ex. I'm always frustrated when...
+    validations:
+      required: true
+
+  - type: textarea
+    id: proposed-solution
+    attributes:
+      label: Proposed Solution
+      description: Describe the solution you'd like to see implemented.
+      placeholder: A clear and concise description of what you want to happen.
+    validations:
+      required: true
+
+  - type: textarea
+    id: alternatives-considered
+    attributes:
+      label: Alternatives Considered
+      description: Describe any alternative solutions or workarounds you've considered.
+      placeholder: A clear and concise description of any alternative solutions or features you've considered.
+    validations:
+      required: false
+
+  - type: textarea
+    id: use-cases
+    attributes:
+      label: Use Cases
+      description: Describe specific scenarios where this feature would be useful.
+      placeholder: |
+        1. When working on...
+        2. As a user who needs to...
+        3. In situations where...
+    validations:
+      required: false
+
+  - type: textarea
+    id: mockups
+    attributes:
+      label: Mockups/Screenshots
+      description: If applicable, add mockups, wireframes, or screenshots to help illustrate your feature request.
+      placeholder: Drag and drop images here or paste image URLs
+    validations:
+      required: false
+
+  - type: textarea
+    id: additional-context
+    attributes:
+      label: Additional Context
+      description: Add any other context, references, or examples about the feature request here.
+      placeholder: Any additional information that might be helpful...
+    validations:
+      required: false
+
+  - type: checkboxes
+    id: terms
+    attributes:
+      label: Checklist
+      options:
+        - label: I have searched existing issues to ensure this feature hasn't been requested already
+          required: true
+        - label: I have provided a clear description of the problem and proposed solution
+          required: true
--- a/.github/actions/setup-project/action.yml
+++ b/.github/actions/setup-project/action.yml
@@ -41,7 +41,8 @@ runs:
      # Use npm install instead of npm ci to correctly resolve platform-specific
      # optional dependencies (e.g., @tailwindcss/oxide, lightningcss binaries)
      # Skip scripts to avoid electron-builder install-app-deps which uses too much memory
-      run: npm install --ignore-scripts
+      # Use --force to allow platform-specific dev dependencies like dmg-license on non-darwin platforms
+      run: npm install --ignore-scripts --force

    - name: Install Linux native bindings
      shell: bash
--- a/.github/workflows/e2e-tests.yml
+++ b/.github/workflows/e2e-tests.yml
@@ -31,24 +31,99 @@ jobs:
      - name: Build server
        run: npm run build --workspace=apps/server

+      - name: Set up Git user
+        run: |
+          git config --global user.name "GitHub CI"
+          git config --global user.email "ci@example.com"
+
      - name: Start backend server
-        run: npm run start --workspace=apps/server &
+        run: |
+          echo "Starting backend server..."
+          # Start server in background and save PID
+          npm run start --workspace=apps/server > backend.log 2>&1 &
+          SERVER_PID=$!
+          echo "Server started with PID: $SERVER_PID"
+          echo "SERVER_PID=$SERVER_PID" >> $GITHUB_ENV
+
        env:
          PORT: 3008
          NODE_ENV: test
+          # Use a deterministic API key so Playwright can log in reliably
+          AUTOMAKER_API_KEY: test-api-key-for-e2e-tests
+          # Reduce log noise in CI
+          AUTOMAKER_HIDE_API_KEY: 'true'
+          # Avoid real API calls during CI
+          AUTOMAKER_MOCK_AGENT: 'true'
+          # Simulate containerized environment to skip sandbox confirmation dialogs
+          IS_CONTAINERIZED: 'true'

      - name: Wait for backend server
        run: |
          echo "Waiting for backend server to be ready..."
-          for i in {1..30}; do
-            if curl -s http://localhost:3008/api/health > /dev/null 2>&1; then
+
+          # Check that the server PID was exported by the previous step
+          if [ -z "$SERVER_PID" ]; then
+            echo "ERROR: Server PID not found in environment"
+            cat backend.log 2>/dev/null || echo "No backend log found"
+            exit 1
+          fi
+
+          # Check if process is actually running
+          if ! kill -0 $SERVER_PID 2>/dev/null; then
+            echo "ERROR: Server process $SERVER_PID is not running!"
+            echo "=== Backend logs ==="
+            cat backend.log
+            echo ""
+            echo "=== Recent system logs ==="
+            dmesg 2>/dev/null | tail -20 || echo "No dmesg available"
+            exit 1
+          fi
+
+          # Wait for health endpoint
+          for i in {1..60}; do
+            if curl -s -f http://localhost:3008/api/health > /dev/null 2>&1; then
              echo "Backend server is ready!"
+              echo "=== Backend logs ==="
+              cat backend.log
+              echo ""
+              echo "Health check response:"
+              curl -s http://localhost:3008/api/health | jq . 2>/dev/null || echo "Health check: $(curl -s http://localhost:3008/api/health 2>/dev/null || echo 'No response')"
              exit 0
            fi
-            echo "Waiting... ($i/30)"
+            
+            # Check if server process is still running
+            if ! kill -0 $SERVER_PID 2>/dev/null; then
+              echo "ERROR: Server process died during wait!"
+              echo "=== Backend logs ==="
+              cat backend.log
+              exit 1
+            fi
+            
+            echo "Waiting... ($i/60)"
            sleep 1
          done
-          echo "Backend server failed to start!"
+
+          echo "ERROR: Backend server failed to start within 60 seconds!"
+          echo "=== Backend logs ==="
+          cat backend.log
+          echo ""
+          echo "=== Process status ==="
+          ps aux | grep -E "(node|tsx)" | grep -v grep || echo "No node processes found"
+          echo ""
+          echo "=== Port status ==="
+          netstat -tlnp 2>/dev/null | grep :3008 || echo "Port 3008 not listening"
+          lsof -i :3008 2>/dev/null || echo "lsof not available or port not in use"
+          echo ""
+          echo "=== Health endpoint test ==="
+          curl -v http://localhost:3008/api/health 2>&1 || echo "Health endpoint failed"
+
+          # Kill the server process if it's still hanging
+          if kill -0 $SERVER_PID 2>/dev/null; then
+            echo ""
+            echo "Killing stuck server process..."
+            kill -9 $SERVER_PID 2>/dev/null || true
+          fi
+
          exit 1

      - name: Run E2E tests
@@ -59,6 +134,20 @@ jobs:
          CI: true
          VITE_SERVER_URL: http://localhost:3008
          VITE_SKIP_SETUP: 'true'
+          # Keep UI-side login/defaults consistent
+          AUTOMAKER_API_KEY: test-api-key-for-e2e-tests
+
+      - name: Print backend logs on failure
+        if: failure()
+        run: |
+          echo "=== E2E Tests Failed - Backend Logs ==="
+          cat backend.log 2>/dev/null || echo "No backend log found"
+          echo ""
+          echo "=== Process status at failure ==="
+          ps aux | grep -E "(node|tsx)" | grep -v grep || echo "No node processes found"
+          echo ""
+          echo "=== Port status ==="
+          netstat -tlnp 2>/dev/null | grep :3008 || echo "Port 3008 not listening"

      - name: Upload Playwright report
        uses: actions/upload-artifact@v4
@@ -68,10 +157,22 @@ jobs:
          path: apps/ui/playwright-report/
          retention-days: 7

-      - name: Upload test results
+      - name: Upload test results (screenshots, traces, videos)
        uses: actions/upload-artifact@v4
-        if: failure()
+        if: always()
        with:
          name: test-results
-          path: apps/ui/test-results/
+          path: |
+            apps/ui/test-results/
          retention-days: 7
+          if-no-files-found: ignore
+
+      - name: Cleanup - Kill backend server
+        if: always()
+        run: |
+          if [ -n "$SERVER_PID" ]; then
+            echo "Cleaning up backend server (PID: $SERVER_PID)..."
+            kill $SERVER_PID 2>/dev/null || true
+            kill -9 $SERVER_PID 2>/dev/null || true
+            echo "Backend server cleanup complete"
+          fi
--- a/.github/workflows/format-check.yml
+++ b/.github/workflows/format-check.yml
@@ -25,7 +25,7 @@ jobs:
          cache-dependency-path: package-lock.json

      - name: Install dependencies
-        run: npm install --ignore-scripts
+        run: npm install --ignore-scripts --force

      - name: Check formatting
        run: npm run format:check
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -4,6 +4,9 @@ on:
  release:
    types: [published]

+permissions:
+  contents: write
+
 jobs:
  build:
    strategy:
@@ -35,6 +38,11 @@ jobs:
        with:
          check-lockfile: 'true'

+      - name: Install RPM build tools (Linux)
+        if: matrix.os == 'ubuntu-latest'
+        shell: bash
+        run: sudo apt-get update && sudo apt-get install -y rpm
+
      - name: Build Electron app (macOS)
        if: matrix.os == 'macos-latest'
        shell: bash
@@ -57,7 +65,10 @@ jobs:
        uses: actions/upload-artifact@v4
        with:
          name: macos-builds
-          path: apps/ui/release/*.{dmg,zip}
+          path: |
+            apps/ui/release/*.dmg
+            apps/ui/release/*.zip
+          if-no-files-found: error
          retention-days: 30

      - name: Upload Windows artifacts
@@ -66,6 +77,7 @@ jobs:
        with:
          name: windows-builds
          path: apps/ui/release/*.exe
+          if-no-files-found: error
          retention-days: 30

      - name: Upload Linux artifacts
@@ -73,7 +85,11 @@ jobs:
        uses: actions/upload-artifact@v4
        with:
          name: linux-builds
-          path: apps/ui/release/*.{AppImage,deb}
+          path: |
+            apps/ui/release/*.AppImage
+            apps/ui/release/*.deb
+            apps/ui/release/*.rpm
+          if-no-files-found: error
          retention-days: 30

  upload:
@@ -103,9 +119,13 @@ jobs:
      - name: Upload to GitHub Release
        uses: softprops/action-gh-release@v2
        with:
+          fail_on_unmatched_files: true
          files: |
-            artifacts/macos-builds/*
-            artifacts/windows-builds/*
-            artifacts/linux-builds/*
+            artifacts/macos-builds/*.dmg
+            artifacts/macos-builds/*.zip
+            artifacts/windows-builds/*.exe
+            artifacts/linux-builds/*.AppImage
+            artifacts/linux-builds/*.deb
+            artifacts/linux-builds/*.rpm
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
--- a/.github/workflows/security-audit.yml
+++ b/.github/workflows/security-audit.yml
@@ -26,5 +26,5 @@ jobs:
          check-lockfile: 'true'

      - name: Run npm audit
-        run: npm audit --audit-level=moderate
+        run: npm audit --audit-level=critical
        continue-on-error: false
--- a/.gitignore
+++ b/.gitignore
@@ -73,10 +73,25 @@ blob-report/
 !.env.example
 !.env.local.example

+# Codex config (contains API keys)
+.codex/config.toml
+
 # TypeScript
 *.tsbuildinfo

 # Misc
 *.pem

-docker-compose.override.yml
+docker-compose.override.yml
+.claude/docker-compose.override.yml
+.claude/hans/
+
+pnpm-lock.yaml
+yarn.lock
+
+# Fork-specific workflow files (should never be committed)
+# API key files
+data/.api-key
+data/credentials.json
+data/
+.codex/
--- a/.husky/pre-commit
+++ b/.husky/pre-commit
@@ -1 +1,51 @@
-npx lint-staged
+#!/usr/bin/env sh
+
+# Try to load nvm if available (optional - works without it too)
+if [ -z "$NVM_DIR" ]; then
+  # Check for Herd's nvm first (macOS with Herd)
+  if [ -s "$HOME/Library/Application Support/Herd/config/nvm/nvm.sh" ]; then
+    export NVM_DIR="$HOME/Library/Application Support/Herd/config/nvm"
+  # Then check standard nvm location
+  elif [ -s "$HOME/.nvm/nvm.sh" ]; then
+    export NVM_DIR="$HOME/.nvm"
+  fi
+fi
+
+# Source nvm if found (silently skip if not available)
+[ -n "$NVM_DIR" ] && [ -s "$NVM_DIR/nvm.sh" ] && \. "$NVM_DIR/nvm.sh" 2>/dev/null
+
+# Load node version from .nvmrc if using nvm (silently skip if nvm not available or fails)
+if [ -f .nvmrc ] && command -v nvm >/dev/null 2>&1; then
+  # Check if Unix nvm was sourced (it's a shell function with NVM_DIR set)
+  if [ -n "$NVM_DIR" ] && type nvm 2>/dev/null | grep -q "function"; then
+    # Unix nvm: reads .nvmrc automatically
+    nvm use >/dev/null 2>&1 || true
+  else
+    # nvm-windows: needs explicit version from .nvmrc
+    NODE_VERSION=$(cat .nvmrc | tr -d '[:space:]')
+    if [ -n "$NODE_VERSION" ]; then
+      nvm use "$NODE_VERSION" >/dev/null 2>&1 || true
+    fi
+  fi
+fi
+
+# Ensure common system paths are in PATH (for systems without nvm)
+# This helps find node/npm installed via Homebrew, system packages, etc.
+if [ -n "$WINDIR" ]; then
+  export PATH="$PATH:/c/Program Files/nodejs:/c/Program Files (x86)/nodejs"
+  export PATH="$PATH:$APPDATA/npm:$LOCALAPPDATA/Programs/nodejs"
+else 
+  export PATH="$PATH:/usr/local/bin:/opt/homebrew/bin:/usr/bin"
+fi
+
+# Run lint-staged - works with or without nvm
+# Prefer npx, fallback to npm exec, both work with system-installed Node.js
+if command -v npx >/dev/null 2>&1; then
+  npx lint-staged
+elif command -v npm >/dev/null 2>&1; then
+  npm exec -- lint-staged
+else
+  echo "Error: Neither npx nor npm found in PATH."
+  echo "Please ensure Node.js is installed (via nvm, Homebrew, system package manager, etc.)"
+  exit 1
+fi
--- a/.nvmrc
+++ b/.nvmrc
@@ -0,0 +1,2 @@
+22
+
--- a/.prettierignore
+++ b/.prettierignore
@@ -23,6 +23,8 @@ pnpm-lock.yaml
 # Generated files
 *.min.js
 *.min.css
+routeTree.gen.ts
+apps/ui/src/routeTree.gen.ts

 # Test artifacts
 test-results/
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -0,0 +1,176 @@
+# CLAUDE.md
+
+This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
+
+## Project Overview
+
+Automaker is an autonomous AI development studio built as an npm workspace monorepo. It provides a Kanban-based workflow where AI agents (powered by the Claude Agent SDK) implement features in isolated git worktrees.
+
+## Common Commands
+
+```bash
+# Development
+npm run dev                 # Interactive launcher (choose web or electron)
+npm run dev:web             # Web browser mode (localhost:3007)
+npm run dev:electron        # Desktop app mode
+npm run dev:electron:debug  # Desktop with DevTools open
+
+# Building
+npm run build               # Build web application
+npm run build:packages      # Build all shared packages (required before other builds)
+npm run build:electron      # Build desktop app for current platform
+npm run build:server        # Build server only
+
+# Testing
+npm run test                # E2E tests (Playwright, headless)
+npm run test:headed         # E2E tests with browser visible
+npm run test:server         # Server unit tests (Vitest)
+npm run test:packages       # All shared package tests
+npm run test:all            # All tests (packages + server)
+
+# Single test file
+npm run test:server -- tests/unit/specific.test.ts
+
+# Linting and formatting
+npm run lint                # ESLint
+npm run format              # Prettier write
+npm run format:check        # Prettier check
+```
+
+## Architecture
+
+### Monorepo Structure
+
+```
+automaker/
+├── apps/
+│   ├── ui/           # React + Vite + Electron frontend (port 3007)
+│   └── server/       # Express + WebSocket backend (port 3008)
+└── libs/             # Shared packages (@automaker/*)
+    ├── types/        # Core TypeScript definitions (no dependencies)
+    ├── utils/        # Logging, errors, image processing, context loading
+    ├── prompts/      # AI prompt templates
+    ├── platform/     # Path management, security, process spawning
+    ├── model-resolver/    # Claude model alias resolution
+    ├── dependency-resolver/  # Feature dependency ordering
+    └── git-utils/    # Git operations & worktree management
+```
+
+### Package Dependency Chain
+
+Packages can only depend on packages above them:
+
+```
+@automaker/types (no dependencies)
+    ↓
+@automaker/utils, @automaker/prompts, @automaker/platform, @automaker/model-resolver, @automaker/dependency-resolver
+    ↓
+@automaker/git-utils
+    ↓
+@automaker/server, @automaker/ui
+```
+
+### Key Technologies
+
+- **Frontend**: React 19, Vite 7, Electron 39, TanStack Router, Zustand 5, Tailwind CSS 4
+- **Backend**: Express 5, WebSocket (ws), Claude Agent SDK, node-pty
+- **Testing**: Playwright (E2E), Vitest (unit)
+
+### Server Architecture
+
+The server (`apps/server/src/`) follows a modular pattern:
+
+- `routes/` - Express route handlers organized by feature (agent, features, auto-mode, worktree, etc.)
+- `services/` - Business logic (AgentService, AutoModeService, FeatureLoader, TerminalService)
+- `providers/` - AI provider abstraction (currently Claude via Claude Agent SDK)
+- `lib/` - Utilities (events, auth, worktree metadata)
+
+### Frontend Architecture
+
+The UI (`apps/ui/src/`) uses:
+
+- `routes/` - TanStack Router file-based routing
+- `components/views/` - Main view components (board, settings, terminal, etc.)
+- `store/` - Zustand stores with persistence (app-store.ts, setup-store.ts)
+- `hooks/` - Custom React hooks
+- `lib/` - Utilities and API client
+
+## Data Storage
+
+### Per-Project Data (`.automaker/`)
+
+```
+.automaker/
+├── features/              # Feature JSON files and images
+│   └── {featureId}/
+│       ├── feature.json
+│       ├── agent-output.md
+│       └── images/
+├── context/               # Context files for AI agents (CLAUDE.md, etc.)
+├── settings.json          # Project-specific settings
+├── spec.md               # Project specification
+└── analysis.json         # Project structure analysis
+```
+
+### Global Data (`DATA_DIR`, default `./data`)
+
+```
+data/
+├── settings.json          # Global settings, profiles, shortcuts
+├── credentials.json       # API keys
+├── sessions-metadata.json # Chat session metadata
+└── agent-sessions/        # Conversation histories
+```
+
+## Import Conventions
+
+Always import from shared packages; never import from old paths:
+
+```typescript
+// ✅ Correct
+import type { Feature, ExecuteOptions } from '@automaker/types';
+import { createLogger, classifyError } from '@automaker/utils';
+import { getEnhancementPrompt } from '@automaker/prompts';
+import { getFeatureDir, ensureAutomakerDir } from '@automaker/platform';
+import { resolveModelString } from '@automaker/model-resolver';
+import { resolveDependencies } from '@automaker/dependency-resolver';
+import { getGitRepositoryDiffs } from '@automaker/git-utils';
+
+// ❌ Never import from old paths
+import { Feature } from '../services/feature-loader'; // Wrong
+import { createLogger } from '../lib/logger'; // Wrong
+```
+
+## Key Patterns
+
+### Event-Driven Architecture
+
+All server operations emit events that stream to the frontend via WebSocket. Events are created using `createEventEmitter()` from `lib/events.ts`.
+
+### Git Worktree Isolation
+
+Each feature executes in an isolated git worktree, created via `@automaker/git-utils`. This protects the main branch during AI agent execution.
+
+### Context Files
+
+Project-specific rules are stored in `.automaker/context/` and automatically loaded into agent prompts via `loadContextFiles()` from `@automaker/utils`.
+
+### Model Resolution
+
+Use `resolveModelString()` from `@automaker/model-resolver` to convert model aliases:
+
+- `haiku` → `claude-haiku-4-5`
+- `sonnet` → `claude-sonnet-4-20250514`
+- `opus` → `claude-opus-4-5-20251101`
+
+## Environment Variables
+
+- `ANTHROPIC_API_KEY` - Anthropic API key (or use Claude Code CLI auth)
+- `HOST` - Host to bind server to (default: 0.0.0.0)
+- `HOSTNAME` - Hostname for user-facing URLs (default: localhost)
+- `PORT` - Server port (default: 3008)
+- `DATA_DIR` - Data storage directory (default: ./data)
+- `ALLOWED_ROOT_DIRECTORY` - Restrict file operations to specific directory
+- `AUTOMAKER_MOCK_AGENT=true` - Enable mock agent mode for CI testing
+- `AUTOMAKER_AUTO_LOGIN=true` - Skip login prompt in development (disabled when NODE_ENV=production)
+- `VITE_HOSTNAME` - Hostname for frontend API URLs (default: localhost)
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -0,0 +1,740 @@
+# Contributing to Automaker
+
+Thank you for your interest in contributing to Automaker! We're excited to have you join our community of developers building the future of autonomous AI development.
+
+Automaker is an autonomous AI development studio that provides a Kanban-based workflow where AI agents implement features in isolated git worktrees. Whether you're fixing bugs, adding features, improving documentation, or suggesting ideas, your contributions help make this project better for everyone.
+
+This guide will help you get started with contributing to Automaker. Please take a moment to read through these guidelines to ensure a smooth contribution process.
+
+## Contribution License Agreement
+
+**Important:** By submitting, pushing, or contributing any code, documentation, pull requests, issues, or other materials to the Automaker project, you agree to assign all right, title, and interest in and to your contributions, including all copyrights, patents, and other intellectual property rights, to the Core Contributors of Automaker. This assignment is irrevocable and includes the right to use, modify, distribute, and monetize your contributions in any manner.
+
+**You understand and agree that you will have no right to receive any royalties, compensation, or other financial benefits from any revenue, income, or commercial use generated from your contributed code or any derivative works thereof.** All contributions are made without expectation of payment or financial return.
+
+For complete details on contribution terms and rights assignment, please review [Section 5 (CONTRIBUTIONS AND RIGHTS ASSIGNMENT) of the LICENSE](LICENSE#5-contributions-and-rights-assignment).
+
+## Table of Contents
+
+- [Contributing to Automaker](#contributing-to-automaker)
+  - [Table of Contents](#table-of-contents)
+  - [Getting Started](#getting-started)
+    - [Prerequisites](#prerequisites)
+    - [Fork and Clone](#fork-and-clone)
+    - [Development Setup](#development-setup)
+    - [Project Structure](#project-structure)
+  - [Pull Request Process](#pull-request-process)
+    - [Branching Strategy (RC Branches)](#branching-strategy-rc-branches)
+    - [Branch Naming Convention](#branch-naming-convention)
+    - [Commit Message Format](#commit-message-format)
+    - [Submitting a Pull Request](#submitting-a-pull-request)
+      - [1. Prepare Your Changes](#1-prepare-your-changes)
+      - [2. Run Pre-submission Checks](#2-run-pre-submission-checks)
+      - [3. Push Your Changes](#3-push-your-changes)
+      - [4. Open a Pull Request](#4-open-a-pull-request)
+      - [PR Requirements Checklist](#pr-requirements-checklist)
+    - [Review Process](#review-process)
+      - [What to Expect](#what-to-expect)
+      - [Review Focus Areas](#review-focus-areas)
+      - [Responding to Feedback](#responding-to-feedback)
+      - [Approval Criteria](#approval-criteria)
+      - [Getting Help](#getting-help)
+  - [Code Style Guidelines](#code-style-guidelines)
+  - [Testing Requirements](#testing-requirements)
+    - [Running Tests](#running-tests)
+    - [Test Frameworks](#test-frameworks)
+      - [End-to-End Tests (Playwright)](#end-to-end-tests-playwright)
+      - [Unit Tests (Vitest)](#unit-tests-vitest)
+    - [Writing Tests](#writing-tests)
+      - [When to Write Tests](#when-to-write-tests)
+    - [CI/CD Pipeline](#cicd-pipeline)
+      - [CI Checks](#ci-checks)
+      - [CI Testing Environment](#ci-testing-environment)
+      - [Viewing CI Results](#viewing-ci-results)
+      - [Common CI Failures](#common-ci-failures)
+    - [Coverage Requirements](#coverage-requirements)
+  - [Issue Reporting](#issue-reporting)
+    - [Bug Reports](#bug-reports)
+      - [Before Reporting](#before-reporting)
+      - [Bug Report Template](#bug-report-template)
+    - [Feature Requests](#feature-requests)
+      - [Before Requesting](#before-requesting)
+      - [Feature Request Template](#feature-request-template)
+    - [Security Issues](#security-issues)
+
+---
+
+## Getting Started
+
+### Prerequisites
+
+Before contributing to Automaker, ensure you have the following installed on your system:
+
+- **Node.js 18+** (tested with Node.js 22)
+  - Download from [nodejs.org](https://nodejs.org/)
+  - Verify installation: `node --version`
+- **npm** (comes with Node.js)
+  - Verify installation: `npm --version`
+- **Git** for version control
+  - Verify installation: `git --version`
+- **Claude Code CLI** or **Anthropic API Key** (for AI agent functionality)
+  - Required to run the AI development features
+
+**Optional but recommended:**
+
+- A code editor with TypeScript support (VS Code recommended)
+- GitHub CLI (`gh`) for easier PR management
+
+### Fork and Clone
+
+1. **Fork the repository** on GitHub
+   - Navigate to [https://github.com/AutoMaker-Org/automaker](https://github.com/AutoMaker-Org/automaker)
+   - Click the "Fork" button in the top-right corner
+   - This creates your own copy of the repository
+
+2. **Clone your fork locally**
+
+   ```bash
+   git clone https://github.com/YOUR_USERNAME/automaker.git
+   cd automaker
+   ```
+
+3. **Add the upstream remote** to keep your fork in sync
+
+   ```bash
+   git remote add upstream https://github.com/AutoMaker-Org/automaker.git
+   ```
+
+4. **Verify remotes**
+   ```bash
+   git remote -v
+   # Should show:
+   # origin    https://github.com/YOUR_USERNAME/automaker.git (fetch)
+   # origin    https://github.com/YOUR_USERNAME/automaker.git (push)
+   # upstream  https://github.com/AutoMaker-Org/automaker.git (fetch)
+   # upstream  https://github.com/AutoMaker-Org/automaker.git (push)
+   ```
+
+### Development Setup
+
+1. **Install dependencies**
+
+   ```bash
+   npm install
+   ```
+
+2. **Build shared packages** (required before running the app)
+
+   ```bash
+   npm run build:packages
+   ```
+
+3. **Start the development server**
+   ```bash
+   npm run dev          # Interactive launcher - choose mode
+   npm run dev:web      # Browser mode (web interface)
+   npm run dev:electron # Desktop app mode
+   ```
+
+**Common development commands:**
+
+| Command                  | Description                      |
+| ------------------------ | -------------------------------- |
+| `npm run dev`            | Interactive development launcher |
+| `npm run dev:web`        | Start in browser mode            |
+| `npm run dev:electron`   | Start desktop app                |
+| `npm run build`          | Build all packages and apps      |
+| `npm run build:packages` | Build shared packages only       |
+| `npm run lint`           | Run ESLint checks                |
+| `npm run format`         | Format code with Prettier        |
+| `npm run format:check`   | Check formatting without changes |
+| `npm run test`           | Run E2E tests (Playwright)       |
+| `npm run test:server`    | Run server unit tests            |
+| `npm run test:packages`  | Run package tests                |
+| `npm run test:all`       | Run all tests                    |
+
+### Project Structure
+
+Automaker is organized as an npm workspace monorepo:
+
+```
+automaker/
+├── apps/
+│   ├── ui/              # React + Vite + Electron frontend
+│   └── server/          # Express + WebSocket backend
+├── libs/                # Shared packages (@automaker/*)
+│   ├── types/               # Shared TypeScript types
+│   ├── utils/               # Utility functions
+│   ├── prompts/             # AI prompt templates
+│   ├── platform/            # Platform abstractions
+│   ├── model-resolver/      # AI model resolution
+│   ├── dependency-resolver/ # Dependency management
+│   └── git-utils/           # Git operations
+├── docs/                # Documentation
+└── package.json         # Root package configuration
+```
+
+**Key conventions:**
+
+- Always import from `@automaker/*` shared packages, never use relative paths to `libs/`
+- Frontend code lives in `apps/ui/`
+- Backend code lives in `apps/server/`
+- Shared logic should be in the appropriate `libs/` package
+
+---
+
+## Pull Request Process
+
+This section covers everything you need to know about contributing changes through pull requests, from creating your branch to getting your code merged.
+
+### Branching Strategy (RC Branches)
+
+Automaker uses **Release Candidate (RC) branches** for all development work. Understanding this workflow is essential before contributing.
+
+**How it works:**
+
+1. **All development happens on RC branches** - We maintain version-specific RC branches (e.g., `v0.10.0rc`, `v0.11.0rc`) where all active development occurs
+2. **RC branches are eventually merged to main** - Once an RC branch is stable and ready for release, it gets merged into `main`
+3. **Main branch is for releases only** - The `main` branch contains only released, stable code
+
+**Before creating a PR:**
+
+1. **Check for the latest RC branch** - Before starting work, check the repository for the current RC branch:
+
+   ```bash
+   git fetch upstream
+   git branch -r | grep rc
+   ```
+
+2. **Base your work on the RC branch** - Create your feature branch from the latest RC branch, not from `main`:
+
+   ```bash
+   # Find the latest RC branch (e.g., v0.11.0rc)
+   git checkout upstream/v0.11.0rc
+   git checkout -b feature/your-feature-name
+   ```
+
+3. **Target the RC branch in your PR** - When opening your pull request, set the base branch to the current RC branch, not `main`
+
+**Example workflow:**
+
+```bash
+# 1. Fetch latest changes
+git fetch upstream
+
+# 2. Check for RC branches
+git branch -r | grep rc
+# Output: upstream/v0.11.0rc
+
+# 3. Create your branch from the RC
+git checkout -b feature/add-dark-mode upstream/v0.11.0rc
+
+# 4. Make your changes and commit
+git commit -m "feat: Add dark mode support"
+
+# 5. Push to your fork
+git push origin feature/add-dark-mode
+
+# 6. Open PR targeting the RC branch (v0.11.0rc), NOT main
+```
+
+**Important:** PRs opened directly against `main` will be asked to retarget to the current RC branch.
+
+### Branch Naming Convention
+
+We use a consistent branch naming pattern to keep our repository organized:
+
+```
+<type>/<description>
+```
+
+**Branch types:**
+
+| Type       | Purpose                  | Example                           |
+| ---------- | ------------------------ | --------------------------------- |
+| `feature`  | New functionality        | `feature/add-user-authentication` |
+| `fix`      | Bug fixes                | `fix/resolve-memory-leak`         |
+| `docs`     | Documentation changes    | `docs/update-contributing-guide`  |
+| `refactor` | Code restructuring       | `refactor/simplify-api-handlers`  |
+| `test`     | Adding or updating tests | `test/add-utils-unit-tests`       |
+| `chore`    | Maintenance tasks        | `chore/update-dependencies`       |
+
+**Guidelines:**
+
+- Use lowercase letters and hyphens (no underscores or spaces)
+- Keep descriptions short but descriptive
+- Include issue number when applicable: `feature/123-add-login`
+
+```bash
+# Create and checkout a new feature branch
+git checkout -b feature/add-dark-mode
+
+# Create a fix branch with issue reference
+git checkout -b fix/456-resolve-login-error
+```
+
+### Commit Message Format
+
+We follow the **Conventional Commits** style for clear, readable commit history:
+
+```
+<type>: <description>
+
+[optional body]
+```
+
+**Commit types:**
+
+| Type       | Purpose                     |
+| ---------- | --------------------------- |
+| `feat`     | New feature                 |
+| `fix`      | Bug fix                     |
+| `docs`     | Documentation only          |
+| `style`    | Formatting (no code change) |
+| `refactor` | Code restructuring          |
+| `test`     | Adding or updating tests    |
+| `chore`    | Maintenance tasks           |
+
+**Guidelines:**
+
+- Use **imperative mood** ("Add feature" not "Added feature")
+- Keep first line under **72 characters**
+- Capitalize the first letter after the type prefix
+- No period at the end of the subject line
+- Add a blank line before the body for detailed explanations
+
+**Examples:**
+
+```bash
+# Simple commit
+git commit -m "feat: Add user authentication flow"
+
+# Commit with body for more context
+git commit -m "fix: Resolve memory leak in WebSocket handler
+
+The connection cleanup was not being called when clients
+disconnected unexpectedly. Added proper cleanup in the
+error handler to prevent memory accumulation."
+
+# Documentation update
+git commit -m "docs: Update API documentation"
+
+# Refactoring
+git commit -m "refactor: Simplify state management logic"
+```
+
+### Submitting a Pull Request
+
+Follow these steps to submit your contribution:
+
+#### 1. Prepare Your Changes
+
+Ensure you've synced with the latest upstream changes from the RC branch:
+
+```bash
+# Fetch latest changes from upstream
+git fetch upstream
+
+# Rebase your branch on the current RC branch (if needed)
+git rebase upstream/v0.11.0rc  # Use the current RC branch name
+```
+
+#### 2. Run Pre-submission Checks
+
+Before opening your PR, verify everything passes locally:
+
+```bash
+# Run all tests
+npm run test:all
+
+# Check formatting
+npm run format:check
+
+# Run linter
+npm run lint
+
+# Build to verify no compile errors
+npm run build
+```
+
+#### 3. Push Your Changes
+
+```bash
+# Push your branch to your fork
+git push origin feature/your-feature-name
+```
+
+#### 4. Open a Pull Request
+
+1. Go to your fork on GitHub
+2. Click "Compare & pull request" for your branch
+3. **Important:** Set the base repository to `AutoMaker-Org/automaker` and the base branch to the **current RC branch** (e.g., `v0.11.0rc`), not `main`
+4. Fill out the PR template completely
+
+#### PR Requirements Checklist
+
+Your PR should include:
+
+- [ ] **Targets the current RC branch** (not `main`) - see [Branching Strategy](#branching-strategy-rc-branches)
+- [ ] **Clear title** describing the change (use conventional commit format)
+- [ ] **Description** explaining what changed and why
+- [ ] **Link to related issue** (if applicable): `Closes #123` or `Fixes #456`
+- [ ] **All CI checks passing** (format, lint, build, tests)
+- [ ] **No merge conflicts** with the RC branch
+- [ ] **Tests included** for new functionality
+- [ ] **Documentation updated** if adding/changing public APIs
+
+**Example PR Description:**
+
+```markdown
+## Summary
+
+This PR adds dark mode support to the Automaker UI.
+
+- Implements theme toggle in settings panel
+- Adds CSS custom properties for theme colors
+- Persists theme preference to localStorage
+
+## Related Issue
+
+Closes #123
+
+## Testing
+
+- [x] Tested toggle functionality in Chrome and Firefox
+- [x] Verified theme persists across page reloads
+- [x] Checked accessibility contrast ratios
+
+## Screenshots
+
+[Include before/after screenshots for UI changes]
+```
+
+### Review Process
+
+All contributions go through code review to maintain quality:
+
+#### What to Expect
+
+1. **CI Checks Run First** - Automated checks (format, lint, build, tests) must pass before review
+2. **Maintainer Review** - The project maintainers will review your PR and decide whether to merge it
+3. **Feedback & Discussion** - The reviewer may ask questions or request changes
+4. **Iteration** - Make requested changes and push updates to the same branch
+5. **Approval & Merge** - Once approved and checks pass, your PR will be merged
+
+#### Review Focus Areas
+
+The reviewer checks for:
+
+- **Correctness** - Does the code work as intended?
+- **Clean Code** - Does it follow our [code style guidelines](#code-style-guidelines)?
+- **Test Coverage** - Are new features properly tested?
+- **Documentation** - Are public APIs documented?
+- **Breaking Changes** - Are any breaking changes discussed first?
+
+#### Responding to Feedback
+
+- Respond to **all** review comments, even if just to acknowledge
+- Ask questions if feedback is unclear
+- Push additional commits to address feedback (don't force-push during review)
+- Mark conversations as resolved once addressed
+
+#### Approval Criteria
+
+Your PR is ready to merge when:
+
+- ✅ All CI checks pass
+- ✅ The maintainer has approved the changes
+- ✅ All review comments are addressed
+- ✅ No unresolved merge conflicts
+
+#### Getting Help
+
+If your PR seems stuck:
+
+- Comment asking for a status update (mention @webdevcody if needed)
+- Reach out on [Discord](https://discord.gg/jjem7aEDKU)
+- Make sure all checks are passing and you've responded to all feedback
+
+---
+
+## Code Style Guidelines
+
+Automaker uses automated tooling to enforce code style. Run `npm run format` to format code and `npm run lint` to check for issues. Pre-commit hooks automatically format staged files before committing.
+
+---
+
+## Testing Requirements
+
+Testing helps prevent regressions. Automaker uses **Playwright** for end-to-end testing and **Vitest** for unit tests.
+
+### Running Tests
+
+Use these commands to run tests locally:
+
+| Command                        | Description                           |
+| ------------------------------ | ------------------------------------- |
+| `npm run test`                 | Run E2E tests (Playwright)            |
+| `npm run test:server`          | Run server unit tests (Vitest)        |
+| `npm run test:packages`        | Run shared package tests              |
+| `npm run test:all`             | Run all tests                         |
+| `npm run test:server:coverage` | Run server tests with coverage report |
+
+**Before submitting a PR**, always run the full test suite:
+
+```bash
+npm run test:all
+```
+
+### Test Frameworks
+
+#### End-to-End Tests (Playwright)
+
+E2E tests verify the entire application works correctly from a user's perspective.
+
+- **Framework:** [Playwright](https://playwright.dev/)
+- **Location:** `e2e/` directory
+- **Test ports:** UI on port 3007, Server on port 3008
+
+**Running E2E tests:**
+
+```bash
+# Run all E2E tests
+npm run test
+
+# Run with headed browser (useful for debugging)
+npx playwright test --headed
+
+# Run a specific test file
+npm test --workspace=@automaker/ui -- tests/example.spec.ts
+```
+
+**E2E Test Guidelines:**
+
+- Write tests from a user's perspective
+- Use descriptive test names that explain the scenario
+- Clean up test data after each test
+- Use appropriate timeouts for async operations
+- Prefer `locator` over direct selectors for resilience
+
+#### Unit Tests (Vitest)
+
+Unit tests verify individual functions and modules work correctly in isolation.
+
+- **Framework:** [Vitest](https://vitest.dev/)
+- **Location:** In the `tests/` directory within each package (e.g., `apps/server/tests/`)
+
+**Running unit tests:**
+
+```bash
+# Run all server unit tests
+npm run test:server
+
+# Run with coverage report
+npm run test:server:coverage
+
+# Run package tests
+npm run test:packages
+
+# Run in watch mode during development
+npx vitest --watch
+```
+
+**Unit Test Guidelines:**
+
+- Keep tests small and focused on one behavior
+- Use descriptive test names: `it('should return null when user is not found')`
+- Follow the AAA pattern: Arrange, Act, Assert
+- Mock external dependencies to isolate the unit under test
+- Aim for meaningful coverage, not just line coverage
+
+### Writing Tests
+
+#### When to Write Tests
+
+- **New features:** All new features should include tests
+- **Bug fixes:** Add a test that reproduces the bug before fixing
+- **Refactoring:** Ensure existing tests pass after refactoring
+- **Public APIs:** All public APIs must have test coverage
+
+### CI/CD Pipeline
+
+Automaker uses **GitHub Actions** for continuous integration. Every pull request triggers automated checks.
+
+#### CI Checks
+
+The following checks must pass before your PR can be merged:
+
+| Check             | Description                                   |
+| ----------------- | --------------------------------------------- |
+| **Format**        | Verifies code is formatted with Prettier      |
+| **Build**         | Ensures the project compiles without errors   |
+| **Package Tests** | Runs tests for shared `@automaker/*` packages |
+| **Server Tests**  | Runs server unit tests with coverage          |
+
+#### CI Testing Environment
+
+For CI environments, Automaker supports a mock agent mode:
+
+```bash
+# Enable mock agent mode for CI testing
+AUTOMAKER_MOCK_AGENT=true npm run test
+```
+
+This allows tests to run without requiring a real Claude API connection.
+
+#### Viewing CI Results
+
+1. Go to your PR on GitHub
+2. Scroll to the "Checks" section at the bottom
+3. Click on any failed check to see detailed logs
+4. Fix issues locally and push updates
+
+#### Common CI Failures
+
+| Issue               | Solution                                      |
+| ------------------- | --------------------------------------------- |
+| Format check failed | Run `npm run format` locally                  |
+| Build failed        | Run `npm run build` and fix TypeScript errors |
+| Tests failed        | Run `npm run test:all` locally to reproduce   |
+| Coverage decreased  | Add tests for new code paths                  |
+
+### Coverage Requirements
+
+While we don't enforce strict coverage percentages, we expect:
+
+- **New features:** Should include comprehensive tests
+- **Bug fixes:** Should include a regression test
+- **Critical paths:** Must have test coverage (authentication, data persistence, etc.)
+
+To view coverage reports locally:
+
+```bash
+npm run test:server:coverage
+```
+
+This generates an HTML report you can open in your browser to see which lines are covered.
+
+---
+
+## Issue Reporting
+
+Found a bug or have an idea for a new feature? We'd love to hear from you! This section explains how to report issues effectively.
+
+### Bug Reports
+
+When reporting a bug, please provide as much information as possible to help us understand and reproduce the issue.
+
+#### Before Reporting
+
+1. **Search existing issues** - Check if the bug has already been reported
+2. **Try the latest version** - Make sure you're running the latest version of Automaker
+3. **Reproduce the issue** - Verify you can consistently reproduce the bug
+
+#### Bug Report Template
+
+When creating a bug report, include:
+
+- **Title:** A clear, descriptive title summarizing the issue
+- **Environment:**
+  - Operating System and version
+  - Node.js version (`node --version`)
+  - Automaker version or commit hash
+- **Steps to Reproduce:** Numbered list of steps to reproduce the bug
+- **Expected Behavior:** What you expected to happen
+- **Actual Behavior:** What actually happened
+- **Logs/Screenshots:** Any relevant error messages, console output, or screenshots
+
+**Example Bug Report:**
+
+```markdown
+## Bug: WebSocket connection drops after 5 minutes of inactivity
+
+### Environment
+
+- OS: Windows 11
+- Node.js: 22.11.0
+- Automaker: commit abc1234
+
+### Steps to Reproduce
+
+1. Start the application with `npm run dev:web`
+2. Open the Kanban board
+3. Leave the browser tab open for 5+ minutes without interaction
+4. Try to move a card
+
+### Expected Behavior
+
+The card should move to the new column.
+
+### Actual Behavior
+
+The UI shows "Connection lost" and the card doesn't move.
+
+### Logs
+
+[WebSocket] Connection closed: 1006
+```
+
+### Feature Requests
+
+We welcome ideas for improving Automaker! Here's how to submit a feature request:
+
+#### Before Requesting
+
+1. **Check existing issues** - Your idea may already be proposed or in development
+2. **Consider scope** - Think about whether the feature fits Automaker's mission as an autonomous AI development studio
+
+#### Feature Request Template
+
+A good feature request includes:
+
+- **Title:** A brief, descriptive title
+- **Problem Statement:** What problem does this feature solve?
+- **Proposed Solution:** How do you envision this working?
+- **Alternatives Considered:** What other approaches did you consider?
+- **Additional Context:** Mockups, examples, or references that help explain your idea
+
+**Example Feature Request:**
+
+```markdown
+## Feature: Dark Mode Support
+
+### Problem Statement
+
+Working late at night, the bright UI causes eye strain and doesn't match
+my system's dark theme preference.
+
+### Proposed Solution
+
+Add a theme toggle in the settings panel that allows switching between
+light and dark modes. Ideally, it should also detect system preference.
+
+### Alternatives Considered
+
+- Browser extension to force dark mode (doesn't work well with custom styling)
+- Custom CSS override (breaks with updates)
+
+### Additional Context
+
+Similar to how VS Code handles themes - a dropdown in settings with
+immediate preview.
+```
+
+### Security Issues
+
+**Important:** If you discover a security vulnerability, please do NOT open a public issue. Instead:
+
+1. Join our [Discord server](https://discord.gg/jjem7aEDKU) and send a direct message to the user `@webdevcody`
+2. Include detailed steps to reproduce
+3. Allow time for us to address the issue before public disclosure
+
+We take security seriously and appreciate responsible disclosure.
+
+---
+
+For license and contribution terms, see the [LICENSE](LICENSE) file in the repository root and the [License section of the README](README.md#license).
+
+---
+
+Thank you for contributing to Automaker!
--- a/DEVELOPMENT_WORKFLOW.md
+++ b/DEVELOPMENT_WORKFLOW.md
@@ -0,0 +1,253 @@
+# Development Workflow
+
+This document defines the standard workflow for keeping a branch in sync with the upstream
+release candidate (RC) and for shipping feature work. It is paired with `check-sync.sh`.
+
+## Quick Decision Rule
+
+1. Ask the user to select a workflow:
+   - **Sync Workflow** → you are maintaining the current RC branch with fixes/improvements
+     and will push the same fixes to both origin and upstream RC when you have local
+     commits to publish.
+   - **PR Workflow** → you are starting new feature work on a new branch; upstream updates
+     happen via PR only.
+2. After the user selects, run:
+   ```bash
+   ./check-sync.sh
+   ```
+3. Use the status output to confirm alignment. If it reports **diverged**, default to
+   merging `upstream/<TARGET_RC>` into the current branch and preserving local commits.
+   For Sync Workflow, when the working tree is clean and you are behind upstream RC,
+   proceed with the fetch + merge without asking for additional confirmation.
+
+## Target RC Resolution
+
+The target RC is resolved dynamically so the workflow stays current as the RC changes.
+
+Resolution order:
+
+1. Latest `upstream/v*rc` branch (auto-detected)
+2. `upstream/HEAD` (fallback)
+3. If neither is available, you must pass `--rc <branch>`
+
+Override for a single run:
+
+```bash
+./check-sync.sh --rc <rc-branch>
+```
+
+## Pre-Flight Checklist
+
+1. Confirm a clean working tree:
+   ```bash
+   git status
+   ```
+2. Confirm the current branch:
+   ```bash
+   git branch --show-current
+   ```
+3. Ensure remotes exist (origin + upstream):
+   ```bash
+   git remote -v
+   ```
+
+## Sync Workflow (Upstream Sync)
+
+Use this flow when you are updating the current branch with fixes or improvements and
+intend to keep origin and upstream RC in lockstep.
+
+1. **Check sync status**
+   ```bash
+   ./check-sync.sh
+   ```
+2. **Update from upstream RC before editing (no pulls)**
+   - **Behind upstream RC** → fetch and merge RC into your branch:
+     ```bash
+     git fetch upstream
+     git merge upstream/<TARGET_RC> --no-edit
+     ```
+     When the working tree is clean and the user selected Sync Workflow, proceed without
+     an extra confirmation prompt.
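+   - **Diverged** → fetch and merge `upstream/<TARGET_RC>` as above, preserving local commits; stop and ask for guidance only if the merge cannot be completed.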
+3. **Resolve conflicts if needed**
+   - Handle conflicts intelligently: preserve upstream behavior and your local intent.
+4. **Make changes and commit (if you are delivering fixes)**
+   ```bash
+   git add -A
+   git commit -m "type: description"
+   ```
+5. **Build to verify**
+   ```bash
+   npm run build:packages
+   npm run build
+   ```
+6. **Push after a successful merge to keep remotes aligned**
+   - If you only merged upstream RC changes, push **origin only** to sync your fork:
+     ```bash
+     git push origin <branch>
+     ```
+   - If you have local fixes to publish, push **origin + upstream**:
+     ```bash
+     git push origin <branch>
+     git push upstream <branch>:<TARGET_RC>
+     ```
+   - Always ask the user which push to perform.
+   - Origin (origin-only sync):
+     ```bash
+     git push origin <branch>
+     ```
+   - Upstream RC (publish the same fixes when you have local commits):
+     ```bash
+     git push upstream <branch>:<TARGET_RC>
+     ```
+7. **Re-check sync**
+   ```bash
+   ./check-sync.sh
+   ```
+
+## PR Workflow (Feature Work)
+
+Use this flow only for new feature work on a new branch. Do not push to upstream RC.
+
+1. **Create or switch to a feature branch**
+   ```bash
+   git checkout -b <branch>
+   ```
+2. **Make changes and commit**
+   ```bash
+   git add -A
+   git commit -m "type: description"
+   ```
+3. **Merge upstream RC before shipping**
+   ```bash
+   git merge upstream/<TARGET_RC> --no-edit
+   ```
+4. **Build and/or test**
+   ```bash
+   npm run build:packages
+   npm run build
+   ```
+5. **Push to origin**
+   ```bash
+   git push -u origin <branch>
+   ```
+6. **Create or update the PR**
+   - Use `gh pr create` or the GitHub UI.
+7. **Review and follow-up**
+
+   - Apply feedback, commit changes, and push again.
+   - Re-run `./check-sync.sh` if additional upstream sync is needed.
+
+## Conflict Resolution Checklist
+
+1. Identify which changes are from upstream vs. local.
+2. Preserve both behaviors where possible; avoid dropping either side.
+3. Prefer minimal, safe integrations over refactors.
+4. Re-run build commands after resolving conflicts.
+5. Re-run `./check-sync.sh` to confirm status.
+
+## Build/Test Matrix
+
+- **Sync Workflow**: `npm run build:packages` and `npm run build`.
+- **PR Workflow**: `npm run build:packages` and `npm run build` (plus relevant tests).
+
+## Post-Sync Verification
+
+1. `git status` should be clean.
+2. `./check-sync.sh` should show expected alignment.
+3. Verify recent commits with:
+   ```bash
+   git log --oneline -5
+   ```
+
+## check-sync.sh Usage
+
+- Uses dynamic Target RC resolution (see above).
+- Override target RC:
+  ```bash
+  ./check-sync.sh --rc <rc-branch>
+  ```
+- Optional preview limit:
+  ```bash
+  ./check-sync.sh --preview 10
+  ```
+- The script prints sync status for both origin and upstream and previews recent commits
+  when you are behind.
+
+## Stop Conditions
+
+Stop and ask for guidance if any of the following are true:
+
+- The working tree is dirty and you are about to merge or push.
+- `./check-sync.sh` reports **diverged** during PR Workflow, or a merge cannot be completed.
+- The script cannot resolve a target RC and requests `--rc`.
+- A build fails after sync or conflict resolution.
+
+## AI Agent Guardrails
+
+- Always run `./check-sync.sh` before merges or pushes.
+- Always ask for explicit user approval before any push command.
+- Do not ask for additional confirmation before a Sync Workflow fetch + merge when the
+  working tree is clean and the user has already selected the Sync Workflow.
+- Choose Sync vs PR workflow based on intent (RC maintenance vs new feature work), not
+  on the script's workflow hint.
+- Only use force push when the user explicitly requests a history rewrite.
+- Ask for explicit approval before dependency installs, branch deletion, or destructive operations.
+- When resolving merge conflicts, preserve both upstream changes and local intent where possible.
+- Do not create or switch to new branches unless the user explicitly requests it.
+
+## AI Agent Decision Guidance
+
+Agents should provide concrete, task-specific suggestions instead of repeatedly asking
+open-ended questions. Use the user's stated goal and the `./check-sync.sh` status to
+propose a default path plus one or two alternatives, and only ask for confirmation when
+an action requires explicit approval.
+
+Default behavior:
+
+- If the intent is RC maintenance, recommend the Sync Workflow and proceed with
+  safe preparation steps (status checks, previews). If the branch is behind upstream RC,
+  fetch and merge without additional confirmation when the working tree is clean, then
+  push to origin to keep the fork aligned. Push upstream only when there are local fixes
+  to publish.
+- If the intent is new feature work, recommend the PR Workflow and proceed with safe
+  preparation steps (status checks, identifying scope). Ask for approval before merges,
+  pushes, or dependency installs.
+- If `./check-sync.sh` reports **diverged** during Sync Workflow, merge
+  `upstream/<TARGET_RC>` into the current branch and preserve local commits.
+- If `./check-sync.sh` reports **diverged** during PR Workflow, stop and ask for guidance
+  with a short explanation of the divergence and the minimal options to resolve it.
+  If the user's intent is RC maintenance, prefer the Sync Workflow regardless of the
+  script hint. When the intent is new feature work, use the PR Workflow and avoid upstream
+  RC pushes.
+
+Suggestion format (keep it short):
+
+- **Recommended**: one sentence with the default path and why it fits the task.
+- **Alternatives**: one or two options with the tradeoff or prerequisite.
+- **Approval points**: mention any upcoming actions that need explicit approval (exclude sync
+  workflow pushes and merges).
+
+## Failure Modes and How to Avoid Them
+
+Sync Workflow:
+
+- Wrong RC target: verify the auto-detected RC in `./check-sync.sh` output before merging.
+- Diverged from upstream RC: merge `upstream/<TARGET_RC>` and preserve local commits; stop and ask for guidance if the merge cannot be completed.
+- Dirty working tree: commit or stash before syncing to avoid accidental merges.
+- Missing remotes: ensure both `origin` and `upstream` are configured before syncing.
+- Build breaks after sync: run `npm run build:packages` and `npm run build` before pushing.
+
+PR Workflow:
+
+- Branch not synced to current RC: re-run `./check-sync.sh` and merge RC before shipping.
+- Pushing the wrong branch: confirm `git branch --show-current` before pushing.
+- Unreviewed changes: always commit and push to origin before opening or updating a PR.
+- Skipped tests/builds: run the build commands before declaring the PR ready.
+
+## Notes
+
+- Avoid merging with uncommitted changes; commit or stash first.
+- Prefer merge over rebase for PR branches; rebases rewrite history and often require a force push,
+  which should only be done with an explicit user request.
+- Use clear, conventional commit messages and split unrelated changes into separate commits.
--- a/Dockerfile
+++ b/Dockerfile
@@ -0,0 +1,225 @@
+# Automaker Multi-Stage Dockerfile
+# Single Dockerfile for both server and UI builds
+# Usage:
+#   docker build --target server -t automaker-server .
+#   docker build --target ui -t automaker-ui .
+# Or use docker-compose which selects targets automatically
+
+# =============================================================================
+# BASE STAGE - Common setup for all builds (DRY: defined once, used by all)
+# =============================================================================
+FROM node:22-slim AS base
+
+# Install build dependencies for native modules (node-pty); apt lists are removed in the same layer to keep it small
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    python3 make g++ \
+    && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /app
+
+# Copy root package files
+COPY package*.json ./
+
+# Copy all libs package.json files (centralized - add new libs here, exactly one COPY per lib)
+COPY libs/types/package*.json ./libs/types/
+COPY libs/utils/package*.json ./libs/utils/
+COPY libs/prompts/package*.json ./libs/prompts/
+COPY libs/platform/package*.json ./libs/platform/
+COPY libs/spec-parser/package*.json ./libs/spec-parser/
+COPY libs/model-resolver/package*.json ./libs/model-resolver/
+COPY libs/dependency-resolver/package*.json ./libs/dependency-resolver/
+COPY libs/git-utils/package*.json ./libs/git-utils/
+
+# Copy scripts (needed by npm workspace)
+COPY scripts ./scripts
+
+# =============================================================================
+# SERVER BUILD STAGE
+# =============================================================================
+FROM base AS server-builder
+
+# Copy only the server's package manifests before the sources, so the install step below runs against manifests alone
+COPY apps/server/package*.json ./apps/server/
+
+# Install dependencies with lifecycle scripts disabled (skips husky/prepare), then rebuild the node-pty native module explicitly
+RUN npm ci --ignore-scripts && npm rebuild node-pty
+
+# Copy the full library and server sources
+COPY libs ./libs
+COPY apps/server ./apps/server
+
+# Build the shared packages first (the server depends on them), then build the server workspace
+RUN npm run build:packages && npm run build --workspace=apps/server
+
+# =============================================================================
+# SERVER PRODUCTION STAGE
+# =============================================================================
+FROM node:22-slim AS server
+
+# Build argument for tracking which commit this image was built from
+ARG GIT_COMMIT_SHA=unknown
+LABEL automaker.git.commit.sha="${GIT_COMMIT_SHA}"
+
+# Build arguments for user ID matching (allows matching host user for mounted volumes)
+# Override at build time: docker build --build-arg UID=$(id -u) --build-arg GID=$(id -g) ...
+ARG UID=1001
+ARG GID=1001
+
+# Install git, curl, bash (for terminal), gosu (for user switching), CA certs, an SSH client, and GitHub CLI (pinned version, multi-arch; curl -f aborts the build on a failed download)
+# Also install Playwright/Chromium system dependencies (aligns with playwright install-deps on Debian/Ubuntu)
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    git curl bash gosu ca-certificates openssh-client \
+    # Playwright/Chromium dependencies
+    libglib2.0-0 libnss3 libnspr4 libdbus-1-3 libatk1.0-0 libatk-bridge2.0-0 \
+    libcups2 libdrm2 libxkbcommon0 libatspi2.0-0 libxcomposite1 libxdamage1 \
+    libxfixes3 libxrandr2 libgbm1 libasound2 libpango-1.0-0 libcairo2 \
+    libx11-6 libx11-xcb1 libxcb1 libxext6 libxrender1 libxss1 libxtst6 \
+    libxshmfence1 libgtk-3-0 libexpat1 libfontconfig1 fonts-liberation \
+    xdg-utils libpangocairo-1.0-0 libpangoft2-1.0-0 libu2f-udev libvulkan1 \
+    && GH_VERSION="2.63.2" \
+    && ARCH=$(uname -m) \
+    && case "$ARCH" in \
+        x86_64) GH_ARCH="amd64" ;; \
+        aarch64|arm64) GH_ARCH="arm64" ;; \
+        *) echo "Unsupported architecture: $ARCH" && exit 1 ;; \
+    esac \
+    && curl -fsSL "https://github.com/cli/cli/releases/download/v${GH_VERSION}/gh_${GH_VERSION}_linux_${GH_ARCH}.tar.gz" -o gh.tar.gz \
+    && tar -xzf gh.tar.gz \
+    && mv gh_${GH_VERSION}_linux_${GH_ARCH}/bin/gh /usr/local/bin/gh \
+    && rm -rf gh.tar.gz gh_${GH_VERSION}_linux_${GH_ARCH} \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install Claude CLI globally (available to all users via npm global bin)
+RUN npm install -g @anthropic-ai/claude-code
+
+# Create non-root user with home directory BEFORE installing Cursor CLI
+# Uses UID/GID build args to match host user for mounted volume permissions
+# Use -o flag to allow non-unique IDs (GID 1000 may already exist as 'node' group)
+RUN groupadd -o -g ${GID} automaker && \
+    useradd -o -u ${UID} -g automaker -m -d /home/automaker -s /bin/bash automaker && \
+    mkdir -p /home/automaker/.local/bin && \
+    mkdir -p /home/automaker/.cursor && \
+    chown -R automaker:automaker /home/automaker && \
+    chmod 700 /home/automaker/.cursor
+
+# Install Cursor CLI as the automaker user
+# HOME is set explicitly so the installer targets /home/automaker/.local/bin/. NOTE: `&&`/`||` chain left to right, so the trailing `|| echo` also masks a failed download or install, not only a failed version check
+USER automaker
+ENV HOME=/home/automaker
+RUN curl https://cursor.com/install -fsS | bash && \
+    echo "=== Checking Cursor CLI installation ===" && \
+    ls -la /home/automaker/.local/bin/ && \
+    echo "=== PATH is: $PATH ===" && \
+    (which cursor-agent && cursor-agent --version) || echo "cursor-agent installed (may need auth setup)"
+
+# Install OpenCode CLI (for multi-provider AI model access); still running as automaker, and the trailing `|| echo` likewise masks any earlier failure in the chain
+RUN curl -fsSL https://opencode.ai/install | bash && \
+    echo "=== Checking OpenCode CLI installation ===" && \
+    ls -la /home/automaker/.local/bin/ && \
+    (which opencode && opencode --version) || echo "opencode installed (may need auth setup)"
+USER root
+
+# Add the user-local CLI directory to PATH for login shells via /etc/profile.d
+RUN mkdir -p /etc/profile.d && \
+    echo 'export PATH="/home/automaker/.local/bin:$PATH"' > /etc/profile.d/cursor-cli.sh && \
+    chmod +x /etc/profile.d/cursor-cli.sh
+
+# Add to automaker's .bashrc for bash interactive shells
+RUN echo 'export PATH="/home/automaker/.local/bin:$PATH"' >> /home/automaker/.bashrc && \
+    chown automaker:automaker /home/automaker/.bashrc
+
+# Also add to root's .bashrc since docker exec defaults to root
+RUN echo 'export PATH="/home/automaker/.local/bin:$PATH"' >> /root/.bashrc
+
+WORKDIR /app
+
+# Copy root package.json (needed for workspace resolution)
+COPY --from=server-builder /app/package*.json ./
+
+# Copy built libs (workspace packages are symlinked in node_modules)
+COPY --from=server-builder /app/libs ./libs
+
+# Copy built server
+COPY --from=server-builder /app/apps/server/dist ./apps/server/dist
+COPY --from=server-builder /app/apps/server/package*.json ./apps/server/
+
+# Copy node_modules (includes symlinks to libs)
+COPY --from=server-builder /app/node_modules ./node_modules
+
+# Create data and projects directories
+RUN mkdir -p /data /projects && chown automaker:automaker /data /projects
+
+# Configure git for mounted volumes and authentication
+# Use --system so it's not overwritten by mounted user .gitconfig
+RUN git config --system --add safe.directory '*' && \
+    # Use gh as credential helper (works with GH_TOKEN env var)
+    git config --system credential.helper '!gh auth git-credential'
+
+# Copy entrypoint script for fixing permissions on mounted volumes
+COPY docker-entrypoint.sh /usr/local/bin/docker-entrypoint.sh
+RUN chmod +x /usr/local/bin/docker-entrypoint.sh
+
+# Note: We stay as root here so entrypoint can fix permissions
+# The entrypoint script will switch to automaker user before running the command
+
+# Environment variables (PORT is also read by the health check below)
+ENV PORT=3008
+ENV DATA_DIR=/data
+ENV HOME=/home/automaker
+# Add user's local bin to PATH for cursor-agent
+ENV PATH="/home/automaker/.local/bin:${PATH}"
+
+# Expose the default port (documentation only; publish the matching port if PORT is overridden)
+EXPOSE 3008
+
+# Health check via curl (already installed); the shell expands PORT at runtime so overriding it does not break the probe
+HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
+    CMD curl -f "http://localhost:${PORT:-3008}/api/health" || exit 1
+
+# Use entrypoint to fix permissions before starting
+ENTRYPOINT ["/usr/local/bin/docker-entrypoint.sh"]
+
+# Start server
+CMD ["node", "apps/server/dist/index.js"]
+
+# =============================================================================
+# UI BUILD STAGE
+# =============================================================================
+FROM base AS ui-builder
+
+# Copy only the UI's package manifests before the sources, so the install step below runs against manifests alone
+COPY apps/ui/package*.json ./apps/ui/
+
+# Install dependencies (--ignore-scripts to skip husky and build:packages in prepare script)
+RUN npm ci --ignore-scripts
+
+# Copy the full library and UI sources
+COPY libs ./libs
+COPY apps/ui ./apps/ui
+
+# Build packages in dependency order, then build UI
+# VITE_SERVER_URL tells the UI where to find the API server; it is set for the build step below, and the final nginx stage receives only the built files
+# Use ARG to allow overriding at build time: --build-arg VITE_SERVER_URL=http://api.example.com
+ARG VITE_SERVER_URL=http://localhost:3008
+ENV VITE_SKIP_ELECTRON=true
+ENV VITE_SERVER_URL=${VITE_SERVER_URL}
+RUN npm run build:packages && npm run build --workspace=apps/ui
+
+# =============================================================================
+# UI PRODUCTION STAGE
+# =============================================================================
+FROM nginx:alpine AS ui
+
+# Build argument for tracking which commit this image was built from
+ARG GIT_COMMIT_SHA=unknown
+LABEL automaker.git.commit.sha="${GIT_COMMIT_SHA}"
+
+# Copy the static bundle produced by the ui-builder stage into nginx's web root
+COPY --from=ui-builder /app/apps/ui/dist /usr/share/nginx/html
+
+# Copy nginx config for SPA routing (replaces the image's default site config)
+COPY apps/ui/nginx.conf /etc/nginx/conf.d/default.conf
+
+EXPOSE 80
+
+CMD ["nginx", "-g", "daemon off;"]
--- a/Dockerfile.dev
+++ b/Dockerfile.dev
@@ -0,0 +1,94 @@
+# Automaker Development Dockerfile
+# For development with live reload via volume mounting
+# Source code is NOT copied - it's mounted as a volume
+#
+# Usage:
+#   docker compose -f docker-compose.dev.yml up
+
+FROM node:22-slim
+
+# Install build dependencies for native modules (node-pty), runtime tools, and the pinned GitHub CLI (curl -f aborts the build on a failed download)
+# Also install Playwright/Chromium system dependencies (aligns with playwright install-deps on Debian/Ubuntu)
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    python3 make g++ \
+    git curl bash gosu ca-certificates openssh-client \
+    # Playwright/Chromium dependencies
+    libglib2.0-0 libnss3 libnspr4 libdbus-1-3 libatk1.0-0 libatk-bridge2.0-0 \
+    libcups2 libdrm2 libxkbcommon0 libatspi2.0-0 libxcomposite1 libxdamage1 \
+    libxfixes3 libxrandr2 libgbm1 libasound2 libpango-1.0-0 libcairo2 \
+    libx11-6 libx11-xcb1 libxcb1 libxext6 libxrender1 libxss1 libxtst6 \
+    libxshmfence1 libgtk-3-0 libexpat1 libfontconfig1 fonts-liberation \
+    xdg-utils libpangocairo-1.0-0 libpangoft2-1.0-0 libu2f-udev libvulkan1 \
+    && GH_VERSION="2.63.2" \
+    && ARCH=$(uname -m) \
+    && case "$ARCH" in \
+        x86_64) GH_ARCH="amd64" ;; \
+        aarch64|arm64) GH_ARCH="arm64" ;; \
+        *) echo "Unsupported architecture: $ARCH" && exit 1 ;; \
+    esac \
+    && curl -fsSL "https://github.com/cli/cli/releases/download/v${GH_VERSION}/gh_${GH_VERSION}_linux_${GH_ARCH}.tar.gz" -o gh.tar.gz \
+    && tar -xzf gh.tar.gz \
+    && mv gh_${GH_VERSION}_linux_${GH_ARCH}/bin/gh /usr/local/bin/gh \
+    && rm -rf gh.tar.gz gh_${GH_VERSION}_linux_${GH_ARCH} \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install Claude CLI globally
+RUN npm install -g @anthropic-ai/claude-code
+
+# Build arguments for user ID matching (allows matching host user for mounted volumes)
+# Override at build time: docker-compose build --build-arg UID=$(id -u) --build-arg GID=$(id -g)
+ARG UID=1001
+ARG GID=1001
+
+# Create non-root user with configurable UID/GID
+# Use -o flag to allow non-unique IDs (GID 1000 may already exist as 'node' group)
+RUN groupadd -o -g ${GID} automaker && \
+    useradd -o -u ${UID} -g automaker -m -d /home/automaker -s /bin/bash automaker && \
+    mkdir -p /home/automaker/.local/bin && \
+    mkdir -p /home/automaker/.cursor && \
+    chown -R automaker:automaker /home/automaker && \
+    chmod 700 /home/automaker/.cursor
+
+# Install Cursor CLI as automaker user (best-effort: `|| true` lets the build continue if the installer fails)
+USER automaker
+ENV HOME=/home/automaker
+RUN curl https://cursor.com/install -fsS | bash || true
+USER root
+
+# Add the user-local CLI directory to PATH for login shells via /etc/profile.d
+RUN mkdir -p /etc/profile.d && \
+    echo 'export PATH="/home/automaker/.local/bin:$PATH"' > /etc/profile.d/cursor-cli.sh && \
+    chmod +x /etc/profile.d/cursor-cli.sh
+
+# Add the same PATH entry to the automaker and root .bashrc files for interactive bash shells
+RUN echo 'export PATH="/home/automaker/.local/bin:$PATH"' >> /home/automaker/.bashrc && \
+    chown automaker:automaker /home/automaker/.bashrc
+RUN echo 'export PATH="/home/automaker/.local/bin:$PATH"' >> /root/.bashrc
+
+WORKDIR /app
+
+# Create directories with proper permissions
+RUN mkdir -p /data /projects && chown automaker:automaker /data /projects
+
+# Configure git for mounted volumes (trust every directory) and use gh as the credential helper
+RUN git config --system --add safe.directory '*' && \
+    git config --system credential.helper '!gh auth git-credential'
+
+# Copy entrypoint script
+COPY docker-entrypoint.sh /usr/local/bin/docker-entrypoint.sh
+RUN chmod +x /usr/local/bin/docker-entrypoint.sh
+
+# Environment variables
+ENV PORT=3008
+ENV DATA_DIR=/data
+ENV HOME=/home/automaker
+ENV PATH="/home/automaker/.local/bin:${PATH}"
+
+# Expose both dev ports
+EXPOSE 3007 3008
+
+# Use entrypoint for permission handling
+ENTRYPOINT ["/usr/local/bin/docker-entrypoint.sh"]
+
+# Default command - will be overridden by docker-compose
+CMD ["npm", "run", "dev:web"]
--- a/README.md
+++ b/README.md
@@ -1,5 +1,5 @@
 <p align="center">
-  <img src="apps/ui/public/readme_logo.png" alt="Automaker Logo" height="80" />
+  <img src="apps/ui/public/readme_logo.svg" alt="Automaker Logo" height="80" />
 </p>

 > **[!TIP]**
@@ -19,7 +19,7 @@

 - [What Makes Automaker Different?](#what-makes-automaker-different)
  - [The Workflow](#the-workflow)
-  - [Powered by Claude Code](#powered-by-claude-code)
+  - [Powered by Claude Agent SDK](#powered-by-claude-agent-sdk)
  - [Why This Matters](#why-this-matters)
 - [Security Disclaimer](#security-disclaimer)
 - [Community & Support](#community--support)
@@ -28,22 +28,37 @@
  - [Quick Start](#quick-start)
 - [How to Run](#how-to-run)
  - [Development Mode](#development-mode)
-    - [Electron Desktop App (Recommended)](#electron-desktop-app-recommended)
-    - [Web Browser Mode](#web-browser-mode)
+  - [Interactive TUI Launcher](#interactive-tui-launcher-recommended-for-new-users)
  - [Building for Production](#building-for-production)
-  - [Running Production Build](#running-production-build)
  - [Testing](#testing)
  - [Linting](#linting)
-  - [Authentication Options](#authentication-options)
-  - [Persistent Setup (Optional)](#persistent-setup-optional)
+  - [Environment Configuration](#environment-configuration)
+  - [Authentication Setup](#authentication-setup)
 - [Features](#features)
+  - [Core Workflow](#core-workflow)
+  - [AI & Planning](#ai--planning)
+  - [Project Management](#project-management)
+  - [Collaboration & Review](#collaboration--review)
+  - [Developer Tools](#developer-tools)
+  - [Advanced Features](#advanced-features)
 - [Tech Stack](#tech-stack)
+  - [Frontend](#frontend)
+  - [Backend](#backend)
+  - [Testing & Quality](#testing--quality)
+  - [Shared Libraries](#shared-libraries)
+- [Available Views](#available-views)
+- [Architecture](#architecture)
+  - [Monorepo Structure](#monorepo-structure)
+  - [How It Works](#how-it-works)
+  - [Key Architectural Patterns](#key-architectural-patterns)
+  - [Security & Isolation](#security--isolation)
+  - [Data Storage](#data-storage)
 - [Learn More](#learn-more)
 - [License](#license)

 </details>

-Automaker is an autonomous AI development studio that transforms how you build software. Instead of manually writing every line of code, you describe features on a Kanban board and watch as AI agents powered by Claude Code automatically implement them.
+Automaker is an autonomous AI development studio that transforms how you build software. Instead of manually writing every line of code, you describe features on a Kanban board and watch as AI agents powered by Claude Agent SDK automatically implement them. Built with React, Vite, Electron, and Express, Automaker provides a complete workflow for managing AI agents through a desktop application (or web browser), with features like real-time streaming, git worktree isolation, plan approval, and multi-agent task execution.

 ![Automaker UI](https://i.imgur.com/jdwKydM.png)

@@ -59,30 +74,14 @@ Traditional development tools help you write code. Automaker helps you **orchest
 4. **Review & Verify** - Review the changes, run tests, and approve when ready
 5. **Ship Faster** - Build entire applications in days, not weeks

-### Powered by Claude Code
+### Powered by Claude Agent SDK

-Automaker leverages the [Claude Agent SDK](https://platform.claude.com/docs/en/agent-sdk/overview) to give AI agents full access to your codebase. Agents can read files, write code, execute commands, run tests, and make git commits—all while working in isolated git worktrees to keep your main branch safe.
+Automaker leverages the [Claude Agent SDK](https://www.npmjs.com/package/@anthropic-ai/claude-agent-sdk) to give AI agents full access to your codebase. Agents can read files, write code, execute commands, run tests, and make git commits—all while working in isolated git worktrees to keep your main branch safe. The SDK provides autonomous AI agents that can use tools, make decisions, and complete complex multi-step tasks without constant human intervention.

 ### Why This Matters

 The future of software development is **agentic coding**—where developers become architects directing AI agents rather than manual coders. Automaker puts this future in your hands today, letting you experience what it's like to build software 10x faster with AI agents handling the implementation while you focus on architecture and business logic.

---
-
-> **[!CAUTION]**
->
-> ## Security Disclaimer
->
-> **This software uses AI-powered tooling that has access to your operating system and can read, modify, and delete files. Use at your own risk.**
->
-> We have reviewed this codebase for security vulnerabilities, but you assume all risk when running this software. You should review the code yourself before running it.
->
-> **We do not recommend running Automaker directly on your local computer** due to the risk of AI agents having access to your entire file system. Please sandbox this application using Docker or a virtual machine.
->
-> **[Read the full disclaimer](./DISCLAIMER.md)**
-
---
-
 ## Community & Support

 Join the **Agentic Jumpstart** to connect with other builders exploring **agentic coding** and autonomous development workflows.
@@ -95,8 +94,7 @@ In the Discord, you can:
 - 🚀 Show off projects built with AI agents
 - 🤝 Collaborate with other developers and contributors

-👉 **Join the Discord:**  
-https://discord.gg/jjem7aEDKU
+👉 **Join the Discord:** [Agentic Jumpstart Discord](https://discord.gg/jjem7aEDKU)

 ---

@@ -104,28 +102,31 @@ https://discord.gg/jjem7aEDKU

 ### Prerequisites

- Node.js 18+
- npm
- [Claude Code CLI](https://code.claude.com/docs/en/overview) installed and authenticated
+- **Node.js 22.x** (required: >=22.0.0 <23.0.0)
+- **npm** (comes with Node.js)
+- **[Claude Code CLI](https://code.claude.com/docs/en/overview)** - Install and authenticate with your Anthropic subscription. Automaker integrates with your authenticated Claude Code CLI to access Claude models.

 ### Quick Start

 ```bash
-# 1. Clone the repo
+# 1. Clone the repository
 git clone https://github.com/AutoMaker-Org/automaker.git
 cd automaker

 # 2. Install dependencies
 npm install

-# 3. Build local shared packages
-npm run build:packages
-
-# 4. Run Automaker (pick your mode)
+# 3. Start Automaker
 npm run dev
-# Then choose your run mode when prompted, or use specific commands below
+# Choose between:
+#   1. Web Application (browser at localhost:3007)
+#   2. Desktop Application (Electron - recommended)
 ```

+**Authentication:** Automaker integrates with your authenticated Claude Code CLI. Make sure you have [installed and authenticated](https://code.claude.com/docs/en/quickstart) the Claude Code CLI before running Automaker. Your CLI credentials will be detected automatically.
+
+**For Development:** `npm run dev` starts the development server with Vite live reload and hot module replacement for fast refresh and instant updates as you make changes.
+
 ## How to Run

 ### Development Mode
@@ -161,33 +162,207 @@ npm run dev:electron:wsl:gpu
 npm run dev:web
 ```

+### Interactive TUI Launcher (Recommended for New Users)
+
+For a user-friendly interactive menu, use the built-in TUI launcher script:
+
+```bash
+# Show interactive menu with all launch options
+./start-automaker.sh
+
+# Or launch directly without menu
+./start-automaker.sh web          # Web browser
+./start-automaker.sh electron     # Desktop app
+./start-automaker.sh electron-debug  # Desktop + DevTools
+
+# Additional options
+./start-automaker.sh --help       # Show all available options
+./start-automaker.sh --version    # Show version information
+./start-automaker.sh --check-deps # Verify project dependencies
+./start-automaker.sh --no-colors  # Disable colored output
+./start-automaker.sh --no-history # Don't remember last choice
+```
+
+**Features:**
+
+- 🎨 Beautiful terminal UI with gradient colors and ASCII art
+- ⌨️ Interactive menu (press 1-3 to select, Q to exit)
+- 💾 Remembers your last choice
+- ✅ Pre-flight checks (validates Node.js, npm, dependencies)
+- 📏 Responsive layout (adapts to terminal size)
+- ⏱️ 30-second timeout for hands-free selection
+- 🌐 Cross-shell compatible (bash/zsh)
+
+**History File:**
+Your last selected mode is saved in `~/.automaker_launcher_history` for quick re-runs.
+
 ### Building for Production

+#### Web Application
+
 ```bash
-# Build Next.js app
+# Build for web deployment (uses Vite)
 npm run build
-
-# Build Electron app for distribution
-npm run build:electron
 ```

-### Running Production Build
+#### Desktop Application

 ```bash
-# Start production Next.js server
-npm run start
+# Build for current platform (macOS/Windows/Linux)
+npm run build:electron
+
+# Platform-specific builds
+npm run build:electron:mac     # macOS (DMG + ZIP, x64 + arm64)
+npm run build:electron:win     # Windows (NSIS installer, x64)
+npm run build:electron:linux   # Linux (AppImage + DEB + RPM, x64)
+
+# Output directory: apps/ui/release/
 ```

+**Linux Distribution Packages:**
+
+- **AppImage**: Universal format, works on any Linux distribution
+- **DEB**: Ubuntu, Debian, Linux Mint, Pop!\_OS
+- **RPM**: Fedora, RHEL, Rocky Linux, AlmaLinux, openSUSE
+
+**Installing on Fedora/RHEL:**
+
+```bash
+# Download the RPM package
+wget https://github.com/AutoMaker-Org/automaker/releases/latest/download/Automaker-<version>-x86_64.rpm
+
+# Install with dnf (Fedora)
+sudo dnf install ./Automaker-<version>-x86_64.rpm
+
+# Or with yum (RHEL/CentOS)
+sudo yum localinstall ./Automaker-<version>-x86_64.rpm
+```
+
+#### Docker Deployment
+
+Docker provides the most secure way to run Automaker by isolating it from your host filesystem.
+
+```bash
+# Build and run with Docker Compose
+docker-compose up -d
+
+# Access UI at http://localhost:3007
+# API at http://localhost:3008
+
+# View logs
+docker-compose logs -f
+
+# Stop containers
+docker-compose down
+```
+
+##### Authentication
+
+Automaker integrates with your authenticated Claude Code CLI. To use CLI authentication in Docker, mount your Claude CLI config directory (see [Claude CLI Authentication](#claude-cli-authentication) below).
+
+##### Working with Projects (Host Directory Access)
+
+By default, the container is isolated from your host filesystem. To work on projects from your host machine, create a `docker-compose.override.yml` file (gitignored):
+
+```yaml
+services:
+  server:
+    volumes:
+      # Mount your project directories
+      - /path/to/your/project:/projects/your-project
+```
+
+##### Claude CLI Authentication
+
+Mount your Claude CLI config directory to use your authenticated CLI credentials:
+
+```yaml
+services:
+  server:
+    volumes:
+      # Linux/macOS
+      - ~/.claude:/home/automaker/.claude
+      # Windows
+      - C:/Users/YourName/.claude:/home/automaker/.claude
+```
+
+**Note:** The Claude CLI config directory must be mounted writable (do not use the `:ro` flag) because the CLI writes debug files.
+
+##### GitHub CLI Authentication (For Git Push/PR Operations)
+
+To enable git push and GitHub CLI operations inside the container:
+
+```yaml
+services:
+  server:
+    volumes:
+      # Mount GitHub CLI config
+      # Linux/macOS
+      - ~/.config/gh:/home/automaker/.config/gh
+      # Windows
+      - 'C:/Users/YourName/AppData/Roaming/GitHub CLI:/home/automaker/.config/gh'
+
+      # Mount git config for user identity (name, email)
+      - ~/.gitconfig:/home/automaker/.gitconfig:ro
+    environment:
+      # GitHub token (required on Windows where tokens are in Credential Manager)
+      # Get your token with: gh auth token
+      - GH_TOKEN=${GH_TOKEN}
+```
+
+Then add `GH_TOKEN` to your `.env` file:
+
+```bash
+GH_TOKEN=gho_your_github_token_here
+```
+
+##### Complete docker-compose.override.yml Example
+
+```yaml
+services:
+  server:
+    volumes:
+      # Your projects
+      - /path/to/project1:/projects/project1
+      - /path/to/project2:/projects/project2
+
+      # Authentication configs
+      - ~/.claude:/home/automaker/.claude
+      - ~/.config/gh:/home/automaker/.config/gh
+      - ~/.gitconfig:/home/automaker/.gitconfig:ro
+    environment:
+      - GH_TOKEN=${GH_TOKEN}
+```
+
+##### Architecture Support
+
+The Docker image supports both AMD64 and ARM64 architectures. The GitHub CLI and Claude CLI are automatically downloaded for the correct architecture during build.
+
 ### Testing

-```bash
-# Run tests headless
-npm run test
+#### End-to-End Tests (Playwright)

-# Run tests with browser visible
-npm run test:headed
+```bash
+npm run test            # Headless E2E tests
+npm run test:headed     # Browser visible E2E tests
 ```

+#### Unit Tests (Vitest)
+
+```bash
+npm run test:server              # Server unit tests
+npm run test:server:coverage     # Server tests with coverage
+npm run test:packages            # All shared package tests
+npm run test:all                 # Packages + server tests
+```
+
+#### Test Configuration
+
+- E2E tests run on ports 3007 (UI) and 3008 (server)
+- Automatically starts test servers before running
+- Uses Chromium browser via Playwright
+- Mock agent mode available in CI with `AUTOMAKER_MOCK_AGENT=true`
+
 ### Linting

 ```bash
@@ -195,59 +370,279 @@ npm run test:headed
 npm run lint
 ```

-### Authentication Options
+### Environment Configuration

-Automaker supports multiple authentication methods (in order of priority):
+#### Optional - Server

-| Method           | Environment Variable | Description                     |
-| ---------------- | -------------------- | ------------------------------- |
-| API Key (env)    | `ANTHROPIC_API_KEY`  | Anthropic API key               |
-| API Key (stored) | —                    | Anthropic API key stored in app |
+- `PORT` - Server port (default: 3008)
+- `DATA_DIR` - Data storage directory (default: ./data)
+- `ENABLE_REQUEST_LOGGING` - HTTP request logging (default: true)

-### Persistent Setup (Optional)
+#### Optional - Security

-Add to your `~/.bashrc` or `~/.zshrc`:
+- `AUTOMAKER_API_KEY` - Optional API authentication for the server
+- `ALLOWED_ROOT_DIRECTORY` - Restrict file operations to specific directory
+- `CORS_ORIGIN` - CORS allowed origins (comma-separated list; defaults to localhost only)

-```bash
-export ANTHROPIC_API_KEY="YOUR_API_KEY_HERE"
-```
+#### Optional - Development

-Then restart your terminal or run `source ~/.bashrc`.
+- `VITE_SKIP_ELECTRON` - Skip Electron in dev mode
+- `OPEN_DEVTOOLS` - Auto-open DevTools in Electron
+- `AUTOMAKER_SKIP_SANDBOX_WARNING` - Skip sandbox warning dialog (useful for dev/CI)
+- `AUTOMAKER_AUTO_LOGIN=true` - Skip login prompt in development (ignored when NODE_ENV=production)
+
+### Authentication Setup
+
+Automaker integrates with your authenticated Claude Code CLI and uses your Anthropic subscription.
+
+Install and authenticate the Claude Code CLI following the [official quickstart guide](https://code.claude.com/docs/en/quickstart).
+
+Once authenticated, Automaker will automatically detect and use your CLI credentials. No additional configuration needed!

 ## Features

+### Core Workflow
+
 - 📋 **Kanban Board** - Visual drag-and-drop board to manage features through backlog, in progress, waiting approval, and verified stages
 - 🤖 **AI Agent Integration** - Automatic AI agent assignment to implement features when moved to "In Progress"
- 🧠 **Multi-Model Support** - Choose from multiple AI models including Claude Opus, Sonnet, and more
- 💭 **Extended Thinking** - Enable extended thinking modes for complex problem-solving
- 📡 **Real-time Agent Output** - View live agent output, logs, and file diffs as features are being implemented
- 🔍 **Project Analysis** - AI-powered project structure analysis to understand your codebase
- 📁 **Context Management** - Add context files to help AI agents understand your project better
- 💡 **Feature Suggestions** - AI-generated feature suggestions based on your project
- 🖼️ **Image Support** - Attach images and screenshots to feature descriptions
- ⚡ **Concurrent Processing** - Configure concurrency to process multiple features simultaneously
- 🧪 **Test Integration** - Automatic test running and verification for implemented features
- 🔀 **Git Integration** - View git diffs and track changes made by AI agents
- 👤 **AI Profiles** - Create and manage different AI agent profiles for various tasks
- 💬 **Chat History** - Keep track of conversations and interactions with AI agents
- ⌨️ **Keyboard Shortcuts** - Efficient navigation and actions via keyboard shortcuts
- 🎨 **Dark/Light Theme** - Beautiful UI with theme support
- 🖥️ **Cross-Platform** - Desktop application built with Electron for Windows, macOS, and Linux
+- 🔀 **Git Worktree Isolation** - Each feature executes in isolated git worktrees to protect your main branch
+- 📡 **Real-time Streaming** - Watch AI agents work in real-time with live tool usage, progress updates, and task completion
+- 🔄 **Follow-up Instructions** - Send additional instructions to running agents without stopping them
+
+### AI & Planning
+
+- 🧠 **Multi-Model Support** - Choose from Claude Opus, Sonnet, and Haiku per feature
+- 💭 **Extended Thinking** - Enable thinking modes (none, medium, deep, ultra) for complex problem-solving
+- 📝 **Planning Modes** - Four planning levels: skip (direct implementation), lite (quick plan), spec (task breakdown), full (phased execution)
+- ✅ **Plan Approval** - Review and approve AI-generated plans before implementation begins
+- 📊 **Multi-Agent Task Execution** - Spec mode spawns dedicated agents per task for focused implementation
+
+### Project Management
+
+- 🔍 **Project Analysis** - AI-powered codebase analysis to understand your project structure
+- 💡 **Feature Suggestions** - AI-generated feature suggestions based on project analysis
+- 📁 **Context Management** - Add markdown, images, and documentation files that agents automatically reference
+- 🔗 **Dependency Blocking** - Features can depend on other features, enforcing execution order
+- 🌳 **Graph View** - Visualize feature dependencies with interactive graph visualization
+- 📋 **GitHub Integration** - Import issues, validate feasibility, and convert to tasks automatically
+
+### Collaboration & Review
+
+- 🧪 **Verification Workflow** - Features move to "Waiting Approval" for review and testing
+- 💬 **Agent Chat** - Interactive chat sessions with AI agents for exploratory work
+- 👤 **AI Profiles** - Create custom agent configurations with different prompts, models, and settings
+- 📜 **Session History** - Persistent chat sessions across restarts with full conversation history
+- 🔍 **Git Diff Viewer** - Review changes made by agents before approving
+
+### Developer Tools
+
+- 🖥️ **Integrated Terminal** - Full terminal access with tabs, splits, and persistent sessions
+- 🖼️ **Image Support** - Attach screenshots and diagrams to feature descriptions for visual context
+- ⚡ **Concurrent Execution** - Configure how many features can run simultaneously (default: 3)
+- ⌨️ **Keyboard Shortcuts** - Fully customizable shortcuts for navigation and actions
+- 🎨 **Theme System** - 25+ themes including Dark, Light, Dracula, Nord, Catppuccin, and more
+- 🖥️ **Cross-Platform** - Desktop app for macOS (x64, arm64), Windows (x64), and Linux (x64)
+- 🌐 **Web Mode** - Run in browser or as Electron desktop app
+
+### Advanced Features
+
+- 🔐 **Docker Isolation** - Security-focused Docker deployment with no host filesystem access
+- 🎯 **Worktree Management** - Create, switch, commit, and create PRs from worktrees
+- 📊 **Usage Tracking** - Monitor Claude API usage with detailed metrics
+- 🔊 **Audio Notifications** - Optional completion sounds (can be muted in settings)
+- 💾 **Auto-save** - All work automatically persisted to `.automaker/` directory

 ## Tech Stack

- [Next.js](https://nextjs.org) - React framework
- [Electron](https://www.electronjs.org/) - Desktop application framework
- [Tailwind CSS](https://tailwindcss.com/) - Styling
- [Zustand](https://zustand-demo.pmnd.rs/) - State management
- [dnd-kit](https://dndkit.com/) - Drag and drop functionality
+### Frontend
+
+- **React 19** - UI framework
+- **Vite 7** - Build tool and development server
+- **Electron 39** - Desktop application framework
+- **TypeScript 5.9** - Type safety
+- **TanStack Router** - File-based routing
+- **Zustand 5** - State management with persistence
+- **Tailwind CSS 4** - Utility-first styling with 25+ themes
+- **Radix UI** - Accessible component primitives
+- **dnd-kit** - Drag and drop for Kanban board
+- **@xyflow/react** - Graph visualization for dependencies
+- **xterm.js** - Integrated terminal emulator
+- **CodeMirror 6** - Code editor for XML/syntax highlighting
+- **Lucide Icons** - Icon library
+
+### Backend
+
+- **Node.js** - JavaScript runtime with ES modules
+- **Express 5** - HTTP server framework
+- **TypeScript 5.9** - Type safety
+- **Claude Agent SDK** - AI agent integration (@anthropic-ai/claude-agent-sdk)
+- **WebSocket (ws)** - Real-time event streaming
+- **node-pty** - PTY terminal sessions
+
+### Testing & Quality
+
+- **Playwright** - End-to-end testing
+- **Vitest** - Unit testing framework
+- **ESLint 9** - Code linting
+- **Prettier 3** - Code formatting
+- **Husky** - Git hooks for pre-commit formatting
+
+### Shared Libraries
+
+- **@automaker/types** - Shared TypeScript definitions
+- **@automaker/utils** - Logging, error handling, image processing
+- **@automaker/prompts** - AI prompt templates
+- **@automaker/platform** - Path management and security
+- **@automaker/model-resolver** - Claude model alias resolution
+- **@automaker/dependency-resolver** - Feature dependency ordering
+- **@automaker/git-utils** - Git operations and worktree management
+
+## Available Views
+
+Automaker provides several specialized views accessible via the sidebar or keyboard shortcuts:
+
+| View               | Shortcut | Description                                                                                      |
+| ------------------ | -------- | ------------------------------------------------------------------------------------------------ |
+| **Board**          | `K`      | Kanban board for managing feature workflow (Backlog → In Progress → Waiting Approval → Verified) |
+| **Agent**          | `A`      | Interactive chat sessions with AI agents for exploratory work and questions                      |
+| **Spec**           | `D`      | Project specification editor with AI-powered generation and feature suggestions                  |
+| **Context**        | `C`      | Manage context files (markdown, images) that AI agents automatically reference                   |
+| **Settings**       | `S`      | Configure themes, shortcuts, defaults, authentication, and more                                  |
+| **Terminal**       | `T`      | Integrated terminal with tabs, splits, and persistent sessions                                   |
+| **Graph**          | `H`      | Visualize feature dependencies with interactive graph visualization                              |
+| **Ideation**       | `I`      | Brainstorm and generate ideas with AI assistance                                                 |
+| **Memory**         | `Y`      | View and manage agent memory and conversation history                                            |
+| **GitHub Issues**  | `G`      | Import and validate GitHub issues, convert to tasks                                              |
+| **GitHub PRs**     | `R`      | View and manage GitHub pull requests                                                             |
+| **Running Agents** | -        | View all active agents across projects with status and progress                                  |
+
+### Keyboard Navigation
+
+All shortcuts are customizable in Settings. Default shortcuts:
+
+- **Navigation:** `K` (Board), `A` (Agent), `D` (Spec), `C` (Context), `S` (Settings), `T` (Terminal), `H` (Graph), `I` (Ideation), `Y` (Memory), `G` (GitHub Issues), `R` (GitHub PRs)
+- **UI:** `` ` `` (Toggle sidebar)
+- **Actions:** `N` (New item in current view), `O` (Open project), `P` (Project picker)
+- **Projects:** `Q`/`E` (Cycle previous/next project)
+- **Terminal:** `Alt+D` (Split right), `Alt+S` (Split down), `Alt+W` (Close), `Alt+T` (New tab)
+
+## Architecture
+
+### Monorepo Structure
+
+Automaker is built as an npm workspace monorepo with two main applications and seven shared packages:
+
+```text
+automaker/
+├── apps/
+│   ├── ui/          # React + Vite + Electron frontend
+│   └── server/      # Express + WebSocket backend
+└── libs/            # Shared packages
+    ├── types/                  # Core TypeScript definitions
+    ├── utils/                  # Logging, errors, utilities
+    ├── prompts/                # AI prompt templates
+    ├── platform/               # Path management, security
+    ├── model-resolver/         # Claude model aliasing
+    ├── dependency-resolver/    # Feature dependency ordering
+    └── git-utils/              # Git operations & worktree management
+```
+
+### How It Works
+
+1. **Feature Definition** - Users create feature cards on the Kanban board with descriptions, images, and configuration
+2. **Git Worktree Creation** - When a feature starts, a git worktree is created for isolated development
+3. **Agent Execution** - Claude Agent SDK executes in the worktree with full file system and command access
+4. **Real-time Streaming** - Agent output streams via WebSocket to the frontend for live monitoring
+5. **Plan Approval** (optional) - For spec/full planning modes, agents generate plans that require user approval
+6. **Multi-Agent Tasks** (spec mode) - Each task in the spec gets a dedicated agent for focused implementation
+7. **Verification** - Features move to "Waiting Approval" where changes can be reviewed via git diff
+8. **Integration** - After approval, changes can be committed and PRs created from the worktree
+
+### Key Architectural Patterns
+
+- **Event-Driven Architecture** - All server operations emit events that stream to the frontend
+- **Provider Pattern** - Extensible AI provider system (currently Claude, designed for future providers)
+- **Service-Oriented Backend** - Modular services for agent management, features, terminals, settings
+- **State Management** - Zustand with persistence for frontend state across restarts
+- **File-Based Storage** - No database; features stored as JSON files in `.automaker/` directory
+
+### Security & Isolation
+
+- **Git Worktrees** - Each feature executes in an isolated git worktree, protecting your main branch
+- **Path Sandboxing** - Optional `ALLOWED_ROOT_DIRECTORY` restricts file access
+- **Docker Isolation** - Recommended deployment uses Docker with no host filesystem access
+- **Plan Approval** - Optional plan review before implementation prevents unwanted changes
+
+### Data Storage
+
+Automaker uses a file-based storage system (no database required):
+
+#### Per-Project Data
+
+Stored in `{projectPath}/.automaker/`:
+
+```text
+.automaker/
+├── features/              # Feature JSON files and images
+│   └── {featureId}/
+│       ├── feature.json   # Feature metadata
+│       ├── agent-output.md # AI agent output log
+│       └── images/        # Attached images
+├── context/               # Context files for AI agents
+├── worktrees/             # Git worktree metadata
+├── validations/           # GitHub issue validation results
+├── ideation/              # Brainstorming and analysis data
+│   └── analysis.json      # Project structure analysis
+├── board/                 # Board-related data
+├── images/                # Project-level images
+├── settings.json          # Project-specific settings
+├── app_spec.txt           # Project specification (XML format)
+├── active-branches.json   # Active git branches tracking
+└── execution-state.json   # Auto-mode execution state
+```
+
+#### Global Data
+
+Stored in `DATA_DIR` (default `./data`):
+
+```text
+data/
+├── settings.json          # Global settings, profiles, shortcuts
+├── credentials.json       # API keys (encrypted)
+├── sessions-metadata.json # Chat session metadata
+└── agent-sessions/        # Conversation histories
+    └── {sessionId}.json
+```
+
+---
+
+> [!CAUTION]
+>
+> ## Security Disclaimer
+>
+> **This software uses AI-powered tooling that has access to your operating system and can read, modify, and delete files. Use at your own risk.**
+>
+> We have reviewed this codebase for security vulnerabilities, but you assume all risk when running this software. You should review the code yourself before running it.
+>
+> **We do not recommend running Automaker directly on your local computer** due to the risk of AI agents having access to your entire file system. Please sandbox this application using Docker or a virtual machine.
+>
+> **[Read the full disclaimer](./DISCLAIMER.md)**
+
+---

 ## Learn More

-To learn more about Next.js, take a look at the following resources:
+### Documentation

- [Next.js Documentation](https://nextjs.org/docs) - learn about Next.js features and API.
- [Learn Next.js](https://nextjs.org/learn) - an interactive Next.js tutorial.
+- [Contributing Guide](./CONTRIBUTING.md) - How to contribute to Automaker
+- [Project Documentation](./docs/) - Architecture guides, patterns, and developer docs
+- [Shared Packages Guide](./docs/llm-shared-packages.md) - Using monorepo packages
+
+### Community
+
+Join the **Agentic Jumpstart** Discord to connect with other builders exploring **agentic coding**:
+
+👉 [Agentic Jumpstart Discord](https://discord.gg/jjem7aEDKU)

 ## License

--- a/apps/server/.env.example
+++ b/apps/server/.env.example
@@ -8,6 +8,20 @@
 # Your Anthropic API key for Claude models
 ANTHROPIC_API_KEY=sk-ant-...

+# ============================================
+# OPTIONAL - Additional API Keys
+# ============================================
+
+# OpenAI API key for Codex/GPT models
+OPENAI_API_KEY=sk-...
+
+# Cursor API key for Cursor models
+CURSOR_API_KEY=...
+
+# OAuth credentials for CLI authentication (extracted automatically)
+CLAUDE_OAUTH_CREDENTIALS=
+CURSOR_AUTH_TOKEN=
+
 # ============================================
 # OPTIONAL - Security
 # ============================================
@@ -24,12 +38,17 @@ ALLOWED_ROOT_DIRECTORY=

 # CORS origin - which domains can access the API
 # Use "*" for development, set specific origin for production
-CORS_ORIGIN=*
+CORS_ORIGIN=http://localhost:3007

 # ============================================
 # OPTIONAL - Server
 # ============================================

+# Host to bind the server to (default: 0.0.0.0)
+# Use 0.0.0.0 to listen on all interfaces (recommended for Docker/remote access)
+# Use 127.0.0.1 or localhost to restrict to local connections only
+HOST=0.0.0.0
+
 # Port to run the server on
 PORT=3008

@@ -48,3 +67,23 @@ TERMINAL_ENABLED=true
 TERMINAL_PASSWORD=

 ENABLE_REQUEST_LOGGING=false
+
+# ============================================
+# OPTIONAL - UI Behavior
+# ============================================
+
+# Skip the sandbox warning dialog on startup (default: false)
+# Set to "true" to disable the warning entirely (useful for dev/CI environments)
+AUTOMAKER_SKIP_SANDBOX_WARNING=false
+
+# ============================================
+# OPTIONAL - Debugging
+# ============================================
+
+# Enable raw output logging for agent streams (default: false)
+# When enabled, saves unprocessed stream events to raw-output.jsonl
+# in each feature's directory (.automaker/features/{id}/raw-output.jsonl)
+# Useful for debugging provider streaming issues, improving log parsing,
+# or analyzing how different providers (Claude, Cursor) stream responses
+# Note: This adds disk I/O overhead, only enable when debugging
+AUTOMAKER_DEBUG_RAW_OUTPUT=false
--- a/apps/server/Dockerfile
+++ b/apps/server/Dockerfile
@@ -1,67 +0,0 @@
-# Automaker Backend Server
-# Multi-stage build for minimal production image
-
-# Build stage
-FROM node:20-alpine AS builder
-
-# Install build dependencies for native modules (node-pty)
-RUN apk add --no-cache python3 make g++
-
-WORKDIR /app
-
-# Copy package files and scripts needed for postinstall
-COPY package*.json ./
-COPY apps/server/package*.json ./apps/server/
-COPY scripts ./scripts
-
-# Install dependencies
-RUN npm ci --workspace=apps/server
-
-# Copy source
-COPY apps/server ./apps/server
-
-# Build TypeScript
-RUN npm run build --workspace=apps/server
-
-# Production stage
-FROM node:20-alpine
-
-# Install git, curl, and GitHub CLI (pinned version for reproducible builds)
-RUN apk add --no-cache git curl && \
-    GH_VERSION="2.63.2" && \
-    curl -L "https://github.com/cli/cli/releases/download/v${GH_VERSION}/gh_${GH_VERSION}_linux_amd64.tar.gz" -o gh.tar.gz && \
-    tar -xzf gh.tar.gz && \
-    mv "gh_${GH_VERSION}_linux_amd64/bin/gh" /usr/local/bin/gh && \
-    rm -rf gh.tar.gz "gh_${GH_VERSION}_linux_amd64"
-
-WORKDIR /app
-
-# Create non-root user
-RUN addgroup -g 1001 -S automaker && \
-    adduser -S automaker -u 1001
-
-# Copy built files and production dependencies
-COPY --from=builder /app/apps/server/dist ./dist
-COPY --from=builder /app/apps/server/package*.json ./
-COPY --from=builder /app/node_modules ./node_modules
-
-# Create data directory
-RUN mkdir -p /data && chown automaker:automaker /data
-
-# Switch to non-root user
-USER automaker
-
-# Environment variables
-ENV NODE_ENV=production
-ENV PORT=3008
-ENV DATA_DIR=/data
-
-# Expose port
-EXPOSE 3008
-
-# Health check
-HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
-    CMD wget --no-verbose --tries=1 --spider http://localhost:3008/api/health || exit 1
-
-# Start server
-CMD ["node", "dist/index.js"]
--- a/apps/server/package.json
+++ b/apps/server/package.json
@@ -1,14 +1,18 @@
 {
  "name": "@automaker/server",
-  "version": "0.1.0",
+  "version": "0.13.0",
  "description": "Backend server for Automaker - provides API for both web and Electron modes",
  "author": "AutoMaker Team",
  "license": "SEE LICENSE IN LICENSE",
  "private": true,
+  "engines": {
+    "node": ">=22.0.0 <23.0.0"
+  },
  "type": "module",
  "main": "dist/index.js",
  "scripts": {
    "dev": "tsx watch src/index.ts",
+    "dev:test": "tsx src/index.ts",
    "build": "tsc",
    "start": "node dist/index.js",
    "lint": "eslint src/",
@@ -20,31 +24,38 @@
    "test:unit": "vitest run tests/unit"
  },
  "dependencies": {
-    "@anthropic-ai/claude-agent-sdk": "^0.1.72",
-    "@automaker/dependency-resolver": "^1.0.0",
-    "@automaker/git-utils": "^1.0.0",
-    "@automaker/model-resolver": "^1.0.0",
-    "@automaker/platform": "^1.0.0",
-    "@automaker/prompts": "^1.0.0",
-    "@automaker/types": "^1.0.0",
-    "@automaker/utils": "^1.0.0",
-    "cors": "^2.8.5",
-    "dotenv": "^17.2.3",
-    "express": "^5.2.1",
-    "morgan": "^1.10.1",
+    "@anthropic-ai/claude-agent-sdk": "0.1.76",
+    "@automaker/dependency-resolver": "1.0.0",
+    "@automaker/git-utils": "1.0.0",
+    "@automaker/model-resolver": "1.0.0",
+    "@automaker/platform": "1.0.0",
+    "@automaker/prompts": "1.0.0",
+    "@automaker/types": "1.0.0",
+    "@automaker/utils": "1.0.0",
+    "@github/copilot-sdk": "^0.1.16",
+    "@modelcontextprotocol/sdk": "1.25.2",
+    "@openai/codex-sdk": "^0.77.0",
+    "cookie-parser": "1.4.7",
+    "cors": "2.8.5",
+    "dotenv": "17.2.3",
+    "express": "5.2.1",
+    "morgan": "1.10.1",
    "node-pty": "1.1.0-beta41",
-    "ws": "^8.18.3"
+    "ws": "8.18.3",
+    "yaml": "2.7.0"
  },
  "devDependencies": {
-    "@types/cors": "^2.8.19",
-    "@types/express": "^5.0.6",
-    "@types/morgan": "^1.9.10",
-    "@types/node": "^22",
-    "@types/ws": "^8.18.1",
-    "@vitest/coverage-v8": "^4.0.16",
-    "@vitest/ui": "^4.0.16",
-    "tsx": "^4.21.0",
-    "typescript": "^5",
-    "vitest": "^4.0.16"
+    "@types/cookie": "0.6.0",
+    "@types/cookie-parser": "1.4.10",
+    "@types/cors": "2.8.19",
+    "@types/express": "5.0.6",
+    "@types/morgan": "1.9.10",
+    "@types/node": "22.19.3",
+    "@types/ws": "8.18.1",
+    "@vitest/coverage-v8": "4.0.16",
+    "@vitest/ui": "4.0.16",
+    "tsx": "4.21.0",
+    "typescript": "5.9.3",
+    "vitest": "4.0.16"
  }
 }
--- a/apps/server/src/index.ts
+++ b/apps/server/src/index.ts
@@ -9,15 +9,32 @@
 import express from 'express';
 import cors from 'cors';
 import morgan from 'morgan';
+import cookieParser from 'cookie-parser';
+import cookie from 'cookie';
 import { WebSocketServer, WebSocket } from 'ws';
 import { createServer } from 'http';
 import dotenv from 'dotenv';

 import { createEventEmitter, type EventEmitter } from './lib/events.js';
-import { initAllowedPaths } from '@automaker/platform';
-import { authMiddleware, getAuthStatus } from './lib/auth.js';
+import { initAllowedPaths, getClaudeAuthIndicators } from '@automaker/platform';
+import { createLogger, setLogLevel, LogLevel } from '@automaker/utils';
+
+const logger = createLogger('Server');
+
+/**
+ * Map server log level string to LogLevel enum
+ */
+const LOG_LEVEL_MAP: Record<string, LogLevel> = {
+  error: LogLevel.ERROR,
+  warn: LogLevel.WARN,
+  info: LogLevel.INFO,
+  debug: LogLevel.DEBUG,
+};
+import { authMiddleware, validateWsConnectionToken, checkRawAuthentication } from './lib/auth.js';
+import { requireJsonContentType } from './middleware/require-json-content-type.js';
+import { createAuthRoutes } from './routes/auth/index.js';
 import { createFsRoutes } from './routes/fs/index.js';
-import { createHealthRoutes } from './routes/health/index.js';
+import { createHealthRoutes, createDetailedHandler } from './routes/health/index.js';
 import { createAgentRoutes } from './routes/agent/index.js';
 import { createSessionsRoutes } from './routes/sessions/index.js';
 import { createFeaturesRoutes } from './routes/features/index.js';
@@ -26,7 +43,6 @@ import { createEnhancePromptRoutes } from './routes/enhance-prompt/index.js';
 import { createWorktreeRoutes } from './routes/worktree/index.js';
 import { createGitRoutes } from './routes/git/index.js';
 import { createSetupRoutes } from './routes/setup/index.js';
-import { createSuggestionsRoutes } from './routes/suggestions/index.js';
 import { createModelsRoutes } from './routes/models/index.js';
 import { createRunningAgentsRoutes } from './routes/running-agents/index.js';
 import { createWorkspaceRoutes } from './routes/workspace/index.js';
@@ -46,37 +62,119 @@ import { SettingsService } from './services/settings-service.js';
 import { createSpecRegenerationRoutes } from './routes/app-spec/index.js';
 import { createClaudeRoutes } from './routes/claude/index.js';
 import { ClaudeUsageService } from './services/claude-usage-service.js';
+import { createCodexRoutes } from './routes/codex/index.js';
+import { CodexUsageService } from './services/codex-usage-service.js';
+import { CodexAppServerService } from './services/codex-app-server-service.js';
+import { CodexModelCacheService } from './services/codex-model-cache-service.js';
 import { createGitHubRoutes } from './routes/github/index.js';
 import { createContextRoutes } from './routes/context/index.js';
+import { createBacklogPlanRoutes } from './routes/backlog-plan/index.js';
 import { cleanupStaleValidations } from './routes/github/routes/validation-common.js';
+import { createMCPRoutes } from './routes/mcp/index.js';
+import { MCPTestService } from './services/mcp-test-service.js';
+import { createPipelineRoutes } from './routes/pipeline/index.js';
+import { pipelineService } from './services/pipeline-service.js';
+import { createIdeationRoutes } from './routes/ideation/index.js';
+import { IdeationService } from './services/ideation-service.js';
+import { getDevServerService } from './services/dev-server-service.js';
+import { eventHookService } from './services/event-hook-service.js';
+import { createNotificationsRoutes } from './routes/notifications/index.js';
+import { getNotificationService } from './services/notification-service.js';
+import { createEventHistoryRoutes } from './routes/event-history/index.js';
+import { getEventHistoryService } from './services/event-history-service.js';
+import { getTestRunnerService } from './services/test-runner-service.js';
+import { createProjectsRoutes } from './routes/projects/index.js';

 // Load environment variables
 dotenv.config();

 const PORT = parseInt(process.env.PORT || '3008', 10);
+const HOST = process.env.HOST || '0.0.0.0';
+const HOSTNAME = process.env.HOSTNAME || 'localhost';
 const DATA_DIR = process.env.DATA_DIR || './data';
-const ENABLE_REQUEST_LOGGING = process.env.ENABLE_REQUEST_LOGGING !== 'false'; // Default to true
+logger.info('[SERVER_STARTUP] process.env.DATA_DIR:', process.env.DATA_DIR);
+logger.info('[SERVER_STARTUP] Resolved DATA_DIR:', DATA_DIR);
+logger.info('[SERVER_STARTUP] process.cwd():', process.cwd());
+const ENABLE_REQUEST_LOGGING_DEFAULT = process.env.ENABLE_REQUEST_LOGGING !== 'false'; // Default to true

-// Check for required environment variables
-const hasAnthropicKey = !!process.env.ANTHROPIC_API_KEY;
+// Runtime-configurable request logging flag (can be changed via settings)
+let requestLoggingEnabled = ENABLE_REQUEST_LOGGING_DEFAULT;

-if (!hasAnthropicKey) {
-  console.warn(`
-╔═══════════════════════════════════════════════════════════════════════╗
-║  ⚠️  WARNING: No Claude authentication configured                      ║
-║                                                                       ║
-║  The Claude Agent SDK requires authentication to function.            ║
-║                                                                       ║
-║  Set your Anthropic API key:                                          ║
-║    export ANTHROPIC_API_KEY="sk-ant-..."                              ║
-║                                                                       ║
-║  Or use the setup wizard in Settings to configure authentication.     ║
-╚═══════════════════════════════════════════════════════════════════════╝
-`);
-} else {
-  console.log('[Server] ✓ ANTHROPIC_API_KEY detected (API key auth)');
+/**
+ * Enable or disable HTTP request logging at runtime
+ *
+ * Updates the module-level `requestLoggingEnabled` flag. The morgan `skip`
+ * callback reads that flag on every request, so the change applies to
+ * subsequent requests without re-registering the middleware.
+ *
+ * @param enabled - `true` to log requests, `false` to suppress them
+ */
+export function setRequestLoggingEnabled(enabled: boolean): void {
+  requestLoggingEnabled = enabled;
 }

+/**
+ * Get current request logging state
+ *
+ * @returns The current value of the module-level `requestLoggingEnabled` flag
+ */
+export function isRequestLoggingEnabled(): boolean {
+  return requestLoggingEnabled;
+}
+
+// Width for log box content (excluding borders)
+const BOX_CONTENT_WIDTH = 67;
+
+// Startup check for Claude authentication. The async IIFE is not awaited, so the
+// rest of module initialization continues while it runs.
+// The Claude Agent SDK can use either ANTHROPIC_API_KEY or Claude Code CLI authentication
+(async () => {
+  if (process.env.ANTHROPIC_API_KEY) {
+    logger.info('✓ ANTHROPIC_API_KEY detected');
+    return;
+  }
+
+  // No API key in the environment: look for signs of a Claude Code CLI login
+  try {
+    const found = await getClaudeAuthIndicators();
+    const hasSessionHistory = found.hasSettingsFile && found.hasProjectsSessions;
+    const hasStoredCredentials =
+      found.hasCredentialsFile &&
+      (found.credentials?.hasOAuthToken || found.credentials?.hasApiKey);
+
+    if (found.hasStatsCacheWithActivity || hasSessionHistory || hasStoredCredentials) {
+      logger.info('✓ Claude Code CLI authentication detected');
+      return;
+    }
+  } catch (error) {
+    // A failed probe is handled like "no CLI auth": log it and fall through to the warning
+    logger.warn('Error checking for Claude Code CLI authentication:', error);
+  }
+
+  // Nothing detected: pad each message line to the box width and print the warning box
+  const [wHeader, w1, w2, w3, w4, w5, w6] = [
+    '⚠️  WARNING: No Claude authentication configured',
+    'The Claude Agent SDK requires authentication to function.',
+    'Options:',
+    '1. Install Claude Code CLI and authenticate with subscription',
+    '2. Set your Anthropic API key:',
+    '   export ANTHROPIC_API_KEY="sk-ant-..."',
+    '3. Use the setup wizard in Settings to configure authentication.',
+  ].map((text) => text.padEnd(BOX_CONTENT_WIDTH));
+
+  logger.warn(`
+╔═════════════════════════════════════════════════════════════════════╗
+║  ${wHeader}║
+╠═════════════════════════════════════════════════════════════════════╣
+║                                                                     ║
+║  ${w1}║
+║                                                                     ║
+║  ${w2}║
+║  ${w3}║
+║  ${w4}║
+║  ${w5}║
+║  ${w6}║
+║                                                                     ║
+╚═════════════════════════════════════════════════════════════════════╝
+`);
+})();
+
 // Initialize security
 initAllowedPaths();

@@ -84,44 +182,149 @@ initAllowedPaths();
 const app = express();

 // Middleware
-// Custom colored logger showing only endpoint and status code (configurable via ENABLE_REQUEST_LOGGING env var)
-if (ENABLE_REQUEST_LOGGING) {
-  morgan.token('status-colored', (req, res) => {
-    const status = res.statusCode;
-    if (status >= 500) return `\x1b[31m${status}\x1b[0m`; // Red for server errors
-    if (status >= 400) return `\x1b[33m${status}\x1b[0m`; // Yellow for client errors
-    if (status >= 300) return `\x1b[36m${status}\x1b[0m`; // Cyan for redirects
-    return `\x1b[32m${status}\x1b[0m`; // Green for success
-  });
+// Custom morgan token: the response status code wrapped in an ANSI color escape
+// chosen by status class (request logging itself is toggled dynamically elsewhere)
+morgan.token('status-colored', (_req, res) => {
+  const status = res.statusCode;
+  if (status < 300) {
+    return `\x1b[32m${status}\x1b[0m`; // Green for success
+  }
+  if (status < 400) {
+    return `\x1b[36m${status}\x1b[0m`; // Cyan for redirects
+  }
+  if (status < 500) {
+    return `\x1b[33m${status}\x1b[0m`; // Yellow for client errors
+  }
+  return `\x1b[31m${status}\x1b[0m`; // Red for server errors
+});

-  app.use(
-    morgan(':method :url :status-colored', {
-      skip: (req) => req.url === '/api/health', // Skip health check logs
-    })
-  );
-}
+app.use(
+  morgan(':method :url :status-colored', {
+    // Skip when request logging is disabled or for health check endpoints
+    skip: (req) => !requestLoggingEnabled || req.url === '/api/health',
+  })
+);
+// CORS configuration
+// When using credentials (cookies), origin cannot be '*'
+// We dynamically allow the requesting origin for local development
 app.use(
  cors({
-    origin: process.env.CORS_ORIGIN || '*',
+    origin: (origin, callback) => {
+      // Allow requests with no origin (like mobile apps, curl, Electron)
+      if (!origin) {
+        callback(null, true);
+        return;
+      }
+
+      // If CORS_ORIGIN is set, use it (can be comma-separated list).
+      // A leading '*' entry means "no explicit list": fall through to the defaults below.
+      const allowedOrigins = process.env.CORS_ORIGIN?.split(',').map((o) => o.trim());
+      if (allowedOrigins && allowedOrigins.length > 0 && allowedOrigins[0] !== '*') {
+        if (allowedOrigins.includes(origin)) {
+          callback(null, origin);
+        } else {
+          callback(new Error('Not allowed by CORS'));
+        }
+        return;
+      }
+
+      // For local development, allow loopback and private-network origins (any port)
+      try {
+        const { hostname } = new URL(origin);
+
+        // URL.hostname keeps the square brackets around IPv6 literals, so the
+        // IPv6 loopback address appears as '[::1]', never as '::1'.
+        const isLoopbackOrWildcard =
+          hostname === 'localhost' ||
+          hostname === '127.0.0.1' ||
+          hostname === '[::1]' ||
+          hostname === '0.0.0.0';
+
+        // Only treat the host as private when it is a complete dotted-quad IPv4
+        // address. A bare prefix test would also accept registrable DNS names such
+        // as '10.example.com', and '172.' alone would cover public address space
+        // outside the private 172.16.0.0/12 block.
+        const ipv4 = /^(\d{1,3})\.(\d{1,3})\.\d{1,3}\.\d{1,3}$/.exec(hostname);
+        const firstOctet = ipv4 ? Number(ipv4[1]) : NaN;
+        const secondOctet = ipv4 ? Number(ipv4[2]) : NaN;
+        const isPrivateIPv4 =
+          firstOctet === 10 ||
+          (firstOctet === 192 && secondOctet === 168) ||
+          (firstOctet === 172 && secondOctet >= 16 && secondOctet <= 31);
+
+        if (isLoopbackOrWildcard || isPrivateIPv4) {
+          callback(null, origin);
+          return;
+        }
+      } catch {
+        // Unparseable Origin values fall through to the rejection below
+      }
+
+      // Reject other origins by default for security
+      callback(new Error('Not allowed by CORS'));
+    },
    credentials: true,
  })
 );
 app.use(express.json({ limit: '50mb' }));
+app.use(cookieParser());

 // Create shared event emitter for streaming
 const events: EventEmitter = createEventEmitter();

 // Create services
-const agentService = new AgentService(DATA_DIR, events);
-const featureLoader = new FeatureLoader();
-const autoModeService = new AutoModeService(events);
+// Note: settingsService is created first so it can be injected into other services
 const settingsService = new SettingsService(DATA_DIR);
+const agentService = new AgentService(DATA_DIR, events, settingsService);
+const featureLoader = new FeatureLoader();
+const autoModeService = new AutoModeService(events, settingsService);
 const claudeUsageService = new ClaudeUsageService();
+const codexAppServerService = new CodexAppServerService();
+const codexModelCacheService = new CodexModelCacheService(DATA_DIR, codexAppServerService);
+const codexUsageService = new CodexUsageService(codexAppServerService);
+const mcpTestService = new MCPTestService(settingsService);
+const ideationService = new IdeationService(events, settingsService, featureLoader);
+
+// Initialize DevServerService with event emitter for real-time log streaming
+const devServerService = getDevServerService();
+devServerService.setEventEmitter(events);
+
+// Initialize Notification Service with event emitter for real-time updates
+const notificationService = getNotificationService();
+notificationService.setEventEmitter(events);
+
+// Initialize Event History Service
+const eventHistoryService = getEventHistoryService();
+
+// Initialize Test Runner Service with event emitter for real-time test output streaming
+const testRunnerService = getTestRunnerService();
+testRunnerService.setEventEmitter(events);
+
+// Initialize Event Hook Service for custom event triggers (with history storage)
+eventHookService.initialize(events, settingsService, eventHistoryService, featureLoader);

 // Initialize services
 (async () => {
+  // Migrate settings from legacy Electron userData location if needed
+  // This handles users upgrading from versions that stored settings in ~/.config/Automaker (Linux),
+  // ~/Library/Application Support/Automaker (macOS), or %APPDATA%\Automaker (Windows)
+  // to the new shared ./data directory
+  try {
+    const migrationResult = await settingsService.migrateFromLegacyElectronPath();
+    if (migrationResult.migrated) {
+      logger.info(`Settings migrated from legacy location: ${migrationResult.legacyPath}`);
+      logger.info(`Migrated files: ${migrationResult.migratedFiles.join(', ')}`);
+    }
+    if (migrationResult.errors.length > 0) {
+      logger.warn('Migration errors:', migrationResult.errors);
+    }
+  } catch (err) {
+    logger.warn('Failed to check for legacy settings migration:', err);
+  }
+
+  // Apply logging settings from saved settings
+  try {
+    const settings = await settingsService.getGlobalSettings();
+    const savedLevel = settings.serverLogLevel
+      ? LOG_LEVEL_MAP[settings.serverLogLevel]
+      : undefined;
+    if (savedLevel !== undefined) {
+      setLogLevel(savedLevel);
+      logger.info(`Server log level set to: ${settings.serverLogLevel}`);
+    }
+    // Apply request logging setting. When the saved settings do not specify one,
+    // keep the ENABLE_REQUEST_LOGGING environment default rather than forcing it on,
+    // otherwise ENABLE_REQUEST_LOGGING=false would be overridden at startup.
+    const enableRequestLog = settings.enableRequestLogging ?? ENABLE_REQUEST_LOGGING_DEFAULT;
+    setRequestLoggingEnabled(enableRequestLog);
+    logger.info(`HTTP request logging: ${enableRequestLog ? 'enabled' : 'disabled'}`);
+  } catch (err) {
+    // Best-effort: keep the current logging configuration, but record why loading failed
+    logger.warn('Failed to load logging settings, using defaults', err);
+  }
+
  await agentService.initialize();
-  console.log('[Server] Agent service initialized');
+  logger.info('Agent service initialized');
+
+  // Bootstrap Codex model cache in background (don't block server startup)
+  void codexModelCacheService.getModels().catch((err) => {
+    logger.error('Failed to bootstrap Codex model cache:', err);
+  });
 })();

 // Run stale validation cleanup every hour to prevent memory leaks from crashed validations
@@ -129,36 +332,57 @@ const VALIDATION_CLEANUP_INTERVAL_MS = 60 * 60 * 1000; // 1 hour
 setInterval(() => {
  const cleaned = cleanupStaleValidations();
  if (cleaned > 0) {
-    console.log(`[Server] Cleaned up ${cleaned} stale validation entries`);
+    logger.info(`Cleaned up ${cleaned} stale validation entries`);
  }
 }, VALIDATION_CLEANUP_INTERVAL_MS);

-// Mount API routes - health is unauthenticated for monitoring
+// Require Content-Type: application/json for all API POST/PUT/PATCH requests
+// This helps prevent CSRF and content-type confusion attacks
+app.use('/api', requireJsonContentType);
+
+// Mount API routes - health, auth, and setup are unauthenticated
 app.use('/api/health', createHealthRoutes());
+app.use('/api/auth', createAuthRoutes());
+app.use('/api/setup', createSetupRoutes());

 // Apply authentication to all other routes
 app.use('/api', authMiddleware);

+// Protected health endpoint with detailed info
+app.get('/api/health/detailed', createDetailedHandler());
+
 app.use('/api/fs', createFsRoutes(events));
 app.use('/api/agent', createAgentRoutes(agentService, events));
 app.use('/api/sessions', createSessionsRoutes(agentService));
-app.use('/api/features', createFeaturesRoutes(featureLoader));
+app.use(
+  '/api/features',
+  createFeaturesRoutes(featureLoader, settingsService, events, autoModeService)
+);
 app.use('/api/auto-mode', createAutoModeRoutes(autoModeService));
-app.use('/api/enhance-prompt', createEnhancePromptRoutes());
-app.use('/api/worktree', createWorktreeRoutes());
+app.use('/api/enhance-prompt', createEnhancePromptRoutes(settingsService));
+app.use('/api/worktree', createWorktreeRoutes(events, settingsService));
 app.use('/api/git', createGitRoutes());
-app.use('/api/setup', createSetupRoutes());
-app.use('/api/suggestions', createSuggestionsRoutes(events));
 app.use('/api/models', createModelsRoutes());
-app.use('/api/spec-regeneration', createSpecRegenerationRoutes(events));
+app.use('/api/spec-regeneration', createSpecRegenerationRoutes(events, settingsService));
 app.use('/api/running-agents', createRunningAgentsRoutes(autoModeService));
 app.use('/api/workspace', createWorkspaceRoutes());
 app.use('/api/templates', createTemplatesRoutes());
 app.use('/api/terminal', createTerminalRoutes());
 app.use('/api/settings', createSettingsRoutes(settingsService));
 app.use('/api/claude', createClaudeRoutes(claudeUsageService));
-app.use('/api/github', createGitHubRoutes(events));
-app.use('/api/context', createContextRoutes());
+app.use('/api/codex', createCodexRoutes(codexUsageService, codexModelCacheService));
+app.use('/api/github', createGitHubRoutes(events, settingsService));
+app.use('/api/context', createContextRoutes(settingsService));
+app.use('/api/backlog-plan', createBacklogPlanRoutes(events, settingsService));
+app.use('/api/mcp', createMCPRoutes(mcpTestService));
+app.use('/api/pipeline', createPipelineRoutes(pipelineService));
+app.use('/api/ideation', createIdeationRoutes(events, ideationService, featureLoader));
+app.use('/api/notifications', createNotificationsRoutes(notificationService));
+app.use('/api/event-history', createEventHistoryRoutes(eventHistoryService, settingsService));
+app.use(
+  '/api/projects',
+  createProjectsRoutes(featureLoader, autoModeService, settingsService, notificationService)
+);

 // Create HTTP server
 const server = createServer(app);
@@ -168,10 +392,55 @@ const wss = new WebSocketServer({ noServer: true });
 const terminalWss = new WebSocketServer({ noServer: true });
 const terminalService = getTerminalService();

+/**
+ * Authenticate WebSocket upgrade requests
+ * Checks for API key in header/query, session token in header/query, OR valid session cookie
+ * (all via the shared `checkRawAuthentication` helper), and additionally accepts a
+ * `wsToken` query parameter that passes `validateWsConnectionToken`.
+ *
+ * @param request - Incoming HTTP upgrade request
+ * @returns `true` if any check succeeds; `false` otherwise, including when the
+ *   request URL cannot be parsed
+ */
+function authenticateWebSocket(request: import('http').IncomingMessage): boolean {
+  let url: URL;
+  try {
+    url = new URL(request.url || '', `http://${request.headers.host}`);
+  } catch {
+    // The request target and Host header are client-controlled and `new URL`
+    // throws on malformed values; treat that as an authentication failure
+    // instead of letting the exception escape this function.
+    return false;
+  }
+
+  // Convert URL search params to query object (for repeated keys the last value wins)
+  const query: Record<string, string | undefined> = {};
+  url.searchParams.forEach((value, key) => {
+    query[key] = value;
+  });
+
+  // Parse cookies from header
+  const cookieHeader = request.headers.cookie;
+  const cookies = cookieHeader ? cookie.parse(cookieHeader) : {};
+
+  // Use shared authentication logic for standard auth methods
+  if (
+    checkRawAuthentication(
+      request.headers as Record<string, string | string[] | undefined>,
+      query,
+      cookies
+    )
+  ) {
+    return true;
+  }
+
+  // Additionally check for short-lived WebSocket connection token (WebSocket-specific)
+  const wsToken = url.searchParams.get('wsToken');
+  return !!wsToken && validateWsConnectionToken(wsToken);
+}
+
 // Handle HTTP upgrade requests manually to route to correct WebSocket server
 server.on('upgrade', (request, socket, head) => {
  const { pathname } = new URL(request.url || '', `http://${request.headers.host}`);

+  // Authenticate all WebSocket connections
+  if (!authenticateWebSocket(request)) {
+    logger.info('Authentication failed, rejecting connection');
+    socket.write('HTTP/1.1 401 Unauthorized\r\n\r\n');
+    socket.destroy();
+    return;
+  }
+
  if (pathname === '/api/events') {
    wss.handleUpgrade(request, socket, head, (ws) => {
      wss.emit('connection', ws, request);
@@ -187,22 +456,38 @@ server.on('upgrade', (request, socket, head) => {

 // Events WebSocket connection handler
 wss.on('connection', (ws: WebSocket) => {
-  console.log('[WebSocket] Client connected');
+  logger.info('Client connected, ready state:', ws.readyState);

  // Subscribe to all events and forward to this client
  const unsubscribe = events.subscribe((type, payload) => {
+    logger.info('Event received:', {
+      type,
+      hasPayload: !!payload,
+      payloadKeys: payload ? Object.keys(payload) : [],
+      wsReadyState: ws.readyState,
+      wsOpen: ws.readyState === WebSocket.OPEN,
+    });
+
    if (ws.readyState === WebSocket.OPEN) {
-      ws.send(JSON.stringify({ type, payload }));
+      const message = JSON.stringify({ type, payload });
+      logger.info('Sending event to client:', {
+        type,
+        messageLength: message.length,
+        sessionId: (payload as any)?.sessionId,
+      });
+      ws.send(message);
+    } else {
+      logger.info('WARNING: Cannot send event, WebSocket not open. ReadyState:', ws.readyState);
    }
  });

  ws.on('close', () => {
-    console.log('[WebSocket] Client disconnected');
+    logger.info('Client disconnected');
    unsubscribe();
  });

  ws.on('error', (error) => {
-    console.error('[WebSocket] Error:', error);
+    logger.error('ERROR:', error);
    unsubscribe();
  });
 });
@@ -229,24 +514,24 @@ terminalWss.on('connection', (ws: WebSocket, req: import('http').IncomingMessage
  const sessionId = url.searchParams.get('sessionId');
  const token = url.searchParams.get('token');

-  console.log(`[Terminal WS] Connection attempt for session: ${sessionId}`);
+  logger.info(`Connection attempt for session: ${sessionId}`);

  // Check if terminal is enabled
  if (!isTerminalEnabled()) {
-    console.log('[Terminal WS] Terminal is disabled');
+    logger.info('Terminal is disabled');
    ws.close(4003, 'Terminal access is disabled');
    return;
  }

  // Validate token if password is required
  if (isTerminalPasswordRequired() && !validateTerminalToken(token || undefined)) {
-    console.log('[Terminal WS] Invalid or missing token');
+    logger.info('Invalid or missing token');
    ws.close(4001, 'Authentication required');
    return;
  }

  if (!sessionId) {
-    console.log('[Terminal WS] No session ID provided');
+    logger.info('No session ID provided');
    ws.close(4002, 'Session ID required');
    return;
  }
@@ -254,12 +539,12 @@ terminalWss.on('connection', (ws: WebSocket, req: import('http').IncomingMessage
  // Check if session exists
  const session = terminalService.getSession(sessionId);
  if (!session) {
-    console.log(`[Terminal WS] Session ${sessionId} not found`);
+    logger.info(`Session ${sessionId} not found`);
    ws.close(4004, 'Session not found');
    return;
  }

-  console.log(`[Terminal WS] Client connected to session ${sessionId}`);
+  logger.info(`Client connected to session ${sessionId}`);

  // Track this connection
  if (!terminalConnections.has(sessionId)) {
@@ -375,15 +660,15 @@ terminalWss.on('connection', (ws: WebSocket, req: import('http').IncomingMessage
          break;

        default:
-          console.warn(`[Terminal WS] Unknown message type: ${msg.type}`);
+          logger.warn(`Unknown message type: ${msg.type}`);
      }
    } catch (error) {
-      console.error('[Terminal WS] Error processing message:', error);
+      logger.error('Error processing message:', error);
    }
  });

  ws.on('close', () => {
-    console.log(`[Terminal WS] Client disconnected from session ${sessionId}`);
+    logger.info(`Client disconnected from session ${sessionId}`);
    unsubscribeData();
    unsubscribeExit();

@@ -402,79 +687,149 @@ terminalWss.on('connection', (ws: WebSocket, req: import('http').IncomingMessage
  });

  ws.on('error', (error) => {
-    console.error(`[Terminal WS] Error on session ${sessionId}:`, error);
+    logger.error(`Error on session ${sessionId}:`, error);
    unsubscribeData();
    unsubscribeExit();
  });
 });

 // Start server with error handling for port conflicts
-const startServer = (port: number) => {
-  server.listen(port, () => {
+const startServer = (port: number, host: string) => {
+  server.listen(port, host, () => {
    const terminalStatus = isTerminalEnabled()
      ? isTerminalPasswordRequired()
        ? 'enabled (password protected)'
        : 'enabled'
      : 'disabled';
-    const portStr = port.toString().padEnd(4);
-    console.log(`
-╔═══════════════════════════════════════════════════════╗
-║           Automaker Backend Server                    ║
-╠═══════════════════════════════════════════════════════╣
-║  HTTP API:    http://localhost:${portStr}                 ║
-║  WebSocket:   ws://localhost:${portStr}/api/events        ║
-║  Terminal:    ws://localhost:${portStr}/api/terminal/ws   ║
-║  Health:      http://localhost:${portStr}/api/health      ║
-║  Terminal:    ${terminalStatus.padEnd(37)}║
-╚═══════════════════════════════════════════════════════╝
+
+    // Build URLs for display
+    const listenAddr = `${host}:${port}`;
+    const httpUrl = `http://${HOSTNAME}:${port}`;
+    const wsEventsUrl = `ws://${HOSTNAME}:${port}/api/events`;
+    const wsTerminalUrl = `ws://${HOSTNAME}:${port}/api/terminal/ws`;
+    const healthUrl = `http://${HOSTNAME}:${port}/api/health`;
+
+    const sHeader = '🚀 Automaker Backend Server'.padEnd(BOX_CONTENT_WIDTH);
+    const s1 = `Listening:    ${listenAddr}`.padEnd(BOX_CONTENT_WIDTH);
+    const s2 = `HTTP API:     ${httpUrl}`.padEnd(BOX_CONTENT_WIDTH);
+    const s3 = `WebSocket:    ${wsEventsUrl}`.padEnd(BOX_CONTENT_WIDTH);
+    const s4 = `Terminal WS:  ${wsTerminalUrl}`.padEnd(BOX_CONTENT_WIDTH);
+    const s5 = `Health:       ${healthUrl}`.padEnd(BOX_CONTENT_WIDTH);
+    const s6 = `Terminal:     ${terminalStatus}`.padEnd(BOX_CONTENT_WIDTH);
+
+    logger.info(`
+╔═════════════════════════════════════════════════════════════════════╗
+║  ${sHeader}║
+╠═════════════════════════════════════════════════════════════════════╣
+║                                                                     ║
+║  ${s1}║
+║  ${s2}║
+║  ${s3}║
+║  ${s4}║
+║  ${s5}║
+║  ${s6}║
+║                                                                     ║
+╚═════════════════════════════════════════════════════════════════════╝
 `);
  });

  server.on('error', (error: NodeJS.ErrnoException) => {
    if (error.code === 'EADDRINUSE') {
-      console.error(`
-╔═══════════════════════════════════════════════════════╗
-║  ❌ ERROR: Port ${port} is already in use              ║
-╠═══════════════════════════════════════════════════════╣
-║  Another process is using this port.                  ║
-║                                                       ║
-║  To fix this, try one of:                             ║
-║                                                       ║
-║  1. Kill the process using the port:                  ║
-║     lsof -ti:${port} | xargs kill -9                   ║
-║                                                       ║
-║  2. Use a different port:                             ║
-║     PORT=${port + 1} npm run dev:server                ║
-║                                                       ║
-║  3. Use the init.sh script which handles this:        ║
-║     ./init.sh                                         ║
-╚═══════════════════════════════════════════════════════╝
+      const portStr = port.toString();
+      const nextPortStr = (port + 1).toString();
+      const killCmd = `lsof -ti:${portStr} | xargs kill -9`;
+      const altCmd = `PORT=${nextPortStr} npm run dev:server`;
+
+      const eHeader = `❌ ERROR: Port ${portStr} is already in use`.padEnd(BOX_CONTENT_WIDTH);
+      const e1 = 'Another process is using this port.'.padEnd(BOX_CONTENT_WIDTH);
+      const e2 = 'To fix this, try one of:'.padEnd(BOX_CONTENT_WIDTH);
+      const e3 = '1. Kill the process using the port:'.padEnd(BOX_CONTENT_WIDTH);
+      const e4 = `   ${killCmd}`.padEnd(BOX_CONTENT_WIDTH);
+      const e5 = '2. Use a different port:'.padEnd(BOX_CONTENT_WIDTH);
+      const e6 = `   ${altCmd}`.padEnd(BOX_CONTENT_WIDTH);
+      const e7 = '3. Use the init.sh script which handles this:'.padEnd(BOX_CONTENT_WIDTH);
+      const e8 = '   ./init.sh'.padEnd(BOX_CONTENT_WIDTH);
+
+      logger.error(`
+╔═════════════════════════════════════════════════════════════════════╗
+║  ${eHeader}║
+╠═════════════════════════════════════════════════════════════════════╣
+║                                                                     ║
+║  ${e1}║
+║                                                                     ║
+║  ${e2}║
+║                                                                     ║
+║  ${e3}║
+║  ${e4}║
+║                                                                     ║
+║  ${e5}║
+║  ${e6}║
+║                                                                     ║
+║  ${e7}║
+║  ${e8}║
+║                                                                     ║
+╚═════════════════════════════════════════════════════════════════════╝
 `);
      process.exit(1);
    } else {
-      console.error('[Server] Error starting server:', error);
+      logger.error('Error starting server:', error);
      process.exit(1);
    }
  });
 };

-startServer(PORT);
+startServer(PORT, HOST);
+
+// Process-wide handlers for errors that escaped every other handler
+process.on('unhandledRejection', (reason: unknown, _promise: Promise<unknown>) => {
+  // A rejection reason can be any value; only Error instances carry a message and stack
+  const details =
+    reason instanceof Error
+      ? { reason: reason.message, stack: reason.stack }
+      : { reason: String(reason), stack: undefined };
+  logger.error('Unhandled Promise Rejection:', details);
+  // Deliberately no process.exit here: the rejection is logged and the server keeps running
+});
+
+process.on('uncaughtException', (error: Error) => {
+  const { message, stack } = error;
+  logger.error('Uncaught Exception:', { message, stack });
+  // After an uncaught exception the process is in an unknown state, so exit
+  // rather than continue with undefined behavior
+  process.exit(1);
+});
+
+// Graceful shutdown timeout (30 seconds)
+const SHUTDOWN_TIMEOUT_MS = 30000;
+
+// Graceful shutdown helper
+const gracefulShutdown = async (signal: string) => {
+  logger.info(`${signal} received, shutting down...`);
+
+  // Set up a force-exit timeout to prevent hanging
+  const forceExitTimeout = setTimeout(() => {
+    logger.error(`Shutdown timed out after ${SHUTDOWN_TIMEOUT_MS}ms, forcing exit`);
+    process.exit(1);
+  }, SHUTDOWN_TIMEOUT_MS);
+
+  // Mark all running features as interrupted before shutdown
+  // This ensures they can be resumed when the server restarts
+  // Note: markAllRunningFeaturesInterrupted handles errors internally and never rejects
+  await autoModeService.markAllRunningFeaturesInterrupted(`${signal} signal received`);

-// Graceful shutdown
-process.on('SIGTERM', () => {
-  console.log('SIGTERM received, shutting down...');
  terminalService.cleanup();
  server.close(() => {
-    console.log('Server closed');
+    clearTimeout(forceExitTimeout);
+    logger.info('Server closed');
    process.exit(0);
  });
+};
+
+process.on('SIGTERM', () => {
+  gracefulShutdown('SIGTERM');
 });

 process.on('SIGINT', () => {
-  console.log('SIGINT received, shutting down...');
-  terminalService.cleanup();
-  server.close(() => {
-    console.log('Server closed');
-    process.exit(0);
-  });
+  gracefulShutdown('SIGINT');
 });
--- a/apps/server/src/lib/agent-discovery.ts
+++ b/apps/server/src/lib/agent-discovery.ts
@@ -0,0 +1,257 @@
+/**
+ * Agent Discovery - Scans filesystem for AGENT.md files
+ *
+ * Discovers agents from:
+ * - ~/.claude/agents/ (user-level, global)
+ * - .claude/agents/ (project-level)
+ *
+ * Similar to Skills, but for custom subagents defined in AGENT.md files.
+ */
+
+import path from 'path';
+import os from 'os';
+import { createLogger } from '@automaker/utils';
+import { secureFs, systemPaths } from '@automaker/platform';
+import type { AgentDefinition } from '@automaker/types';
+
+const logger = createLogger('AgentDiscovery');
+
+/**
+ * An agent definition found on disk, together with where it was found.
+ */
+export interface FilesystemAgent {
+  name: string; // File name without '.md' (flat layout) or the subdirectory name (e.g., 'code-reviewer')
+  definition: AgentDefinition;
+  source: 'user' | 'project';
+  filePath: string; // Full path to the parsed file: <name>.md or <name>/AGENT.md
+}
+
+/**
+ * Parse agent content string into AgentDefinition
+ * Format:
+ * ---
+ * name: agent-name  # Optional
+ * description: When to use this agent
+ * tools: tool1, tool2, tool3  # Optional (comma or space separated list)
+ * model: sonnet  # Optional: sonnet, opus, haiku, inherit
+ * ---
+ * System prompt content here...
+ *
+ * Keys are only recognized at the start of a frontmatter line and a value
+ * never extends past its own line, so text such as "model: opus" inside the
+ * description, or an empty `tools:` line, cannot bleed into another field.
+ * LF and CRLF line endings are both accepted, and the prompt after the
+ * closing `---` may be empty.
+ *
+ * @param content - Raw text of the agent file
+ * @param filePath - Path of the file; used only in log messages
+ * @returns The parsed definition, or null when the frontmatter block or the
+ *   required description is missing
+ */
+function parseAgentContent(content: string, filePath: string): AgentDefinition | null {
+  // Extract frontmatter; the body group is optional so a file may end at the closing `---`
+  const frontmatterMatch = content.match(
+    /^---\r?\n([\s\S]*?)\r?\n---[ \t]*(?:\r?\n([\s\S]*))?$/
+  );
+  if (!frontmatterMatch) {
+    logger.warn(`Invalid agent file format (missing frontmatter): ${filePath}`);
+    return null;
+  }
+
+  const [, frontmatter, body = ''] = frontmatterMatch;
+
+  // Read one `key: value` line; a blank value is reported as undefined.
+  // [ \t]* (not \s*) keeps the match on a single line.
+  const readField = (key: string): string | undefined => {
+    const value = frontmatter
+      .match(new RegExp(`^[ \\t]*${key}:[ \\t]*(.*)$`, 'm'))?.[1]
+      ?.trim();
+    return value ? value : undefined;
+  };
+
+  // Parse description (required)
+  const description = readField('description');
+  if (!description) {
+    logger.warn(`Missing description in agent file: ${filePath}`);
+    return null;
+  }
+
+  // Parse tools (optional) - supports both comma-separated and space-separated
+  const tools = readField('tools')
+    ?.split(/[,\s]+/)
+    .filter((tool) => tool !== '');
+
+  // Parse model (optional) - only the leading word is considered, then validated
+  const modelValue = readField('model')?.match(/^\w+/)?.[0];
+  const validModels = ['sonnet', 'opus', 'haiku', 'inherit'] as const;
+  const model = validModels.find((candidate) => candidate === modelValue);
+
+  if (modelValue && !model) {
+    logger.warn(
+      `Invalid model "${modelValue}" in agent file: ${filePath}. Expected one of: ${validModels.join(', ')}`
+    );
+  }
+
+  return {
+    description,
+    prompt: body.trim(),
+    tools,
+    model,
+  };
+}
+
+/**
+ * Directory entry with type information
+ *
+ * Plain-data form of a directory listing entry, produced by both FsAdapter
+ * implementations so callers do not depend on the underlying fs API.
+ */
+interface DirEntry {
+  name: string; // Entry name as returned by the directory listing (joined onto the directory path by callers)
+  isFile: boolean;
+  isDirectory: boolean;
+}
+
+/**
+ * Filesystem adapter interface for abstracting systemPaths vs secureFs
+ *
+ * - exists: resolves to whether the path is present
+ * - readdir: resolves to the entries of a directory with their type flags
+ * - readFile: resolves to the file content as a string
+ */
+interface FsAdapter {
+  exists: (filePath: string) => Promise<boolean>;
+  readdir: (dirPath: string) => Promise<DirEntry[]>;
+  readFile: (filePath: string) => Promise<string>;
+}
+
+/**
+ * Create a filesystem adapter for system paths (user directory)
+ *
+ * The directory listing provides names only, so every entry is stat-ed to
+ * learn its type. An entry whose stat fails (for instance because it vanished
+ * between the listing and the stat) is skipped with a debug log instead of
+ * failing the whole listing, so one bad entry cannot hide every other agent.
+ */
+function createSystemPathAdapter(): FsAdapter {
+  return {
+    // `async` turns a synchronous throw from systemPathExists into a rejection
+    exists: async (filePath) => systemPaths.systemPathExists(filePath),
+    readdir: async (dirPath) => {
+      const entryNames = await systemPaths.systemPathReaddir(dirPath);
+      const entries: DirEntry[] = [];
+      for (const name of entryNames) {
+        const entryPath = path.join(dirPath, name);
+        try {
+          const stat = await systemPaths.systemPathStat(entryPath);
+          entries.push({
+            name,
+            isFile: stat.isFile(),
+            isDirectory: stat.isDirectory(),
+          });
+        } catch (error) {
+          const reason = error instanceof Error ? error.message : String(error);
+          logger.debug(`Skipping entry that could not be stat-ed: ${entryPath} (${reason})`);
+        }
+      }
+      return entries;
+    },
+    readFile: (filePath) => systemPaths.systemPathReadFile(filePath, 'utf-8') as Promise<string>,
+  };
+}
+
+/**
+ * Build the FsAdapter used for project directories, backed by secureFs.
+ */
+function createSecureFsAdapter(): FsAdapter {
+  // access() settling without error means the path exists; a rejection means it does not
+  const exists: FsAdapter['exists'] = (filePath) =>
+    secureFs.access(filePath).then(
+      () => true,
+      () => false
+    );
+
+  // Entries returned with `withFileTypes` are flattened into plain DirEntry records
+  const readdir: FsAdapter['readdir'] = async (dirPath) => {
+    const dirents = await secureFs.readdir(dirPath, { withFileTypes: true });
+    const listing: DirEntry[] = [];
+    for (const dirent of dirents) {
+      listing.push({
+        name: dirent.name,
+        isFile: dirent.isFile(),
+        isDirectory: dirent.isDirectory(),
+      });
+    }
+    return listing;
+  };
+
+  const readFile: FsAdapter['readFile'] = (filePath) =>
+    secureFs.readFile(filePath, 'utf-8') as Promise<string>;
+
+  return { exists, readdir, readFile };
+}
+
+/**
+ * Read an agent file through the given adapter and parse it.
+ *
+ * Never throws: a read or parse failure is logged and reported as null.
+ */
+async function parseAgentFileWithAdapter(
+  filePath: string,
+  fsAdapter: FsAdapter
+): Promise<AgentDefinition | null> {
+  let definition: AgentDefinition | null = null;
+  try {
+    const rawText = await fsAdapter.readFile(filePath);
+    definition = parseAgentContent(rawText, filePath);
+  } catch (error) {
+    logger.error(`Failed to parse agent file: ${filePath}`, error);
+  }
+  return definition;
+}
+
+/**
+ * Collect every agent defined directly under `baseDir`.
+ *
+ * Two layouts are recognized:
+ * 1. Flat: agent-name.md (file directly in agents/)
+ * 2. Subdirectory: agent-name/AGENT.md (folder + file, similar to Skills)
+ *
+ * User directories are read through the system-path adapter, project
+ * directories through secureFs. Failures while scanning are logged and the
+ * agents gathered so far are returned.
+ */
+async function scanAgentsDirectory(
+  baseDir: string,
+  source: 'user' | 'project'
+): Promise<FilesystemAgent[]> {
+  const discovered: FilesystemAgent[] = [];
+  const fsAdapter = source === 'user' ? createSystemPathAdapter() : createSecureFsAdapter();
+
+  try {
+    if (!(await fsAdapter.exists(baseDir))) {
+      logger.debug(`Directory does not exist: ${baseDir}`);
+      return discovered;
+    }
+
+    const entries = await fsAdapter.readdir(baseDir);
+
+    for (const entry of entries) {
+      const isFlatAgent = entry.isFile && entry.name.endsWith('.md');
+      if (!isFlatAgent && !entry.isDirectory) {
+        continue; // Neither layout applies to this entry
+      }
+
+      let agentName: string;
+      let agentFilePath: string;
+      let discoveryNote: string;
+
+      if (isFlatAgent) {
+        agentName = entry.name.slice(0, -3); // Remove .md extension
+        agentFilePath = path.join(baseDir, entry.name);
+        discoveryNote = `Discovered ${source} agent (flat): ${agentName}`;
+      } else {
+        agentName = entry.name;
+        agentFilePath = path.join(baseDir, entry.name, 'AGENT.md');
+        if (!(await fsAdapter.exists(agentFilePath))) {
+          continue; // Directory without an AGENT.md is not an agent
+        }
+        discoveryNote = `Discovered ${source} agent (subdirectory): ${entry.name}`;
+      }
+
+      const definition = await parseAgentFileWithAdapter(agentFilePath, fsAdapter);
+      if (!definition) {
+        continue;
+      }
+
+      discovered.push({
+        name: agentName,
+        definition,
+        source,
+        filePath: agentFilePath,
+      });
+      logger.debug(discoveryNote);
+    }
+  } catch (error) {
+    logger.error(`Failed to scan agents directory: ${baseDir}`, error);
+  }
+
+  return discovered;
+}
+
+/**
+ * Gather filesystem-based agents from the requested sources.
+ *
+ * @param projectPath - Project root; project-level discovery is skipped when it is not given
+ * @param sources - Which locations to scan (defaults to both)
+ * @returns User-level agents first, followed by project-level agents
+ */
+export async function discoverFilesystemAgents(
+  projectPath?: string,
+  sources: Array<'user' | 'project'> = ['user', 'project']
+): Promise<FilesystemAgent[]> {
+  const collected: FilesystemAgent[] = [];
+
+  // ~/.claude/agents/
+  if (sources.includes('user')) {
+    const userAgentsDir = path.join(os.homedir(), '.claude', 'agents');
+    const found = await scanAgentsDirectory(userAgentsDir, 'user');
+    for (const agent of found) {
+      collected.push(agent);
+    }
+    logger.info(`Discovered ${found.length} user-level agents from ${userAgentsDir}`);
+  }
+
+  // <project>/.claude/agents/
+  if (projectPath && sources.includes('project')) {
+    const projectAgentsDir = path.join(projectPath, '.claude', 'agents');
+    const found = await scanAgentsDirectory(projectAgentsDir, 'project');
+    for (const agent of found) {
+      collected.push(agent);
+    }
+    logger.info(`Discovered ${found.length} project-level agents from ${projectAgentsDir}`);
+  }
+
+  return collected;
+}
--- a/apps/server/src/lib/app-spec-format.ts
+++ b/apps/server/src/lib/app-spec-format.ts
@@ -11,8 +11,12 @@ export { specOutputSchema } from '@automaker/types';

 /**
 * Escape special XML characters
+ * Handles undefined/null values by converting them to empty strings
 */
-function escapeXml(str: string): string {
+export function escapeXml(str: string | undefined | null): string {
+  if (str == null) {
+    return '';
+  }
  return str
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
--- a/apps/server/src/lib/auth-utils.ts
+++ b/apps/server/src/lib/auth-utils.ts
@@ -0,0 +1,263 @@
+/**
+ * Secure authentication utilities that avoid environment variable race conditions
+ */
+
+import { spawn } from 'child_process';
+import { createLogger } from '@automaker/utils';
+
+const logger = createLogger('AuthUtils');
+
+/**
+ * Environment-variable map built by createSecureAuthEnv and handed to child
+ * processes by spawnSecureAuth; a key may be present with an undefined value.
+ */
+export interface SecureAuthEnv {
+  [key: string]: string | undefined;
+}
+
+/**
+ * Outcome of validateApiKey.
+ */
+export interface AuthValidationResult {
+  isValid: boolean;
+  error?: string; // Set when isValid is false
+  normalizedKey?: string; // Trimmed key, set when isValid is true
+}
+
+/**
+ * Check the shape of an API key for the given provider. process.env is never touched.
+ *
+ * Rules, applied to the trimmed key:
+ * - anthropic: must start with "sk-ant-" and be at least 20 characters
+ * - openai: must start with "sk-" and be at least 20 characters
+ * - cursor: must be at least 10 characters
+ *
+ * @returns `{ isValid: true, normalizedKey }` on success, otherwise `{ isValid: false, error }`
+ */
+export function validateApiKey(
+  key: string,
+  provider: 'anthropic' | 'openai' | 'cursor'
+): AuthValidationResult {
+  if (!key || typeof key !== 'string' || key.trim().length === 0) {
+    return { isValid: false, error: 'API key is required' };
+  }
+
+  const candidate = key.trim();
+  let failure: string | undefined;
+
+  if (provider === 'anthropic') {
+    if (!candidate.startsWith('sk-ant-')) {
+      failure = 'Invalid Anthropic API key format. Should start with "sk-ant-"';
+    } else if (candidate.length < 20) {
+      failure = 'Anthropic API key too short';
+    }
+  } else if (provider === 'openai') {
+    if (!candidate.startsWith('sk-')) {
+      failure = 'Invalid OpenAI API key format. Should start with "sk-"';
+    } else if (candidate.length < 20) {
+      failure = 'OpenAI API key too short';
+    }
+  } else if (provider === 'cursor') {
+    // Cursor API keys might have different format
+    if (candidate.length < 10) {
+      failure = 'Cursor API key too short';
+    }
+  }
+
+  if (failure !== undefined) {
+    return { isValid: false, error: failure };
+  }
+  return { isValid: true, normalizedKey: candidate };
+}
+
+/**
+ * Creates a secure environment object for authentication testing
+ * without modifying the global process.env
+ *
+ * The result is a copy of process.env in which the provider's API-key
+ * variable is either removed ('cli') or set to the validated key ('api_key').
+ * Each provider has its own variable, so a Cursor key is never written into
+ * (or removed from) ANTHROPIC_API_KEY.
+ *
+ * @param authMethod - 'cli' removes the provider key; 'api_key' sets it
+ * @param apiKey - Key to install; when omitted with 'api_key' the copy is returned unchanged
+ * @param provider - Which provider's variable to manage (default 'anthropic')
+ * @returns A new environment object; process.env itself is not modified
+ * @throws Error with the validation message when `apiKey` fails validateApiKey
+ */
+export function createSecureAuthEnv(
+  authMethod: 'cli' | 'api_key',
+  apiKey?: string,
+  provider: 'anthropic' | 'openai' | 'cursor' = 'anthropic'
+): SecureAuthEnv {
+  const envKeyByProvider = {
+    anthropic: 'ANTHROPIC_API_KEY',
+    openai: 'OPENAI_API_KEY',
+    cursor: 'CURSOR_API_KEY',
+  } as const;
+  const envKey = envKeyByProvider[provider];
+
+  const env: SecureAuthEnv = { ...process.env };
+
+  if (authMethod === 'cli') {
+    // For CLI auth, remove the API key to force CLI authentication
+    delete env[envKey];
+  } else if (authMethod === 'api_key' && apiKey) {
+    // For API key auth, validate and set the provided key
+    const validation = validateApiKey(apiKey, provider);
+    if (!validation.isValid) {
+      throw new Error(validation.error);
+    }
+    env[envKey] = validation.normalizedKey;
+  }
+
+  return env;
+}
+
+/**
+ * Creates a temporary environment override for the current process
+ * WARNING: This should only be used in isolated contexts and immediately cleaned up
+ *
+ * Every key in `authEnv` is applied to process.env: a string value is set and
+ * an undefined value unsets the variable. (Assigning undefined to process.env
+ * would store the literal string "undefined".) Variables that do not appear
+ * in `authEnv` are left as they are.
+ *
+ * @param authEnv - Variables to set or unset
+ * @returns Cleanup function that restores process.env to its state at call time
+ */
+export function createTempEnvOverride(authEnv: SecureAuthEnv): () => void {
+  const originalEnv = { ...process.env };
+
+  // Apply the auth environment
+  for (const [key, value] of Object.entries(authEnv)) {
+    if (value === undefined) {
+      delete process.env[key];
+    } else {
+      process.env[key] = value;
+    }
+  }
+
+  // Return cleanup function
+  return () => {
+    // Drop variables that did not exist before, then put the original values back
+    for (const key of Object.keys(process.env)) {
+      if (!(key in originalEnv)) {
+        delete process.env[key];
+      }
+    }
+    Object.assign(process.env, originalEnv);
+  };
+}
+
+/**
+ * Spawns a process with secure environment isolation
+ *
+ * The child receives `authEnv` as its entire environment and is started
+ * without a shell. stdout and stderr are collected as UTF-8 text; decoding is
+ * done by the streams themselves so a multi-byte character that arrives split
+ * across two chunks is not corrupted.
+ *
+ * @param command - Executable to run
+ * @param args - Arguments passed to the executable
+ * @param authEnv - Environment for the child process
+ * @param options - `cwd` (default: process.cwd()) and `timeout` in ms (default: 30000)
+ * @returns Collected stdout/stderr and the exit code, once the child has closed
+ * @throws Rejects when the process cannot be spawned, or with
+ *   'Authentication process timed out' after `timeout` ms (the child is sent SIGTERM)
+ */
+export function spawnSecureAuth(
+  command: string,
+  args: string[],
+  authEnv: SecureAuthEnv,
+  options: {
+    cwd?: string;
+    timeout?: number;
+  } = {}
+): Promise<{ stdout: string; stderr: string; exitCode: number | null }> {
+  return new Promise((resolve, reject) => {
+    const { cwd = process.cwd(), timeout = 30000 } = options;
+
+    logger.debug(`Spawning secure auth process: ${command} ${args.join(' ')}`);
+
+    const child = spawn(command, args, {
+      cwd,
+      env: authEnv,
+      stdio: 'pipe',
+      shell: false,
+    });
+
+    let stdout = '';
+    let stderr = '';
+    // Guards against settling the promise twice (timeout vs. close vs. error)
+    let isResolved = false;
+
+    const timeoutId = setTimeout(() => {
+      if (!isResolved) {
+        child.kill('SIGTERM');
+        isResolved = true;
+        reject(new Error('Authentication process timed out'));
+      }
+    }, timeout);
+
+    // Let the streams decode UTF-8 so partial multi-byte sequences are buffered
+    child.stdout?.setEncoding('utf8');
+    child.stderr?.setEncoding('utf8');
+
+    child.stdout?.on('data', (chunk: string) => {
+      stdout += chunk;
+    });
+
+    child.stderr?.on('data', (chunk: string) => {
+      stderr += chunk;
+    });
+
+    child.on('close', (code) => {
+      clearTimeout(timeoutId);
+      if (!isResolved) {
+        isResolved = true;
+        resolve({ stdout, stderr, exitCode: code });
+      }
+    });
+
+    child.on('error', (error) => {
+      clearTimeout(timeoutId);
+      if (!isResolved) {
+        isResolved = true;
+        reject(error);
+      }
+    });
+  });
+}
+
+/**
+ * Read a single variable from process.env.
+ *
+ * @returns The current value, or undefined when the variable is not set
+ */
+export function safeGetEnv(key: string): string | undefined {
+  const { [key]: currentValue } = process.env;
+  return currentValue;
+}
+
+/**
+ * Report whether assigning `newValue` to `key` would change process.env.
+ * Nothing is written; an unset variable always counts as a change.
+ */
+export function wouldModifyEnv(key: string, newValue: string): boolean {
+  const alreadySet = safeGetEnv(key) === newValue;
+  return !alreadySet;
+}
+
+/**
+ * Process-wide registry of auth environments, keyed by session id.
+ *
+ * All members are static; the registry lives for the lifetime of the module.
+ */
+export class AuthSessionManager {
+  private static sessionsById = new Map<string, SecureAuthEnv>();
+
+  /** Build an environment via createSecureAuthEnv, store it under `sessionId`, and return it. */
+  static createSession(
+    sessionId: string,
+    authMethod: 'cli' | 'api_key',
+    apiKey?: string,
+    provider: 'anthropic' | 'openai' | 'cursor' = 'anthropic'
+  ): SecureAuthEnv {
+    const sessionEnv = createSecureAuthEnv(authMethod, apiKey, provider);
+    this.sessionsById.set(sessionId, sessionEnv);
+    return sessionEnv;
+  }
+
+  /** Look up a stored environment; undefined when the id is unknown. */
+  static getSession(sessionId: string): SecureAuthEnv | undefined {
+    return this.sessionsById.get(sessionId);
+  }
+
+  /** Forget one session. Unknown ids are ignored. */
+  static destroySession(sessionId: string): void {
+    this.sessionsById.delete(sessionId);
+  }
+
+  /** Forget every session. */
+  static cleanup(): void {
+    this.sessionsById.clear();
+  }
+}
+
+/**
+ * Rate limiting for auth attempts to prevent abuse
+ *
+ * Each identifier may make `maxAttempts` attempts; the window is measured from
+ * the most recent allowed attempt and a record older than `windowMs` is
+ * treated as if it did not exist. All three query methods apply the same
+ * expiry rule, so getRemainingAttempts/getResetTime never report a stale
+ * window that canAttempt would already consider over.
+ */
+export class AuthRateLimiter {
+  private attempts = new Map<string, { count: number; lastAttempt: number }>();
+
+  constructor(
+    private maxAttempts = 5,
+    private windowMs = 60000
+  ) {}
+
+  /** Return the identifier's record if its window is still open; expired records are dropped. */
+  private getActiveRecord(
+    identifier: string,
+    now: number
+  ): { count: number; lastAttempt: number } | undefined {
+    const record = this.attempts.get(identifier);
+    if (!record) return undefined;
+    if (now - record.lastAttempt > this.windowMs) {
+      this.attempts.delete(identifier);
+      return undefined;
+    }
+    return record;
+  }
+
+  /**
+   * Register an attempt and report whether it is allowed.
+   * A rejected attempt does not extend the window.
+   */
+  canAttempt(identifier: string): boolean {
+    const now = Date.now();
+    const record = this.getActiveRecord(identifier, now);
+
+    if (!record) {
+      this.attempts.set(identifier, { count: 1, lastAttempt: now });
+      return true;
+    }
+
+    if (record.count >= this.maxAttempts) {
+      return false;
+    }
+
+    record.count++;
+    record.lastAttempt = now;
+    return true;
+  }
+
+  /** Attempts still available in the current window (the full allowance when no window is open). */
+  getRemainingAttempts(identifier: string): number {
+    const record = this.getActiveRecord(identifier, Date.now());
+    if (!record) return this.maxAttempts;
+    return Math.max(0, this.maxAttempts - record.count);
+  }
+
+  /** When the current window ends, or null when no window is open for the identifier. */
+  getResetTime(identifier: string): Date | null {
+    const record = this.getActiveRecord(identifier, Date.now());
+    if (!record) return null;
+    return new Date(record.lastAttempt + this.windowMs);
+  }
+}
--- a/apps/server/src/lib/auth.ts
+++ b/apps/server/src/lib/auth.ts
@@ -1,62 +1,467 @@
 /**
 * Authentication middleware for API security
 *
- * Supports API key authentication via header or environment variable.
+ * Supports two authentication methods:
+ * 1. Header-based (X-API-Key) - Used by Electron mode
+ * 2. Cookie-based (HTTP-only session cookie) - Used by web mode
+ *
+ * Auto-generates an API key on first run if none is configured.
 */

 import type { Request, Response, NextFunction } from 'express';
+import crypto from 'crypto';
+import path from 'path';
+import * as secureFs from './secure-fs.js';
+import { createLogger } from '@automaker/utils';

-// API key from environment (optional - if not set, auth is disabled)
-const API_KEY = process.env.AUTOMAKER_API_KEY;
+const logger = createLogger('Auth');
+
+// Directory for persisted auth state; falls back to './data' when DATA_DIR is unset or empty
+const DATA_DIR = process.env.DATA_DIR || './data';
+const API_KEY_FILE = path.join(DATA_DIR, '.api-key'); // read and written by ensureApiKey()
+const SESSIONS_FILE = path.join(DATA_DIR, '.sessions'); // read by loadSessions(), written by saveSessions()
+const SESSION_COOKIE_NAME = 'automaker_session';
+const SESSION_MAX_AGE_MS = 30 * 24 * 60 * 60 * 1000; // 30 days
+const WS_TOKEN_MAX_AGE_MS = 5 * 60 * 1000; // 5 minutes for WebSocket connection tokens
+
+/**
+ * Report whether an environment value is exactly the string 'true'.
+ * Anything else (unset, 'TRUE', '1', ...) counts as false.
+ */
+function isEnvTrue(envVar: string | undefined): boolean {
+  const isExactlyTrue = envVar === 'true';
+  return isExactlyTrue;
+}
+
+// Session store - persisted to file for survival across server restarts
+const validSessions = new Map<string, { createdAt: number; expiresAt: number }>();
+
+// Short-lived WebSocket connection tokens (in-memory only, not persisted)
+const wsConnectionTokens = new Map<string, { createdAt: number; expiresAt: number }>();
+
+// Clean up expired WebSocket tokens periodically (every minute).
+// The timer is unref'd so that this housekeeping interval on its own never
+// keeps the Node.js process alive, e.g. when the module is imported by a
+// script or test that should exit once its own work is done.
+setInterval(() => {
+  const now = Date.now();
+  wsConnectionTokens.forEach((data, token) => {
+    if (data.expiresAt <= now) {
+      wsConnectionTokens.delete(token);
+    }
+  });
+}, 60 * 1000).unref();
+
+/**
+ * Populate the in-memory session store from SESSIONS_FILE.
+ *
+ * Entries that have already expired are dropped. A missing file is not an
+ * error; any read or parse failure is logged as a warning and otherwise ignored.
+ */
+function loadSessions(): void {
+  try {
+    if (!secureFs.existsSync(SESSIONS_FILE)) {
+      return;
+    }
+
+    const raw = secureFs.readFileSync(SESSIONS_FILE, 'utf-8') as string;
+    const persisted = JSON.parse(raw) as Array<
+      [string, { createdAt: number; expiresAt: number }]
+    >;
+    const loadTime = Date.now();
+    const tally = { loaded: 0, expired: 0 };
+
+    for (const [token, session] of persisted) {
+      // Anything not strictly in the future counts as expired
+      if (!(session.expiresAt > loadTime)) {
+        tally.expired++;
+        continue;
+      }
+      validSessions.set(token, session);
+      tally.loaded++;
+    }
+
+    if (tally.loaded > 0 || tally.expired > 0) {
+      logger.info(`Loaded ${tally.loaded} sessions (${tally.expired} expired)`);
+    }
+  } catch (error) {
+    logger.warn('Error loading sessions:', error);
+  }
+}
+
+/**
+ * Write the whole in-memory session store to SESSIONS_FILE (async).
+ *
+ * The parent directory is created when needed and the file is written with
+ * mode 0o600. Failures are logged, never thrown.
+ */
+async function saveSessions(): Promise<void> {
+  try {
+    const targetDir = path.dirname(SESSIONS_FILE);
+    await secureFs.mkdir(targetDir, { recursive: true });
+
+    const serialized = JSON.stringify(Array.from(validSessions.entries()));
+    await secureFs.writeFile(SESSIONS_FILE, serialized, {
+      encoding: 'utf-8',
+      mode: 0o600,
+    });
+  } catch (error) {
+    logger.error('Failed to save sessions:', error);
+  }
+}
+
+// Restore persisted sessions as soon as the module is loaded
+loadSessions();
+
+/**
+ * Resolve the API key used to authenticate requests.
+ *
+ * Sources, in priority order:
+ * 1. AUTOMAKER_API_KEY environment variable (Electron passes it this way)
+ * 2. The key stored in API_KEY_FILE
+ * 3. A freshly generated UUID, which is then saved to API_KEY_FILE
+ *
+ * File errors are logged and never thrown; a generated key is returned even
+ * when it could not be saved.
+ */
+function ensureApiKey(): string {
+  const keyFromEnv = process.env.AUTOMAKER_API_KEY;
+  if (keyFromEnv) {
+    logger.info('Using API key from environment variable');
+    return keyFromEnv;
+  }
+
+  try {
+    if (secureFs.existsSync(API_KEY_FILE)) {
+      const storedKey = (secureFs.readFileSync(API_KEY_FILE, 'utf-8') as string).trim();
+      if (storedKey) {
+        logger.info('Loaded API key from file');
+        return storedKey;
+      }
+    }
+  } catch (error) {
+    logger.warn('Error reading API key file:', error);
+  }
+
+  const generatedKey = crypto.randomUUID();
+  try {
+    secureFs.mkdirSync(path.dirname(API_KEY_FILE), { recursive: true });
+    secureFs.writeFileSync(API_KEY_FILE, generatedKey, { encoding: 'utf-8', mode: 0o600 });
+    logger.info('Generated new API key');
+  } catch (error) {
+    logger.error('Failed to save API key:', error);
+  }
+  return generatedKey;
+}
+
+// API key - always generated/loaded on startup for CSRF protection
+const API_KEY = ensureApiKey();
+
+// Width for log box content (excluding borders)
+const BOX_CONTENT_WIDTH = 67;
+
+// Print API key to console for web mode users (unless suppressed for production logging)
+// NOTE(review): the banner writes the API key in plain text through logger.info
+// whenever AUTOMAKER_HIDE_API_KEY is not exactly 'true'.
+if (!isEnvTrue(process.env.AUTOMAKER_HIDE_API_KEY)) {
+  const autoLoginEnabled = isEnvTrue(process.env.AUTOMAKER_AUTO_LOGIN);
+  const autoLoginStatus = autoLoginEnabled ? 'enabled (auto-login active)' : 'disabled';
+
+  // Build box lines with exact padding
+  // (padEnd only pads: a value longer than BOX_CONTENT_WIDTH is left as-is)
+  const header = '🔐 API Key for Web Mode Authentication'.padEnd(BOX_CONTENT_WIDTH);
+  const line1 = "When accessing via browser, you'll be prompted to enter this key:".padEnd(
+    BOX_CONTENT_WIDTH
+  );
+  const line2 = API_KEY.padEnd(BOX_CONTENT_WIDTH);
+  const line3 = 'In Electron mode, authentication is handled automatically.'.padEnd(
+    BOX_CONTENT_WIDTH
+  );
+  const line4 = `Auto-login (AUTOMAKER_AUTO_LOGIN): ${autoLoginStatus}`.padEnd(BOX_CONTENT_WIDTH);
+  const tipHeader = '💡 Tips'.padEnd(BOX_CONTENT_WIDTH);
+  const line5 = 'Set AUTOMAKER_API_KEY env var to use a fixed key'.padEnd(BOX_CONTENT_WIDTH);
+  const line6 = 'Set AUTOMAKER_AUTO_LOGIN=true to skip the login prompt'.padEnd(BOX_CONTENT_WIDTH);
+
+  logger.info(`
+╔═════════════════════════════════════════════════════════════════════╗
+║  ${header}║
+╠═════════════════════════════════════════════════════════════════════╣
+║                                                                     ║
+║  ${line1}║
+║                                                                     ║
+║  ${line2}║
+║                                                                     ║
+║  ${line3}║
+║                                                                     ║
+║  ${line4}║
+║                                                                     ║
+╠═════════════════════════════════════════════════════════════════════╣
+║  ${tipHeader}║
+╠═════════════════════════════════════════════════════════════════════╣
+║  ${line5}║
+║  ${line6}║
+╚═════════════════════════════════════════════════════════════════════╝
+`);
+} else {
+  logger.info('API key banner hidden (AUTOMAKER_HIDE_API_KEY=true)');
+}
+
+/**
+ * Produce a random token: 32 bytes from crypto.randomBytes, hex-encoded (64 characters).
+ */
+function generateSessionToken(): string {
+  const tokenBytes = crypto.randomBytes(32);
+  return tokenBytes.toString('hex');
+}
+
+/**
+ * Register a new session valid for SESSION_MAX_AGE_MS and persist the store.
+ *
+ * @returns The token identifying the new session
+ */
+export async function createSession(): Promise<string> {
+  const issuedAt = Date.now();
+  const token = generateSessionToken();
+  const session = { createdAt: issuedAt, expiresAt: issuedAt + SESSION_MAX_AGE_MS };
+
+  validSessions.set(token, session);
+  await saveSessions(); // Persist to file
+  return token;
+}
+
+/**
+ * Check whether a session token is known and not expired.
+ *
+ * The answer is returned synchronously. An expired session is removed from
+ * the store and the removal is persisted in the background (not awaited).
+ */
+export function validateSession(token: string): boolean {
+  const session = validSessions.get(token);
+  if (!session) return false;
+
+  const expired = Date.now() > session.expiresAt;
+  if (!expired) {
+    return true;
+  }
+
+  validSessions.delete(token);
+  // Fire-and-forget: persist removal asynchronously
+  saveSessions().catch((err) => logger.error('Error saving sessions:', err));
+  return false;
+}
+
+/**
+ * Remove a session from the store and persist the updated store.
+ * Removing an unknown token still rewrites the sessions file.
+ */
+export async function invalidateSession(token: string): Promise<void> {
+  validSessions.delete(token);
+  await saveSessions(); // Persist removal
+}
+
+/**
+ * Issue a WebSocket connection token that expires after WS_TOKEN_MAX_AGE_MS.
+ * The token is kept in memory only and is meant for the initial handshake.
+ */
+export function createWsConnectionToken(): string {
+  const issuedAt = Date.now();
+  const token = generateSessionToken();
+  const entry = { createdAt: issuedAt, expiresAt: issuedAt + WS_TOKEN_MAX_AGE_MS };
+
+  wsConnectionTokens.set(token, entry);
+  return token;
+}
+
+/**
+ * Consume a WebSocket connection token.
+ *
+ * Tokens are single-use: a known token is removed on the first lookup,
+ * whether or not it turns out to be expired.
+ *
+ * @returns true only when the token was known and had not yet expired
+ */
+export function validateWsConnectionToken(token: string): boolean {
+  const tokenData = wsConnectionTokens.get(token);
+  if (!tokenData) return false;
+
+  // Single-use: remove before deciding on validity
+  wsConnectionTokens.delete(token);
+
+  const expired = Date.now() > tokenData.expiresAt;
+  return !expired;
+}
+
+/**
+ * Compare a candidate key with the server API key using crypto.timingSafeEqual.
+ *
+ * timingSafeEqual needs buffers of equal length, so when the lengths differ
+ * the expected key is compared with itself (result discarded) and the
+ * candidate is rejected.
+ *
+ * @returns true only for a non-empty string equal to the API key
+ */
+export function validateApiKey(key: string): boolean {
+  if (typeof key !== 'string' || key === '') return false;
+
+  const expected = Buffer.from(API_KEY);
+  const provided = Buffer.from(key);
+  const sameLength = provided.length === expected.length;
+
+  // Always perform one comparison; fall back to expected-vs-expected on length mismatch
+  const matches = crypto.timingSafeEqual(sameLength ? provided : expected, expected);
+  return sameLength && matches;
+}
+
+/**
+ * Options applied to the session cookie.
+ *
+ * - httpOnly: the cookie is not readable from JavaScript
+ * - secure: HTTPS only when NODE_ENV is 'production'
+ * - sameSite 'lax': sent for same-site requests and top-level navigations,
+ *   but not cross-origin fetch/XHR
+ * - maxAge: SESSION_MAX_AGE_MS
+ */
+export function getSessionCookieOptions(): {
+  httpOnly: boolean;
+  secure: boolean;
+  sameSite: 'strict' | 'lax' | 'none';
+  maxAge: number;
+  path: string;
+} {
+  const isProduction = process.env.NODE_ENV === 'production';
+  return {
+    httpOnly: true,
+    secure: isProduction,
+    sameSite: 'lax',
+    maxAge: SESSION_MAX_AGE_MS,
+    path: '/',
+  };
+}
+
+/**
+ * Expose the name under which the session cookie is stored.
+ */
+export function getSessionCookieName(): string {
+  const cookieName: string = SESSION_COOKIE_NAME;
+  return cookieName;
+}
+
+/**
+ * Authentication result type
+ *
+ * On failure, errorType records why:
+ * - 'invalid_api_key': an API key was supplied but did not validate
+ * - 'invalid_session': a session token was supplied via header or query but did not validate
+ * - 'no_auth': no usable credential was found (this includes a session cookie that did not validate)
+ */
+type AuthResult =
+  | { authenticated: true }
+  | { authenticated: false; errorType: 'invalid_api_key' | 'invalid_session' | 'no_auth' };
+
+/**
+ * Core authentication check - shared between middleware and status check
+ *
+ * Credentials are considered in a fixed order and the first one present
+ * decides the outcome; later sources are not consulted:
+ * 1. `x-api-key` header (Electron mode)
+ * 2. `x-session-token` header (web mode with explicit token)
+ * 3. `apiKey` query parameter (fallback)
+ * 4. `token` query parameter (web mode - needed for image loads)
+ * 5. Session cookie (web mode); an invalid cookie is reported as 'no_auth'
+ */
+function checkAuthentication(
+  headers: Record<string, string | string[] | undefined>,
+  query: Record<string, string | undefined>,
+  cookies: Record<string, string | undefined>
+): AuthResult {
+  // Explicit credentials in priority order; values are read lazily
+  const explicitSources: Array<{
+    read: () => string | undefined;
+    isValid: (value: string) => boolean;
+    errorType: 'invalid_api_key' | 'invalid_session';
+  }> = [
+    {
+      read: () => headers['x-api-key'] as string | undefined,
+      isValid: (value) => validateApiKey(value),
+      errorType: 'invalid_api_key',
+    },
+    {
+      read: () => headers['x-session-token'] as string | undefined,
+      isValid: (value) => validateSession(value),
+      errorType: 'invalid_session',
+    },
+    {
+      read: () => query.apiKey,
+      isValid: (value) => validateApiKey(value),
+      errorType: 'invalid_api_key',
+    },
+    {
+      read: () => query.token,
+      isValid: (value) => validateSession(value),
+      errorType: 'invalid_session',
+    },
+  ];
+
+  for (const source of explicitSources) {
+    const credential = source.read();
+    if (!credential) continue;
+    return source.isValid(credential)
+      ? { authenticated: true }
+      : { authenticated: false, errorType: source.errorType };
+  }
+
+  // Session cookie (web mode)
+  const cookieToken = cookies[SESSION_COOKIE_NAME];
+  if (cookieToken && validateSession(cookieToken)) {
+    return { authenticated: true };
+  }
+
+  return { authenticated: false, errorType: 'no_auth' };
+}

 /**
 * Authentication middleware
 *
- * If AUTOMAKER_API_KEY is set, requires matching key in X-API-Key header.
- * If not set, allows all requests (development mode).
+ * Accepts either:
+ * 1. X-API-Key header (for Electron mode)
+ * 2. X-Session-Token header (for web mode with explicit token)
+ * 3. apiKey query parameter (fallback for Electron, cases where headers can't be set)
+ * 4. token query parameter (fallback for web mode, needed for image loads via CSS/img tags)
+ * 5. Session cookie (for web mode)
+ *
+ * Responds 403 for an invalid API key or an invalid header/query session
+ * token, and 401 when no usable credential is found. When
+ * AUTOMAKER_DISABLE_AUTH is 'true' every request is passed through unchecked.
 */
 export function authMiddleware(req: Request, res: Response, next: NextFunction): void {
-  // If no API key is configured, allow all requests
-  if (!API_KEY) {
+  // Allow disabling auth for local/trusted networks
+  if (isEnvTrue(process.env.AUTOMAKER_DISABLE_AUTH)) {
    next();
    return;
  }

-  // Check for API key in header
-  const providedKey = req.headers['x-api-key'] as string | undefined;
+  // req.cookies may be undefined (e.g. no cookie parser ran), hence the fallback to {}
+  const result = checkAuthentication(
+    req.headers as Record<string, string | string[] | undefined>,
+    req.query as Record<string, string | undefined>,
+    (req.cookies || {}) as Record<string, string | undefined>
+  );

-  if (!providedKey) {
-    res.status(401).json({
-      success: false,
-      error: 'Authentication required. Provide X-API-Key header.',
-    });
+  if (result.authenticated) {
+    next();
    return;
  }

-  if (providedKey !== API_KEY) {
-    res.status(403).json({
-      success: false,
-      error: 'Invalid API key.',
-    });
-    return;
+  // Return appropriate error based on what failed
+  switch (result.errorType) {
+    case 'invalid_api_key':
+      res.status(403).json({
+        success: false,
+        error: 'Invalid API key.',
+      });
+      break;
+    case 'invalid_session':
+      res.status(403).json({
+        success: false,
+        error: 'Invalid or expired session token.',
+      });
+      break;
+    case 'no_auth':
+    default:
+      res.status(401).json({
+        success: false,
+        error: 'Authentication required.',
+      });
  }
-
-  next();
 }

 /**
- * Check if authentication is enabled
+ * Check if authentication is enabled (always true now)
+ * NOTE(review): this returns true even when AUTOMAKER_DISABLE_AUTH is 'true',
+ * whereas getAuthStatus() reports enabled: false in that case - confirm intended.
 */
 export function isAuthEnabled(): boolean {
-  return !!API_KEY;
+  return true;
 }

 /**
 * Get authentication status for health endpoint
+ *
+ * `enabled` is false only when AUTOMAKER_DISABLE_AUTH is 'true'; `method` is
+ * then 'disabled', otherwise 'api_key_or_session'.
 */
 export function getAuthStatus(): { enabled: boolean; method: string } {
+  const disabled = isEnvTrue(process.env.AUTOMAKER_DISABLE_AUTH);
  return {
-    enabled: !!API_KEY,
-    method: API_KEY ? 'api_key' : 'none',
+    enabled: !disabled,
+    method: disabled ? 'disabled' : 'api_key_or_session',
  };
 }
+
+/**
+ * Report whether an Express request carries valid credentials (for status endpoint).
+ * Always true when AUTOMAKER_DISABLE_AUTH is 'true'.
+ */
+export function isRequestAuthenticated(req: Request): boolean {
+  if (isEnvTrue(process.env.AUTOMAKER_DISABLE_AUTH)) {
+    return true;
+  }
+
+  const { headers, query, cookies } = req;
+  const { authenticated } = checkAuthentication(
+    headers as Record<string, string | string[] | undefined>,
+    query as Record<string, string | undefined>,
+    (cookies || {}) as Record<string, string | undefined>
+  );
+  return authenticated;
+}
+
+/**
+ * Authenticate from raw header/query/cookie maps.
+ * Used for WebSocket authentication where we don't have Express request objects.
+ * Always true when AUTOMAKER_DISABLE_AUTH is 'true'.
+ */
+export function checkRawAuthentication(
+  headers: Record<string, string | string[] | undefined>,
+  query: Record<string, string | undefined>,
+  cookies: Record<string, string | undefined>
+): boolean {
+  const authDisabled = isEnvTrue(process.env.AUTOMAKER_DISABLE_AUTH);
+  return authDisabled || checkAuthentication(headers, query, cookies).authenticated;
+}
--- a/apps/server/src/lib/cli-detection.ts
+++ b/apps/server/src/lib/cli-detection.ts
@@ -0,0 +1,447 @@
+/**
+ * Unified CLI Detection Framework
+ *
+ * Provides consistent CLI detection and management across all providers
+ */
+
+import { spawn, execSync } from 'child_process';
+import * as fs from 'fs';
+import * as path from 'path';
+import * as os from 'os';
+import { createLogger } from '@automaker/utils';
+
+const logger = createLogger('CliDetection');
+
+/**
+ * Description of a single provider CLI as filled in by detectCli().
+ */
+export interface CliInfo {
+  /** Display name taken from the provider's CLI_CONFIGS entry */
+  name: string;
+  /** Value returned by findCommand(); stays an empty string when nothing was found */
+  command: string;
+  /** Output of the version command, when it could be obtained */
+  version?: string;
+  /** detectCli() sets this to the same value as `command` */
+  path?: string;
+  installed: boolean;
+  /** detectCli() derives this as `authMethod !== 'none'` */
+  authenticated: boolean;
+  authMethod: 'cli' | 'api_key' | 'none';
+  // NOTE(review): platform/architectures are not populated by detectCli() in this file - confirm who sets them
+  platform?: string;
+  architectures?: string[];
+}
+
+/**
+ * Options accepted by detectCli() and detectAllCLis().
+ */
+export interface CliDetectionOptions {
+  /** Timeout in milliseconds for the version command; detectCli() defaults it to 5000 */
+  timeout?: number;
+  // NOTE(review): detectCli() in this file does not act on the two WSL options below - confirm intended use
+  includeWsl?: boolean;
+  wslDistribution?: string;
+}
+
+/**
+ * Outcome of probing one provider CLI with detectCli().
+ */
+export interface CliDetectionResult {
+  cli: CliInfo;
+  /** True when the command was located and detection completed without throwing */
+  detected: boolean;
+  /** Human-readable problems collected during detection (e.g. not found, version lookup failed) */
+  issues: string[];
+}
+
+/**
+ * Per-provider detection results, as assembled by detectAllCLis().
+ * Each key is optional because the object is filled in incrementally.
+ */
+export interface UnifiedCliDetection {
+  claude?: CliDetectionResult;
+  codex?: CliDetectionResult;
+  cursor?: CliDetectionResult;
+}
+
+/**
+ * CLI Configuration for different providers
+ *
+ * For each provider:
+ * - `name`: display name used in CliInfo and in issue messages
+ * - `commands`: candidate executable names; detectCli() passes them to
+ *   findCommand(), which tries them in order
+ * - `versionArgs`: arguments passed to getCliVersion()
+ * - `installCommands`: per-platform install command returned by
+ *   getInstallInstructions()
+ *
+ * NOTE(review): the install commands and package names below cannot be
+ * verified from this file - confirm them against each vendor's documentation.
+ */
+const CLI_CONFIGS = {
+  claude: {
+    name: 'Claude CLI',
+    commands: ['claude'],
+    versionArgs: ['--version'],
+    installCommands: {
+      darwin: 'brew install anthropics/claude/claude',
+      linux: 'curl -fsSL https://claude.ai/install.sh | sh',
+      win32: 'iwr https://claude.ai/install.ps1 -UseBasicParsing | iex',
+    },
+  },
+  codex: {
+    name: 'Codex CLI',
+    commands: ['codex', 'openai'],
+    versionArgs: ['--version'],
+    installCommands: {
+      darwin: 'npm install -g @openai/codex-cli',
+      linux: 'npm install -g @openai/codex-cli',
+      win32: 'npm install -g @openai/codex-cli',
+    },
+  },
+  cursor: {
+    name: 'Cursor CLI',
+    commands: ['cursor-agent', 'cursor'],
+    versionArgs: ['--version'],
+    installCommands: {
+      darwin: 'brew install cursor/cursor/cursor-agent',
+      linux: 'curl -fsSL https://cursor.sh/install.sh | sh',
+      win32: 'iwr https://cursor.sh/install.ps1 -UseBasicParsing | iex',
+    },
+  },
+} as const;
+
+/**
+ * Detect if a CLI is installed and available
+ *
+ * Resolves the provider's command via findCommand(), then queries its version
+ * and authentication state. Failures are collected in `issues` rather than
+ * thrown, so this function always resolves.
+ *
+ * @param provider - Key of CLI_CONFIGS identifying which CLI to probe
+ * @param options - Detection options; only `timeout` (ms, default 5000) is used here
+ * @returns The CLI info, whether the command was found, and any issues encountered
+ */
+export async function detectCli(
+  provider: keyof typeof CLI_CONFIGS,
+  options: CliDetectionOptions = {}
+): Promise<CliDetectionResult> {
+  const config = CLI_CONFIGS[provider];
+  // The WSL-related options are accepted for interface compatibility but not acted on here
+  const { timeout = 5000 } = options;
+  const issues: string[] = [];
+
+  const cliInfo: CliInfo = {
+    name: config.name,
+    command: '',
+    installed: false,
+    authenticated: false,
+    authMethod: 'none',
+  };
+
+  try {
+    // Find the command in PATH
+    const command = await findCommand([...config.commands]);
+    if (!command) {
+      issues.push(`${config.name} not found in PATH`);
+      return { cli: cliInfo, detected: false, issues };
+    }
+
+    cliInfo.command = command;
+    cliInfo.path = command;
+    cliInfo.installed = true;
+
+    // Get version; a failure here is reported but does not abort detection
+    try {
+      cliInfo.version = await getCliVersion(command, [...config.versionArgs], timeout);
+    } catch (error) {
+      issues.push(`Failed to get ${config.name} version: ${error}`);
+    }
+
+    // Check authentication
+    cliInfo.authMethod = await checkCliAuth(provider, command);
+    cliInfo.authenticated = cliInfo.authMethod !== 'none';
+
+    return { cli: cliInfo, detected: true, issues };
+  } catch (error) {
+    issues.push(`Error detecting ${config.name}: ${error}`);
+    return { cli: cliInfo, detected: false, issues };
+  }
+}
+
+/**
+ * Probe every provider listed in CLI_CONFIGS.
+ *
+ * All probes are started together and awaited as a group; each provider's
+ * result is stored under its own key.
+ *
+ * @param options - Forwarded unchanged to detectCli() for every provider
+ */
+export async function detectAllCLis(
+  options: CliDetectionOptions = {}
+): Promise<UnifiedCliDetection> {
+  const providers = Object.keys(CLI_CONFIGS) as Array<keyof typeof CLI_CONFIGS>;
+
+  // Run the detections concurrently; outcomes come back in provider order
+  const outcomes = await Promise.all(providers.map((provider) => detectCli(provider, options)));
+
+  const detection: UnifiedCliDetection = {};
+  providers.forEach((provider, index) => {
+    detection[provider] = outcomes[index];
+  });
+
+  return detection;
+}
+
+/**
+ * Find the first available command from a list of alternatives
+ *
+ * Each candidate is looked up with `where` (Windows) or `which` (elsewhere).
+ *
+ * @param commands - Candidate command names, tried in order
+ * @returns First line of the lookup output for the first candidate found, or
+ *   null when none of the candidates resolves
+ */
+export async function findCommand(commands: string[]): Promise<string | null> {
+  const whichCommand = process.platform === 'win32' ? 'where' : 'which';
+
+  for (const command of commands) {
+    try {
+      const output = execSync(`${whichCommand} ${command}`, {
+        encoding: 'utf8',
+        timeout: 2000,
+        // Keep the lookup tool's "not found" diagnostics off this process's stderr
+        stdio: ['ignore', 'pipe', 'ignore'],
+      });
+
+      // `where` may list several matches separated by CRLF. Take the first
+      // one and strip the '\r' that splitting on '\n' alone would leave behind.
+      const firstMatch = output.split(/\r?\n/).find((line) => line.trim() !== '');
+      if (firstMatch) {
+        return firstMatch.trim();
+      }
+    } catch {
+      // Command not found, try next
+    }
+  }
+  return null;
+}
+
+/**
+ * Run a CLI with the given arguments and return what it prints.
+ *
+ * Resolves with trimmed stdout when the process exits with code 0 and wrote
+ * something to stdout. Otherwise rejects with trimmed stderr if there is any,
+ * or with a "Command exited with code N" message. Spawn errors reject as well.
+ *
+ * @param command - Executable to run
+ * @param args - Arguments passed to the executable
+ * @param timeout - Passed to spawn() as its `timeout` option (milliseconds)
+ */
+export async function getCliVersion(
+  command: string,
+  args: string[],
+  timeout: number = 5000
+): Promise<string> {
+  return new Promise((resolve, reject) => {
+    const proc = spawn(command, args, { stdio: 'pipe', timeout });
+
+    const outChunks: string[] = [];
+    const errChunks: string[] = [];
+
+    proc.stdout?.on('data', (chunk) => {
+      outChunks.push(chunk.toString());
+    });
+
+    proc.stderr?.on('data', (chunk) => {
+      errChunks.push(chunk.toString());
+    });
+
+    proc.on('close', (exitCode) => {
+      const stdoutText = outChunks.join('');
+      const stderrText = errChunks.join('');
+
+      if (exitCode === 0 && stdoutText) {
+        resolve(stdoutText.trim());
+        return;
+      }
+      if (stderrText) {
+        reject(stderrText.trim());
+        return;
+      }
+      reject(`Command exited with code ${exitCode}`);
+    });
+
+    proc.on('error', reject);
+  });
+}
+
+/**
+ * Determine how (if at all) a provider's CLI is authenticated.
+ *
+ * Dispatches to the provider-specific check; any error thrown by that check
+ * is swallowed and reported as 'none'.
+ *
+ * @param provider - Key of CLI_CONFIGS
+ * @param command - Resolved CLI command to probe
+ */
+export async function checkCliAuth(
+  provider: keyof typeof CLI_CONFIGS,
+  command: string
+): Promise<'cli' | 'api_key' | 'none'> {
+  try {
+    if (provider === 'claude') {
+      return await checkClaudeAuth(command);
+    }
+    if (provider === 'codex') {
+      return await checkCodexAuth(command);
+    }
+    if (provider === 'cursor') {
+      return await checkCursorAuth(command);
+    }
+    // Unrecognised provider
+    return 'none';
+  } catch {
+    return 'none';
+  }
+}
+
+/**
+ * Check Claude CLI authentication
+ *
+ * Order of checks:
+ * 1. ANTHROPIC_API_KEY present in the environment -> 'api_key'
+ * 2. `<command> --version` yields output -> 'cli'
+ * 3. Otherwise run `<command> whoami` and accept exit code 0 with stdout and
+ *    no "not authenticated" text on stderr -> 'cli', else 'none'
+ *
+ * NOTE(review): because step 2 returns 'cli' whenever the version command
+ * succeeds, the whoami probe in step 3 only runs when the version command
+ * fails or prints nothing. A successful --version does not by itself show
+ * that the CLI is logged in - confirm whether step 2 should be removed.
+ * NOTE(review): presumably the Claude CLI supports a `whoami` subcommand -
+ * verify before relying on step 3.
+ */
+async function checkClaudeAuth(command: string): Promise<'cli' | 'api_key' | 'none'> {
+  try {
+    // Check for environment variable
+    if (process.env.ANTHROPIC_API_KEY) {
+      return 'api_key';
+    }
+
+    // Try running a simple command to check CLI auth
+    const result = await getCliVersion(command, ['--version'], 3000);
+    if (result) {
+      return 'cli'; // If version works, assume CLI is authenticated
+    }
+  } catch {
+    // Version command might work even without auth, so we need a better check
+  }
+
+  // Try a more specific auth check
+  // The promise below only ever resolves (never rejects): every failure path maps to 'none'
+  return new Promise((resolve) => {
+    const child = spawn(command, ['whoami'], {
+      stdio: 'pipe',
+      timeout: 3000,
+    });
+
+    let stdout = '';
+    let stderr = '';
+
+    child.stdout?.on('data', (data) => {
+      stdout += data.toString();
+    });
+
+    child.stderr?.on('data', (data) => {
+      stderr += data.toString();
+    });
+
+    child.on('close', (code) => {
+      if (code === 0 && stdout && !stderr.includes('not authenticated')) {
+        resolve('cli');
+      } else {
+        resolve('none');
+      }
+    });
+
+    // Spawn failure (e.g. executable missing) counts as unauthenticated
+    child.on('error', () => {
+      resolve('none');
+    });
+  });
+}
+
+/**
+ * Check Codex CLI authentication
+ *
+ * Returns 'api_key' when OPENAI_API_KEY is set. Otherwise the CLI is treated
+ * as authenticated ('cli') whenever `<command> --version` produces output,
+ * and 'none' when that command fails.
+ */
+async function checkCodexAuth(command: string): Promise<'cli' | 'api_key' | 'none'> {
+  if (process.env.OPENAI_API_KEY) {
+    return 'api_key';
+  }
+
+  let versionOutput = '';
+  try {
+    versionOutput = await getCliVersion(command, ['--version'], 3000);
+  } catch {
+    // Version check failed; versionOutput stays empty
+  }
+
+  return versionOutput ? 'cli' : 'none';
+}
+
+/**
+ * Check Cursor CLI authentication
+ *
+ * Checks, in order: the CURSOR_API_KEY environment variable ('api_key'), a
+ * credentials/auth JSON file under the home directory containing an
+ * accessToken, token or apiKey field ('cli'), and finally whether
+ * `<command> --version` produces output ('cli'). Otherwise 'none'.
+ */
+async function checkCursorAuth(command: string): Promise<'cli' | 'api_key' | 'none'> {
+  if (process.env.CURSOR_API_KEY) {
+    return 'api_key';
+  }
+
+  const home = os.homedir();
+  const credentialPaths = [
+    path.join(home, '.cursor', 'credentials.json'),
+    path.join(home, '.config', 'cursor', 'credentials.json'),
+    path.join(home, '.cursor', 'auth.json'),
+    path.join(home, '.config', 'cursor', 'auth.json'),
+  ];
+
+  // True when the file exists, parses as JSON and carries one of the token fields
+  const holdsToken = (credPath: string): boolean => {
+    try {
+      if (!fs.existsSync(credPath)) {
+        return false;
+      }
+      const creds = JSON.parse(fs.readFileSync(credPath, 'utf8'));
+      return Boolean(creds.accessToken || creds.token || creds.apiKey);
+    } catch {
+      // Invalid credentials file
+      return false;
+    }
+  };
+
+  if (credentialPaths.some(holdsToken)) {
+    return 'cli';
+  }
+
+  // Fall back to a simple command
+  let versionOutput = '';
+  try {
+    versionOutput = await getCliVersion(command, ['--version'], 3000);
+  } catch {
+    // Version check failed
+  }
+
+  return versionOutput ? 'cli' : 'none';
+}
+
+/**
+ * Look up the install command for a provider on a given platform.
+ *
+ * @param provider - Key of CLI_CONFIGS
+ * @param platform - Platform to look up; defaults to the current process platform
+ * @returns The configured install command, or an explanatory message when the
+ *   provider has no entry for that platform
+ */
+export function getInstallInstructions(
+  provider: keyof typeof CLI_CONFIGS,
+  platform: NodeJS.Platform = process.platform
+): string {
+  const { installCommands } = CLI_CONFIGS[provider];
+  const installCommand = installCommands[platform as keyof typeof installCommands];
+
+  if (installCommand) {
+    return installCommand;
+  }
+
+  return `No installation instructions available for ${provider} on ${platform}`;
+}
+
+/**
+ * List candidate install locations for a provider's CLI on the current platform.
+ *
+ * Only the provider's first configured command name is used. Returns an empty
+ * array on platforms other than darwin, linux and win32.
+ */
+export function getPlatformCliPaths(provider: keyof typeof CLI_CONFIGS): string[] {
+  const binary = CLI_CONFIGS[provider].commands[0];
+  const home = os.homedir();
+
+  if (process.platform === 'darwin') {
+    return [
+      `/usr/local/bin/${binary}`,
+      `/opt/homebrew/bin/${binary}`,
+      path.join(home, '.local', 'bin', binary),
+    ];
+  }
+
+  if (process.platform === 'linux') {
+    return [
+      `/usr/bin/${binary}`,
+      `/usr/local/bin/${binary}`,
+      path.join(home, '.local', 'bin', binary),
+      path.join(home, '.npm', 'global', 'bin', binary),
+    ];
+  }
+
+  if (process.platform === 'win32') {
+    const executable = `${binary}.exe`;
+    // ProgramFiles may be unset; an empty segment keeps path.join well-defined
+    const programFiles = process.env.ProgramFiles || '';
+    return [
+      path.join(home, 'AppData', 'Local', 'Programs', binary, executable),
+      path.join(programFiles, binary, executable),
+      path.join(programFiles, binary, 'bin', executable),
+    ];
+  }
+
+  return [];
+}
+
+/**
+ * Summarise what is wrong with a detected CLI.
+ *
+ * Reports a missing installation; for an installed CLI, reports a missing
+ * version and/or missing authentication.
+ *
+ * @returns `valid` is true exactly when `issues` is empty
+ */
+export function validateCliInstallation(cliInfo: CliInfo): {
+  valid: boolean;
+  issues: string[];
+} {
+  const { installed, version, authMethod } = cliInfo;
+  const issues: string[] = [];
+
+  if (!installed) {
+    issues.push('CLI is not installed');
+  } else {
+    if (!version) {
+      issues.push('Could not determine CLI version');
+    }
+    if (authMethod === 'none') {
+      issues.push('CLI is not authenticated');
+    }
+  }
+
+  return { valid: issues.length === 0, issues };
+}
--- a/apps/server/src/lib/codex-auth.ts
+++ b/apps/server/src/lib/codex-auth.ts
@@ -0,0 +1,68 @@
+/**
+ * Shared utility for checking Codex CLI authentication status
+ *
+ * Uses 'codex login status' command to verify authentication.
+ * Never assumes authenticated - only returns true if CLI confirms.
+ */
+
+import { spawnProcess } from '@automaker/platform';
+import { findCodexCliPath } from '@automaker/platform';
+import { createLogger } from '@automaker/utils';
+
+const logger = createLogger('CodexAuth');
+
+const CODEX_COMMAND = 'codex';
+const OPENAI_API_KEY_ENV = 'OPENAI_API_KEY';
+
+/**
+ * Result of checkCodexAuthentication().
+ */
+export interface CodexAuthCheckResult {
+  authenticated: boolean;
+  /**
+   * 'none' whenever `authenticated` is false. When authenticated, 'api_key_env'
+   * if OPENAI_API_KEY is set in the environment, otherwise 'cli_authenticated'.
+   */
+  method: 'api_key_env' | 'cli_authenticated' | 'none';
+}
+
+/**
+ * Check Codex authentication status using 'codex login status' command
+ *
+ * The CLI is considered authenticated only when the command exits with code 0
+ * and its combined stdout/stderr says "logged in" without saying
+ * "not logged in". Any failure to run the command is logged and reported as
+ * not authenticated.
+ *
+ * @param cliPath Optional CLI path. If not provided, will attempt to find it.
+ * @returns Authentication status and method
+ */
+export async function checkCodexAuthentication(
+  cliPath?: string | null
+): Promise<CodexAuthCheckResult> {
+  const resolvedCliPath = cliPath || (await findCodexCliPath());
+  const hasApiKey = !!process.env[OPENAI_API_KEY_ENV];
+
+  // If CLI is not installed, cannot be authenticated
+  if (!resolvedCliPath) {
+    logger.info('CLI not found');
+    return { authenticated: false, method: 'none' };
+  }
+
+  try {
+    const result = await spawnProcess({
+      command: resolvedCliPath || CODEX_COMMAND,
+      args: ['login', 'status'],
+      cwd: process.cwd(),
+      env: {
+        ...process.env,
+        TERM: 'dumb', // Avoid interactive output
+      },
+    });
+
+    // Check both stdout and stderr for "logged in" - Codex CLI outputs to stderr
+    const combinedOutput = (result.stdout + result.stderr).toLowerCase();
+    // "not logged in" contains "logged in" as a substring, so exclude it
+    // explicitly instead of relying on the exit code alone.
+    const isLoggedIn =
+      combinedOutput.includes('logged in') && !combinedOutput.includes('not logged in');
+
+    if (result.exitCode === 0 && isLoggedIn) {
+      // Determine auth method based on what we know
+      const method = hasApiKey ? 'api_key_env' : 'cli_authenticated';
+      logger.info(`✓ Authenticated (${method})`);
+      return { authenticated: true, method };
+    }
+
+    logger.info('Not authenticated');
+    return { authenticated: false, method: 'none' };
+  } catch (error) {
+    logger.error('Failed to check authentication:', error);
+    return { authenticated: false, method: 'none' };
+  }
+}
--- a/apps/server/src/lib/error-handler.ts
+++ b/apps/server/src/lib/error-handler.ts
@@ -0,0 +1,414 @@
+/**
+ * Unified Error Handling System for CLI Providers
+ *
+ * Provides consistent error classification, user-friendly messages, and debugging support
+ * across all AI providers (Claude, Codex, Cursor)
+ */
+
+import { createLogger } from '@automaker/utils';
+
+const logger = createLogger('ErrorHandler');
+
+/**
+ * Categories that classifyError() can assign to an error.
+ *
+ * NOTE(review): VALIDATION, CLI_NOT_INSTALLED and INVALID_REQUEST have no
+ * matching entry in ERROR_PATTERNS in this file, so classifyError() never
+ * produces them - confirm whether they are set elsewhere.
+ */
+export enum ErrorType {
+  AUTHENTICATION = 'authentication',
+  BILLING = 'billing',
+  RATE_LIMIT = 'rate_limit',
+  NETWORK = 'network',
+  TIMEOUT = 'timeout',
+  VALIDATION = 'validation',
+  PERMISSION = 'permission',
+  CLI_NOT_FOUND = 'cli_not_found',
+  CLI_NOT_INSTALLED = 'cli_not_installed',
+  MODEL_NOT_SUPPORTED = 'model_not_supported',
+  INVALID_REQUEST = 'invalid_request',
+  SERVER_ERROR = 'server_error',
+  // Assigned by classifyError() when no pattern matches
+  UNKNOWN = 'unknown',
+}
+
+/**
+ * Severity attached to each classified error.
+ * The patterns in this file use MEDIUM and HIGH only.
+ */
+export enum ErrorSeverity {
+  LOW = 'low',
+  MEDIUM = 'medium',
+  HIGH = 'high',
+  CRITICAL = 'critical',
+}
+
+/**
+ * Result of classifyError(): the matched category plus messages for users
+ * and for debugging.
+ */
+export interface ErrorClassification {
+  type: ErrorType;
+  severity: ErrorSeverity;
+  /** Fixed, user-facing text taken from the matched pattern */
+  userMessage: string;
+  /** Raw text extracted from the original error */
+  technicalMessage: string;
+  suggestedAction?: string;
+  retryable: boolean;
+  /** Provider name passed through from the caller, if any */
+  provider?: string;
+  /** Caller-supplied context, passed through unchanged */
+  context?: Record<string, any>;
+}
+
+/**
+ * One entry of ERROR_PATTERNS: a category and the regular expressions that
+ * select it.
+ */
+export interface ErrorPattern {
+  type: ErrorType;
+  severity: ErrorSeverity;
+  /** The entry matches when any one of these matches the error text */
+  patterns: RegExp[];
+  userMessage: string;
+  suggestedAction?: string;
+  retryable: boolean;
+}
+
+/**
+ * Error patterns for different types of errors
+ *
+ * Entries are evaluated in array order by classifyError() and the first entry
+ * with a matching regex wins, so earlier entries take precedence over later
+ * ones when an error text could fit several categories.
+ */
+const ERROR_PATTERNS: ErrorPattern[] = [
+  // Authentication errors
+  {
+    type: ErrorType.AUTHENTICATION,
+    severity: ErrorSeverity.HIGH,
+    patterns: [
+      /unauthorized/i,
+      /authentication.*fail/i,
+      /invalid_api_key/i,
+      /invalid api key/i,
+      /not authenticated/i,
+      /please.*log/i,
+      /token.*revoked/i,
+      /oauth.*error/i,
+      /credentials.*invalid/i,
+    ],
+    userMessage: 'Authentication failed. Please check your API key or login credentials.',
+    suggestedAction:
+      "Verify your API key is correct and hasn't expired, or run the CLI login command.",
+    retryable: false,
+  },
+
+  // Billing errors
+  {
+    type: ErrorType.BILLING,
+    severity: ErrorSeverity.HIGH,
+    patterns: [
+      /credit.*balance.*low/i,
+      /insufficient.*credit/i,
+      /billing.*issue/i,
+      /payment.*required/i,
+      /usage.*exceeded/i,
+      /quota.*exceeded/i,
+      /add.*credit/i,
+    ],
+    userMessage: 'Account has insufficient credits or billing issues.',
+    suggestedAction: 'Please add credits to your account or check your billing settings.',
+    retryable: false,
+  },
+
+  // Rate limit errors
+  {
+    type: ErrorType.RATE_LIMIT,
+    severity: ErrorSeverity.MEDIUM,
+    patterns: [
+      /rate.*limit/i,
+      /too.*many.*request/i,
+      /limit.*reached/i,
+      /try.*later/i,
+      // Word boundaries keep the status code from matching inside longer numbers
+      /\b429\b/,
+      /reset.*time/i,
+      /upgrade.*plan/i,
+    ],
+    userMessage: 'Rate limit reached. Please wait before trying again.',
+    suggestedAction: 'Wait a few minutes before retrying, or consider upgrading your plan.',
+    retryable: true,
+  },
+
+  // Network errors
+  // (timeouts are intentionally left to the TIMEOUT entry below; listing
+  // /timeout/ here would shadow it because this entry is evaluated first)
+  {
+    type: ErrorType.NETWORK,
+    severity: ErrorSeverity.MEDIUM,
+    patterns: [/network/i, /connection/i, /dns/i, /econnrefused/i, /enotfound/i],
+    userMessage: 'Network connection issue.',
+    suggestedAction: 'Check your internet connection and try again.',
+    retryable: true,
+  },
+
+  // Timeout errors
+  {
+    type: ErrorType.TIMEOUT,
+    severity: ErrorSeverity.MEDIUM,
+    patterns: [/timeout/i, /aborted/i, /time.*out/i],
+    userMessage: 'Operation timed out.',
+    suggestedAction: 'Try again with a simpler request or check your connection.',
+    retryable: true,
+  },
+
+  // Permission errors
+  {
+    type: ErrorType.PERMISSION,
+    severity: ErrorSeverity.HIGH,
+    patterns: [
+      /permission.*denied/i,
+      /access.*denied/i,
+      /forbidden/i,
+      /\b403\b/,
+      /not.*authorized/i,
+    ],
+    userMessage: 'Permission denied.',
+    suggestedAction: 'Check if you have the required permissions for this operation.',
+    retryable: false,
+  },
+
+  // CLI not found
+  {
+    type: ErrorType.CLI_NOT_FOUND,
+    severity: ErrorSeverity.HIGH,
+    patterns: [/command not found/i, /not recognized/i, /not.*installed/i, /ENOENT/i],
+    userMessage: 'CLI tool not found.',
+    suggestedAction: "Please install the required CLI tool and ensure it's in your PATH.",
+    retryable: false,
+  },
+
+  // Model not supported
+  {
+    type: ErrorType.MODEL_NOT_SUPPORTED,
+    severity: ErrorSeverity.HIGH,
+    patterns: [/model.*not.*support/i, /unknown.*model/i, /invalid.*model/i],
+    userMessage: 'Model not supported.',
+    suggestedAction: 'Check available models and use a supported one.',
+    retryable: false,
+  },
+
+  // Server errors
+  {
+    type: ErrorType.SERVER_ERROR,
+    severity: ErrorSeverity.HIGH,
+    patterns: [
+      /internal.*server/i,
+      /server.*error/i,
+      /\b500\b/,
+      /\b502\b/,
+      /\b503\b/,
+      /\b504\b/,
+    ],
+    userMessage: 'Server error occurred.',
+    suggestedAction: 'Try again in a few minutes or contact support if the issue persists.',
+    retryable: true,
+  },
+];
+
+/**
+ * Map an arbitrary error value onto an ErrorClassification.
+ *
+ * The error's text is tested against ERROR_PATTERNS in order; the first entry
+ * with any matching regex supplies type, severity, messages and retryability.
+ * When nothing matches, a retryable UNKNOWN classification is returned.
+ *
+ * @param error - Anything that was thrown or returned as an error
+ * @param provider - Optional provider name, copied into the result
+ * @param context - Optional caller context, copied into the result
+ */
+export function classifyError(
+  error: unknown,
+  provider?: string,
+  context?: Record<string, any>
+): ErrorClassification {
+  const errorText = getErrorText(error);
+
+  const matched = ERROR_PATTERNS.find((candidate) =>
+    candidate.patterns.some((regex) => regex.test(errorText))
+  );
+
+  if (matched) {
+    return {
+      type: matched.type,
+      severity: matched.severity,
+      userMessage: matched.userMessage,
+      technicalMessage: errorText,
+      suggestedAction: matched.suggestedAction,
+      retryable: matched.retryable,
+      provider,
+      context,
+    };
+  }
+
+  // No known pattern applies
+  return {
+    type: ErrorType.UNKNOWN,
+    severity: ErrorSeverity.MEDIUM,
+    userMessage: 'An unexpected error occurred.',
+    technicalMessage: errorText,
+    suggestedAction: 'Please try again or contact support if the issue persists.',
+    retryable: true,
+    provider,
+    context,
+  };
+}
+
+/**
+ * Build a single user-facing sentence for an error.
+ *
+ * The classification's user message is followed by its suggested action (when
+ * present) and, when a provider is given, prefixed with the upper-cased
+ * provider name in square brackets.
+ */
+export function getUserFriendlyErrorMessage(error: unknown, provider?: string): string {
+  const { userMessage, suggestedAction } = classifyError(error, provider);
+
+  const text = suggestedAction ? `${userMessage} ${suggestedAction}` : userMessage;
+
+  return provider ? `[${provider.toUpperCase()}] ${text}` : text;
+}
+
+/**
+ * Tell whether classifyError() marks the error as retryable.
+ */
+export function isRetryableError(error: unknown): boolean {
+  return classifyError(error).retryable;
+}
+
+/**
+ * Tell whether classifyError() puts the error in the AUTHENTICATION category.
+ */
+export function isAuthenticationError(error: unknown): boolean {
+  const { type } = classifyError(error);
+  return type === ErrorType.AUTHENTICATION;
+}
+
+/**
+ * Tell whether classifyError() puts the error in the BILLING category.
+ */
+export function isBillingError(error: unknown): boolean {
+  const { type } = classifyError(error);
+  return type === ErrorType.BILLING;
+}
+
+/**
+ * Tell whether classifyError() puts the error in the RATE_LIMIT category.
+ */
+export function isRateLimitError(error: unknown): boolean {
+  const { type } = classifyError(error);
+  return type === ErrorType.RATE_LIMIT;
+}
+
+/**
+ * Get error text from various error types
+ *
+ * Always returns a string and never throws:
+ * - strings are returned as-is
+ * - Error instances yield their `message`
+ * - other objects yield, in order of preference, a non-empty string
+ *   `message`, a non-empty string `error.message`, a string `error`, the
+ *   JSON form of `error`, or the JSON form of the whole object
+ * - anything else is converted with String()
+ */
+function getErrorText(error: unknown): string {
+  // JSON.stringify throws on circular structures and returns undefined for
+  // values it cannot represent; fall back to String() in both cases.
+  const safeStringify = (value: unknown): string => {
+    try {
+      return JSON.stringify(value) ?? String(value);
+    } catch {
+      return String(value);
+    }
+  };
+
+  if (typeof error === 'string') {
+    return error;
+  }
+
+  if (error instanceof Error) {
+    return error.message;
+  }
+
+  if (typeof error === 'object' && error !== null) {
+    // Handle structured error objects
+    const errorObj = error as { message?: unknown; error?: unknown };
+
+    // Only accept string messages so the declared return type holds
+    if (typeof errorObj.message === 'string' && errorObj.message) {
+      return errorObj.message;
+    }
+
+    const nested = errorObj.error;
+
+    if (typeof nested === 'object' && nested !== null) {
+      const nestedMessage = (nested as { message?: unknown }).message;
+      if (typeof nestedMessage === 'string' && nestedMessage) {
+        return nestedMessage;
+      }
+    }
+
+    if (nested) {
+      return typeof nested === 'string' ? nested : safeStringify(nested);
+    }
+
+    return safeStringify(error);
+  }
+
+  return String(error);
+}
+
+/**
+ * Turn an error into the standard failure payload.
+ *
+ * The payload carries the classification's user message as `error`, along
+ * with its type, severity, retryability and suggested action. The technical
+ * message, provider and context are not included.
+ */
+export function createErrorResponse(
+  error: unknown,
+  provider?: string,
+  context?: Record<string, any>
+): {
+  success: false;
+  error: string;
+  errorType: ErrorType;
+  severity: ErrorSeverity;
+  retryable: boolean;
+  suggestedAction?: string;
+} {
+  const { userMessage, type, severity, retryable, suggestedAction } = classifyError(
+    error,
+    provider,
+    context
+  );
+
+  return {
+    success: false,
+    error: userMessage,
+    errorType: type,
+    severity,
+    retryable,
+    suggestedAction,
+  };
+}
+
+/**
+ * Classify an error and write it to the module logger at error level.
+ *
+ * @param error - The error to log
+ * @param provider - Provider name for the log line ('unknown' when omitted)
+ * @param operation - Optional operation name, appended as "during <operation>"
+ * @param additionalContext - Extra fields merged into the logged context
+ */
+export function logError(
+  error: unknown,
+  provider?: string,
+  operation?: string,
+  additionalContext?: Record<string, any>
+): void {
+  const mergedContext = { operation, ...additionalContext };
+  const classification = classifyError(error, provider, mergedContext);
+
+  const source = provider || 'unknown';
+  const phase = operation ? ` during ${operation}` : '';
+
+  logger.error(`Error in ${source}${phase}`, {
+    type: classification.type,
+    severity: classification.severity,
+    message: classification.userMessage,
+    technicalMessage: classification.technicalMessage,
+    retryable: classification.retryable,
+    suggestedAction: classification.suggestedAction,
+    context: classification.context,
+  });
+}
+
+/**
+ * Build the set of error helpers bound to one provider name.
+ * Only `classify` and `getUserMessage` use the provider; the predicates are
+ * provider-independent.
+ */
+const createProviderErrorHandler = (provider: string) => ({
+  classify: (error: unknown) => classifyError(error, provider),
+  getUserMessage: (error: unknown) => getUserFriendlyErrorMessage(error, provider),
+  isAuth: (error: unknown) => isAuthenticationError(error),
+  isBilling: (error: unknown) => isBillingError(error),
+  isRateLimit: (error: unknown) => isRateLimitError(error),
+});
+
+/**
+ * Provider-specific error handlers
+ */
+export const ProviderErrorHandler = {
+  claude: createProviderErrorHandler('claude'),
+  codex: createProviderErrorHandler('codex'),
+  cursor: createProviderErrorHandler('cursor'),
+};
+
+/**
+ * Build a function that runs an async operation with retries.
+ *
+ * The returned function calls `operation` up to `maxRetries + 1` times. After
+ * a failure it rethrows immediately when the attempts are exhausted or when
+ * `shouldRetry` rejects the error; otherwise it waits
+ * `baseDelay * 2^attempt` ms plus up to one second of random jitter.
+ *
+ * @param maxRetries - Number of retries after the first attempt
+ * @param baseDelay - Base delay in milliseconds for the exponential backoff
+ */
+export function createRetryHandler(maxRetries: number = 3, baseDelay: number = 1000) {
+  const sleep = (ms: number) => new Promise((wake) => setTimeout(wake, ms));
+
+  return async function <T>(
+    operation: () => Promise<T>,
+    shouldRetry: (error: unknown) => boolean = isRetryableError
+  ): Promise<T> {
+    let lastError: unknown;
+    let attempt = 0;
+
+    while (attempt <= maxRetries) {
+      try {
+        return await operation();
+      } catch (error) {
+        lastError = error;
+
+        // Last attempt is checked first so shouldRetry is not consulted needlessly
+        const outOfAttempts = attempt === maxRetries;
+        if (outOfAttempts || !shouldRetry(error)) {
+          throw error;
+        }
+
+        // Exponential backoff with jitter
+        const delay = baseDelay * Math.pow(2, attempt) + Math.random() * 1000;
+        logger.debug(`Retrying operation in ${delay}ms (attempt ${attempt + 1}/${maxRetries})`);
+        await sleep(delay);
+      }
+      attempt++;
+    }
+
+    throw lastError;
+  };
+}
--- a/apps/server/src/lib/events.ts
+++ b/apps/server/src/lib/events.ts
@@ -3,6 +3,9 @@
 */

 import type { EventType, EventCallback } from '@automaker/types';
+import { createLogger } from '@automaker/utils';
+
+const logger = createLogger('Events');

 // Re-export event types from shared package
 export type { EventType, EventCallback };
@@ -21,7 +24,7 @@ export function createEventEmitter(): EventEmitter {
        try {
          callback(type, payload);
        } catch (error) {
-          console.error('Error in event subscriber:', error);
+          logger.error('Error in event subscriber:', error);
        }
      }
    },
--- a/apps/server/src/lib/json-extractor.ts
+++ b/apps/server/src/lib/json-extractor.ts
@@ -0,0 +1,211 @@
+/**
+ * JSON Extraction Utilities
+ *
+ * Robust JSON extraction from AI responses that may contain markdown,
+ * code blocks, or other text mixed with JSON content.
+ *
+ * Used by various routes that parse structured output from Cursor or
+ * Claude responses when structured output is not available.
+ */
+
+import { createLogger } from '@automaker/utils';
+
+const logger = createLogger('JsonExtractor');
+
+/**
+ * Logger interface for optional custom logging
+ *
+ * extractJson() in this file only calls `debug`; `warn` is optional.
+ */
+export interface JsonExtractorLogger {
+  debug: (message: string, ...args: unknown[]) => void;
+  warn?: (message: string, ...args: unknown[]) => void;
+}
+
+/**
+ * Options for JSON extraction
+ */
+export interface ExtractJsonOptions {
+  /** Custom logger (defaults to internal logger) */
+  logger?: JsonExtractorLogger;
+  /** Required key that must be present in the extracted JSON */
+  requiredKey?: string;
+  /**
+   * Whether the required key's value must be an array.
+   * Defaults to false and is only checked when `requiredKey` is set.
+   */
+  requireArray?: boolean;
+}
+
+/**
+ * Extract JSON from response text using multiple strategies.
+ *
+ * Strategies tried in order:
+ * 1. JSON in ```json code block
+ * 2. JSON in ``` code block (no language)
+ * 3. Find a JSON object whose first key is requiredKey (if specified) by matching braces
+ * 4. Find the first JSON object by matching braces
+ * 5. Slice from the first { to the last }
+ * 6. Parse entire response as JSON
+ *
+ * A strategy's result is accepted only if it is a non-null object (arrays
+ * included) that satisfies `requiredKey` / `requireArray`. A strategy that
+ * throws or fails validation is skipped and the next one is tried.
+ *
+ * @param responseText - The raw response text that may contain JSON
+ * @param options - Optional extraction options
+ * @returns Parsed JSON object or null if extraction fails
+ */
+export function extractJson<T = Record<string, unknown>>(
+  responseText: string,
+  options: ExtractJsonOptions = {}
+): T | null {
+  const log = options.logger || logger;
+  const requiredKey = options.requiredKey;
+  const requireArray = options.requireArray ?? false;
+
+  /**
+   * Validate that the result has the required key/structure
+   */
+  const validateResult = (result: unknown): result is T => {
+    if (!result || typeof result !== 'object') return false;
+    if (requiredKey) {
+      const obj = result as Record<string, unknown>;
+      if (!(requiredKey in obj)) return false;
+      if (requireArray && !Array.isArray(obj[requiredKey])) return false;
+    }
+    return true;
+  };
+
+  /**
+   * Find the index just past the brace that closes the object opened at
+   * startIdx, or -1 if it is never closed. Braces inside JSON string literals
+   * (including after escaped quotes) are ignored so that values such as
+   * "}" do not end the object early.
+   */
+  const findMatchingBrace = (text: string, startIdx: number): number => {
+    let depth = 0;
+    let inString = false;
+    let escaped = false;
+
+    for (let i = startIdx; i < text.length; i++) {
+      const ch = text[i];
+
+      if (inString) {
+        if (escaped) {
+          escaped = false;
+        } else if (ch === '\\') {
+          escaped = true;
+        } else if (ch === '"') {
+          inString = false;
+        }
+        continue;
+      }
+
+      if (ch === '"') {
+        inString = true;
+      } else if (ch === '{') {
+        depth++;
+      } else if (ch === '}') {
+        depth--;
+        if (depth === 0) {
+          return i + 1;
+        }
+      }
+    }
+    return -1;
+  };
+
+  const strategies = [
+    // Strategy 1: JSON in ```json code block
+    () => {
+      const match = responseText.match(/```json\s*([\s\S]*?)```/);
+      if (match) {
+        log.debug('Extracting JSON from ```json code block');
+        return JSON.parse(match[1].trim());
+      }
+      return null;
+    },
+
+    // Strategy 2: JSON in ``` code block (no language specified)
+    () => {
+      const match = responseText.match(/```\s*([\s\S]*?)```/);
+      if (match) {
+        const content = match[1].trim();
+        // Only try if it looks like JSON (starts with { or [)
+        if (content.startsWith('{') || content.startsWith('[')) {
+          log.debug('Extracting JSON from ``` code block');
+          return JSON.parse(content);
+        }
+      }
+      return null;
+    },
+
+    // Strategy 3: Find JSON object starting with the required key (if specified)
+    () => {
+      if (!requiredKey) return null;
+
+      // Allow whitespace/newlines between '{' and the key so pretty-printed
+      // JSON is found too; escape the key so it is matched literally.
+      const escapedKey = requiredKey.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
+      const keyMatch = new RegExp(`\\{\\s*"${escapedKey}"`).exec(responseText);
+      if (!keyMatch) return null;
+
+      const startIdx = keyMatch.index;
+      const endIdx = findMatchingBrace(responseText, startIdx);
+      if (endIdx > startIdx) {
+        log.debug(`Extracting JSON with required key "${requiredKey}"`);
+        return JSON.parse(responseText.slice(startIdx, endIdx));
+      }
+      return null;
+    },
+
+    // Strategy 4: Find any JSON object by matching braces
+    () => {
+      const startIdx = responseText.indexOf('{');
+      if (startIdx === -1) return null;
+
+      const endIdx = findMatchingBrace(responseText, startIdx);
+      if (endIdx > startIdx) {
+        log.debug('Extracting JSON by brace matching');
+        return JSON.parse(responseText.slice(startIdx, endIdx));
+      }
+      return null;
+    },
+
+    // Strategy 5: Find JSON using first { to last } (may be less accurate)
+    () => {
+      const firstBrace = responseText.indexOf('{');
+      const lastBrace = responseText.lastIndexOf('}');
+      if (firstBrace !== -1 && lastBrace > firstBrace) {
+        log.debug('Extracting JSON from first { to last }');
+        return JSON.parse(responseText.slice(firstBrace, lastBrace + 1));
+      }
+      return null;
+    },
+
+    // Strategy 6: Try parsing the entire response as JSON
+    () => {
+      const trimmed = responseText.trim();
+      if (trimmed.startsWith('{') || trimmed.startsWith('[')) {
+        log.debug('Parsing entire response as JSON');
+        return JSON.parse(trimmed);
+      }
+      return null;
+    },
+  ];
+
+  for (const strategy of strategies) {
+    try {
+      const result = strategy();
+      if (validateResult(result)) {
+        log.debug('Successfully extracted JSON');
+        return result as T;
+      }
+    } catch {
+      // Strategy failed (typically a JSON.parse error), try next
+    }
+  }
+
+  log.debug('Failed to extract JSON from response');
+  return null;
+}
+
+/**
+ * Shorthand for extractJson() when a particular key must be present.
+ *
+ * @param responseText - The raw response text
+ * @param requiredKey - Key that must be present in the extracted JSON
+ * @param options - Remaining extraction options (logger, requireArray)
+ * @returns Parsed JSON object or null
+ */
+export function extractJsonWithKey<T = Record<string, unknown>>(
+  responseText: string,
+  requiredKey: string,
+  options: Omit<ExtractJsonOptions, 'requiredKey'> = {}
+): T | null {
+  const mergedOptions: ExtractJsonOptions = { ...options, requiredKey };
+  return extractJson<T>(responseText, mergedOptions);
+}
+
+/**
+ * Shorthand for extractJson() when a key must be present and hold an array,
+ * e.g. responses shaped like { "suggestions": [...] }.
+ *
+ * @param responseText - The raw response text
+ * @param arrayKey - Key that must contain an array
+ * @param options - Remaining extraction options (logger)
+ * @returns Parsed JSON object or null
+ */
+export function extractJsonWithArray<T = Record<string, unknown>>(
+  responseText: string,
+  arrayKey: string,
+  options: Omit<ExtractJsonOptions, 'requiredKey' | 'requireArray'> = {}
+): T | null {
+  const mergedOptions: ExtractJsonOptions = {
+    ...options,
+    requiredKey: arrayKey,
+    requireArray: true,
+  };
+  return extractJson<T>(responseText, mergedOptions);
+}
--- a/apps/server/src/lib/permission-enforcer.ts
+++ b/apps/server/src/lib/permission-enforcer.ts
@@ -0,0 +1,173 @@
+/**
+ * Permission enforcement utilities for Cursor provider
+ */
+
+import type { CursorCliConfigFile } from '@automaker/types';
+import { createLogger } from '@automaker/utils';
+
+const logger = createLogger('PermissionEnforcer');
+
+export interface PermissionCheckResult {
+  allowed: boolean; // whether the tool call may proceed
+  reason?: string; // explanation; checkToolCallPermission sets it only when allowed is false
+}
+
+/** Decide whether a tool call is permitted by the allow/deny rules in `permissions`.
+ * Deny rules are consulted before allow rules; a shell/read/write call matching neither is
+ * rejected, while a missing permissions config or any other kind of tool call is allowed. */
+export function checkToolCallPermission(
+  toolCall: any,
+  permissions: CursorCliConfigFile | null
+): PermissionCheckResult {
+  if (!permissions || !permissions.permissions) {
+    // If no permissions are configured, allow everything (backward compatibility)
+    return { allowed: true };
+  }
+
+  const { allow = [], deny = [] } = permissions.permissions;
+
+  // Shell calls are matched as "Shell(<first word of the command>)"; arguments are not inspected
+  if (toolCall.shellToolCall?.args?.command) {
+    const command = toolCall.shellToolCall.args.command;
+    const toolName = `Shell(${extractCommandName(command)})`;
+
+    // Check deny list first (deny takes precedence)
+    for (const denyRule of deny) {
+      if (matchesRule(toolName, denyRule)) {
+        return {
+          allowed: false,
+          reason: `Operation blocked by permission rule: ${denyRule}`,
+        };
+      }
+    }
+
+    // Not denied: the first matching allow rule permits the call
+    for (const allowRule of allow) {
+      if (matchesRule(toolName, allowRule)) {
+        return { allowed: true };
+      }
+    }
+
+    return {
+      allowed: false,
+      reason: `Operation not in allow list: ${toolName}`,
+    };
+  }
+
+  // Read calls are matched as "Read(<path>)", with the path used exactly as supplied
+  if (toolCall.readToolCall?.args?.path) {
+    const path = toolCall.readToolCall.args.path;
+    const toolName = `Read(${path})`;
+
+    // Deny rules win over allow rules
+    for (const denyRule of deny) {
+      if (matchesRule(toolName, denyRule)) {
+        return {
+          allowed: false,
+          reason: `Read operation blocked by permission rule: ${denyRule}`,
+        };
+      }
+    }
+
+    // Not denied: the first matching allow rule permits the read
+    for (const allowRule of allow) {
+      if (matchesRule(toolName, allowRule)) {
+        return { allowed: true };
+      }
+    }
+
+    return {
+      allowed: false,
+      reason: `Read operation not in allow list: ${toolName}`,
+    };
+  }
+
+  // Write calls are matched as "Write(<path>)", with the path used exactly as supplied
+  if (toolCall.writeToolCall?.args?.path) {
+    const path = toolCall.writeToolCall.args.path;
+    const toolName = `Write(${path})`;
+
+    // Deny rules win over allow rules
+    for (const denyRule of deny) {
+      if (matchesRule(toolName, denyRule)) {
+        return {
+          allowed: false,
+          reason: `Write operation blocked by permission rule: ${denyRule}`,
+        };
+      }
+    }
+
+    // Not denied: the first matching allow rule permits the write
+    for (const allowRule of allow) {
+      if (matchesRule(toolName, allowRule)) {
+        return { allowed: true };
+      }
+    }
+
+    return {
+      allowed: false,
+      reason: `Write operation not in allow list: ${toolName}`,
+    };
+  }
+
+  // For other tool types, allow by default for now
+  return { allowed: true };
+}
+
+/**
+ * Return the first whitespace-delimited word of a shell command ('unknown' if there is none)
+ */
+function extractCommandName(command: string): string {
+  // split() always yields at least one element, so `head` is '' for blank input
+  const [head] = command.trim().split(/\s+/);
+  if (!head) return 'unknown';
+  return head;
+}
+
+/**
+ * Check if a tool name (e.g. "Shell(git)" or "Read(src/a.ts)") matches a permission rule.
+ * A rule matches when it is identical, when its `*` wildcards cover the whole tool name,
+ * or (Shell rules only) when the tool's command name starts with the rule's command.
+ */
+function matchesRule(toolName: string, rule: string): boolean {
+  if (toolName === rule) {
+    return true;
+  }
+  if (rule.includes('*')) {
+    // Escape regex metacharacters so "(", ")", "." etc. are literal (unescaped, the parens of
+    // "Read(src/*)" formed a group and the rule never matched), then anchor the whole glob.
+    const literal = rule.replace(/[.+?^${}()|[\]\\]/g, '\\$&');
+    return new RegExp(`^${literal.replace(/\*/g, '.*')}$`, 's').test(toolName);
+  }
+
+  // NOTE(review): prefix comparison, so "Shell(git)" also matches "Shell(gitk)" - confirm intent
+  if (rule.startsWith('Shell(') && toolName.startsWith('Shell(')) {
+    const ruleCommand = rule.slice(6, -1); // drop "Shell(" and the final character
+    const toolCommand = extractCommandName(toolName.slice(6, -1));
+    return toolCommand.startsWith(ruleCommand);
+  }
+  return false;
+}
+
+/**
+ * Warn about a blocked tool call, naming the shell command or read/write path when present.
+ */
+export function logPermissionViolation(toolCall: any, reason: string, sessionId?: string): void {
+  const prefix = `Permission violation${sessionId ? ` [${sessionId}]` : ''}`;
+  if (toolCall.shellToolCall?.args?.command) {
+    const command = toolCall.shellToolCall.args.command;
+    logger.warn(`${prefix}: Shell command blocked - ${command} (${reason})`);
+    return;
+  }
+  if (toolCall.readToolCall?.args?.path) {
+    const readPath = toolCall.readToolCall.args.path;
+    logger.warn(`${prefix}: Read operation blocked - ${readPath} (${reason})`);
+    return;
+  }
+  if (toolCall.writeToolCall?.args?.path) {
+    const writePath = toolCall.writeToolCall.args.path;
+    logger.warn(`${prefix}: Write operation blocked - ${writePath} (${reason})`);
+    return;
+  }
+  logger.warn(`${prefix}: Tool call blocked (${reason})`, { toolCall });
+}
--- a/apps/server/src/lib/sdk-options.ts
+++ b/apps/server/src/lib/sdk-options.ts
@@ -18,9 +18,80 @@
 import type { Options } from '@anthropic-ai/claude-agent-sdk';
 import path from 'path';
 import { resolveModelString } from '@automaker/model-resolver';
-import { DEFAULT_MODELS, CLAUDE_MODEL_MAP } from '@automaker/types';
+import { createLogger } from '@automaker/utils';
+
+const logger = createLogger('SdkOptions');
+import {
+  DEFAULT_MODELS,
+  CLAUDE_MODEL_MAP,
+  type McpServerConfig,
+  type ThinkingLevel,
+  getThinkingTokenBudget,
+} from '@automaker/types';
 import { isPathAllowed, PathNotAllowedError, getAllowedRootDirectory } from '@automaker/platform';

+/**
+ * Outcome of checkSandboxCompatibility for a working directory
+ */
+export interface SandboxCompatibilityResult {
+  /** True when sandbox mode was requested and no cloud storage pattern matched the path */
+  enabled: boolean;
+  /** Explanation, set when a cloud storage path caused sandbox to be disabled */
+  message?: string;
+}
+
+/**
+ * Decide whether sandbox mode can be enabled for a working directory. It is disabled when
+ * not requested, or when the resolved path looks like a cloud-synced folder.
+ *
+ * @param cwd - Working directory to check; resolved to an absolute path before matching
+ * @param sandboxRequested - Whether sandbox mode was requested by settings
+ * @returns `enabled`, plus a `message` when a cloud storage path forced sandbox off
+ */
+export function checkSandboxCompatibility(
+  cwd: string,
+  sandboxRequested: boolean
+): SandboxCompatibilityResult {
+  if (!sandboxRequested) {
+    return { enabled: false };
+  }
+
+  const resolvedCwd = path.resolve(cwd);
+
+  // Path prefixes of cloud storage folders that may not be compatible with sandbox
+  const cloudStoragePatterns = [
+    // macOS mounted volumes
+    /^\/Volumes\/GoogleDrive/i,
+    /^\/Volumes\/Dropbox/i,
+    /^\/Volumes\/OneDrive/i,
+    /^\/Volumes\/iCloud/i,
+    // macOS home directory
+    /^\/Users\/[^/]+\/Google Drive/i,
+    /^\/Users\/[^/]+\/Dropbox/i,
+    /^\/Users\/[^/]+\/OneDrive/i,
+    /^\/Users\/[^/]+\/Library\/Mobile Documents/i, // iCloud
+    // Linux home directory
+    /^\/home\/[^/]+\/Google Drive/i,
+    /^\/home\/[^/]+\/Dropbox/i,
+    /^\/home\/[^/]+\/OneDrive/i,
+    // Windows: any drive letter, since the user profile is not always on C:
+    /^[a-z]:\\Users\\[^\\]+\\Google Drive/i,
+    /^[a-z]:\\Users\\[^\\]+\\Dropbox/i,
+    /^[a-z]:\\Users\\[^\\]+\\OneDrive/i,
+  ];
+
+  // The patterns have no `g` flag, so test() keeps no state between calls
+  const isCloudPath = cloudStoragePatterns.some((pattern) => pattern.test(resolvedCwd));
+  if (isCloudPath) {
+    return {
+      enabled: false,
+      message: `Sandbox disabled: Cloud storage path detected (${resolvedCwd}). Sandbox mode may not work correctly with cloud-synced directories.`,
+    };
+  }
+
+  return { enabled: true };
+}
+
 /**
 * Validate that a working directory is allowed by ALLOWED_ROOT_DIRECTORY.
 * This is the centralized security check for ALL AI model invocations.
@@ -58,10 +129,30 @@ export const TOOL_PRESETS = {
  specGeneration: ['Read', 'Glob', 'Grep'] as const,

  /** Full tool access for feature implementation */
-  fullAccess: ['Read', 'Write', 'Edit', 'Glob', 'Grep', 'Bash', 'WebSearch', 'WebFetch'] as const,
+  fullAccess: [
+    'Read',
+    'Write',
+    'Edit',
+    'Glob',
+    'Grep',
+    'Bash',
+    'WebSearch',
+    'WebFetch',
+    'TodoWrite',
+  ] as const,

  /** Tools for chat/interactive mode */
-  chat: ['Read', 'Write', 'Edit', 'Glob', 'Grep', 'Bash', 'WebSearch', 'WebFetch'] as const,
+  chat: [
+    'Read',
+    'Write',
+    'Edit',
+    'Glob',
+    'Grep',
+    'Bash',
+    'WebSearch',
+    'WebFetch',
+    'TodoWrite',
+  ] as const,
 } as const;

 /**
@@ -129,13 +220,104 @@ export function getModelForUseCase(

 /**
 * Base options that apply to all SDK calls
+ * AUTONOMOUS MODE: Always bypass permissions for fully autonomous operation
 */
 function getBaseOptions(): Partial<Options> {
  return {
-    permissionMode: 'acceptEdits',
+    permissionMode: 'bypassPermissions',
+    allowDangerouslySkipPermissions: true,
  };
 }

+/**
+ * Return shape of buildMcpOptions
+ */
+interface McpOptions {
+  /** SDK options fragment: `{ mcpServers }` when servers are configured, otherwise `{}` */
+  mcpServerOptions: Partial<Options>;
+}
+
+/**
+ * Produce the MCP portion of the SDK options.
+ * @param config - The SDK options config; only `mcpServers` is read
+ * @returns `mcpServerOptions` holding `{ mcpServers }`, or `{}` when none are configured
+ */
+function buildMcpOptions(config: CreateSdkOptionsConfig): McpOptions {
+  const { mcpServers } = config;
+  if (!mcpServers) {
+    return { mcpServerOptions: {} };
+  }
+  return { mcpServerOptions: { mcpServers } };
+}
+
+/**
+ * Translate a ThinkingLevel into the SDK's `maxThinkingTokens` via getThinkingTokenBudget.
+ *
+ * @param thinkingLevel - The thinking level to convert
+ * @returns `{ maxThinkingTokens }`, or `{}` when the budget is falsy
+ */
+function buildThinkingOptions(thinkingLevel?: ThinkingLevel): Partial<Options> {
+  const budget = getThinkingTokenBudget(thinkingLevel);
+  logger.debug(
+    `buildThinkingOptions: thinkingLevel="${thinkingLevel}" -> maxThinkingTokens=${budget}`
+  );
+  if (!budget) return {};
+  return { maxThinkingTokens: budget };
+}
+
+/**
+ * Build the system prompt portion of the SDK options, honouring autoLoadClaudeMd.
+ * With autoLoadClaudeMd off, a configured systemPrompt is passed through untouched.
+ * With autoLoadClaudeMd on:
+ * - systemPrompt becomes the 'claude_code' preset so the SDK can auto-load CLAUDE.md
+ * - a custom systemPrompt, if any, is attached to the preset as `append`
+ * - settingSources is set to ['user', 'project']
+ *
+ * @param config - The SDK options config
+ * @returns Object with systemPrompt and/or settingSources to spread into SDK options
+ */
+function buildClaudeMdOptions(config: CreateSdkOptionsConfig): {
+  systemPrompt?: string | SystemPromptConfig;
+  settingSources?: Array<'user' | 'project' | 'local'>;
+} {
+  const customPrompt = config.systemPrompt;
+  // Standard mode: forward the custom prompt (if any) and nothing else
+  if (!config.autoLoadClaudeMd) {
+    if (!customPrompt) {
+      return {};
+    }
+    return { systemPrompt: customPrompt };
+  }
+
+  // Auto-load mode: start from the claude_code preset and attach the custom prompt to it
+  const presetPrompt: SystemPromptConfig = {
+    type: 'preset',
+    preset: 'claude_code',
+  };
+  if (customPrompt) {
+    presetPrompt.append = customPrompt;
+  }
+
+  return {
+    systemPrompt: presetPrompt,
+    // Load both user (~/.claude/CLAUDE.md) and project (.claude/CLAUDE.md) settings
+    settingSources: ['user', 'project'],
+  };
+}
+
+/**
+ * Preset-style system prompt for SDK options.
+ * buildClaudeMdOptions produces this shape when autoLoadClaudeMd is enabled.
+ */
+export interface SystemPromptConfig {
+  /** Always 'preset' for this shape */
+  type: 'preset';
+  /** The preset to use - 'claude_code' enables CLAUDE.md loading */
+  preset: 'claude_code';
+  /** Optional extra prompt text to append to the preset */
+  append?: string;
+}
+
 /**
 * Options configuration for creating SDK options
 */
@@ -160,8 +342,25 @@ export interface CreateSdkOptionsConfig {
    type: 'json_schema';
    schema: Record<string, unknown>;
  };
+
+  /** Enable auto-loading of CLAUDE.md files via SDK's settingSources */
+  autoLoadClaudeMd?: boolean;
+
+  /** MCP servers to make available to the agent */
+  mcpServers?: Record<string, McpServerConfig>;
+
+  /** Extended thinking level for Claude models */
+  thinkingLevel?: ThinkingLevel;
 }

+// Re-export MCP types from @automaker/types for convenience
+export type {
+  McpServerConfig,
+  McpStdioServerConfig,
+  McpSSEServerConfig,
+  McpHttpServerConfig,
+} from '@automaker/types';
+
 /**
 * Create SDK options for spec generation
 *
@@ -169,11 +368,18 @@ export interface CreateSdkOptionsConfig {
 * - Uses read-only tools for codebase analysis
 * - Extended turns for thorough exploration
 * - Opus model by default (can be overridden)
+ * - When autoLoadClaudeMd is true, uses preset mode and settingSources for CLAUDE.md loading
 */
 export function createSpecGenerationOptions(config: CreateSdkOptionsConfig): Options {
  // Validate working directory before creating options
  validateWorkingDirectory(config.cwd);

+  // Build CLAUDE.md auto-loading options if enabled
+  const claudeMdOptions = buildClaudeMdOptions(config);
+
+  // Build thinking options
+  const thinkingOptions = buildThinkingOptions(config.thinkingLevel);
+
  return {
    ...getBaseOptions(),
    // Override permissionMode - spec generation only needs read-only tools
@@ -184,7 +390,8 @@ export function createSpecGenerationOptions(config: CreateSdkOptionsConfig): Opt
    maxTurns: MAX_TURNS.maximum,
    cwd: config.cwd,
    allowedTools: [...TOOL_PRESETS.specGeneration],
-    ...(config.systemPrompt && { systemPrompt: config.systemPrompt }),
+    ...claudeMdOptions,
+    ...thinkingOptions,
    ...(config.abortController && { abortController: config.abortController }),
    ...(config.outputFormat && { outputFormat: config.outputFormat }),
  };
@@ -197,11 +404,18 @@ export function createSpecGenerationOptions(config: CreateSdkOptionsConfig): Opt
 * - Uses read-only tools (just needs to read the spec)
 * - Quick turns since it's mostly JSON generation
 * - Sonnet model by default for speed
+ * - When autoLoadClaudeMd is true, uses preset mode and settingSources for CLAUDE.md loading
 */
 export function createFeatureGenerationOptions(config: CreateSdkOptionsConfig): Options {
  // Validate working directory before creating options
  validateWorkingDirectory(config.cwd);

+  // Build CLAUDE.md auto-loading options if enabled
+  const claudeMdOptions = buildClaudeMdOptions(config);
+
+  // Build thinking options
+  const thinkingOptions = buildThinkingOptions(config.thinkingLevel);
+
  return {
    ...getBaseOptions(),
    // Override permissionMode - feature generation only needs read-only tools
@@ -210,7 +424,8 @@ export function createFeatureGenerationOptions(config: CreateSdkOptionsConfig):
    maxTurns: MAX_TURNS.quick,
    cwd: config.cwd,
    allowedTools: [...TOOL_PRESETS.readOnly],
-    ...(config.systemPrompt && { systemPrompt: config.systemPrompt }),
+    ...claudeMdOptions,
+    ...thinkingOptions,
    ...(config.abortController && { abortController: config.abortController }),
  };
 }
@@ -222,18 +437,26 @@ export function createFeatureGenerationOptions(config: CreateSdkOptionsConfig):
 * - Uses read-only tools for analysis
 * - Standard turns to allow thorough codebase exploration and structured output generation
 * - Opus model by default for thorough analysis
+ * - When autoLoadClaudeMd is true, uses preset mode and settingSources for CLAUDE.md loading
 */
 export function createSuggestionsOptions(config: CreateSdkOptionsConfig): Options {
  // Validate working directory before creating options
  validateWorkingDirectory(config.cwd);

+  // Build CLAUDE.md auto-loading options if enabled
+  const claudeMdOptions = buildClaudeMdOptions(config);
+
+  // Build thinking options
+  const thinkingOptions = buildThinkingOptions(config.thinkingLevel);
+
  return {
    ...getBaseOptions(),
    model: getModelForUseCase('suggestions', config.model),
    maxTurns: MAX_TURNS.extended,
    cwd: config.cwd,
    allowedTools: [...TOOL_PRESETS.readOnly],
-    ...(config.systemPrompt && { systemPrompt: config.systemPrompt }),
+    ...claudeMdOptions,
+    ...thinkingOptions,
    ...(config.abortController && { abortController: config.abortController }),
    ...(config.outputFormat && { outputFormat: config.outputFormat }),
  };
@@ -246,7 +469,7 @@ export function createSuggestionsOptions(config: CreateSdkOptionsConfig): Option
 * - Full tool access for code modification
 * - Standard turns for interactive sessions
 * - Model priority: explicit model > session model > chat default
- * - Sandbox enabled for bash safety
+ * - When autoLoadClaudeMd is true, uses preset mode and settingSources for CLAUDE.md loading
 */
 export function createChatOptions(config: CreateSdkOptionsConfig): Options {
  // Validate working directory before creating options
@@ -255,18 +478,25 @@ export function createChatOptions(config: CreateSdkOptionsConfig): Options {
  // Model priority: explicit model > session model > chat default
  const effectiveModel = config.model || config.sessionModel;

+  // Build CLAUDE.md auto-loading options if enabled
+  const claudeMdOptions = buildClaudeMdOptions(config);
+
+  // Build MCP-related options
+  const mcpOptions = buildMcpOptions(config);
+
+  // Build thinking options
+  const thinkingOptions = buildThinkingOptions(config.thinkingLevel);
+
  return {
    ...getBaseOptions(),
    model: getModelForUseCase('chat', effectiveModel),
    maxTurns: MAX_TURNS.standard,
    cwd: config.cwd,
    allowedTools: [...TOOL_PRESETS.chat],
-    sandbox: {
-      enabled: true,
-      autoAllowBashIfSandboxed: true,
-    },
-    ...(config.systemPrompt && { systemPrompt: config.systemPrompt }),
+    ...claudeMdOptions,
+    ...thinkingOptions,
    ...(config.abortController && { abortController: config.abortController }),
+    ...mcpOptions.mcpServerOptions,
  };
 }

@@ -277,24 +507,31 @@ export function createChatOptions(config: CreateSdkOptionsConfig): Options {
 * - Full tool access for code modification and implementation
 * - Extended turns for thorough feature implementation
 * - Uses default model (can be overridden)
- * - Sandbox enabled for bash safety
+ * - When autoLoadClaudeMd is true, uses preset mode and settingSources for CLAUDE.md loading
 */
 export function createAutoModeOptions(config: CreateSdkOptionsConfig): Options {
  // Validate working directory before creating options
  validateWorkingDirectory(config.cwd);

+  // Build CLAUDE.md auto-loading options if enabled
+  const claudeMdOptions = buildClaudeMdOptions(config);
+
+  // Build MCP-related options
+  const mcpOptions = buildMcpOptions(config);
+
+  // Build thinking options
+  const thinkingOptions = buildThinkingOptions(config.thinkingLevel);
+
  return {
    ...getBaseOptions(),
    model: getModelForUseCase('auto', config.model),
    maxTurns: MAX_TURNS.maximum,
    cwd: config.cwd,
    allowedTools: [...TOOL_PRESETS.fullAccess],
-    sandbox: {
-      enabled: true,
-      autoAllowBashIfSandboxed: true,
-    },
-    ...(config.systemPrompt && { systemPrompt: config.systemPrompt }),
+    ...claudeMdOptions,
+    ...thinkingOptions,
    ...(config.abortController && { abortController: config.abortController }),
+    ...mcpOptions.mcpServerOptions,
  };
 }

@@ -302,25 +539,40 @@ export function createAutoModeOptions(config: CreateSdkOptionsConfig): Options {
 * Create custom SDK options with explicit configuration
 *
 * Use this when the preset options don't fit your use case.
+ * When autoLoadClaudeMd is true, uses preset mode and settingSources for CLAUDE.md loading
 */
 export function createCustomOptions(
  config: CreateSdkOptionsConfig & {
    maxTurns?: number;
    allowedTools?: readonly string[];
-    sandbox?: { enabled: boolean; autoAllowBashIfSandboxed?: boolean };
  }
 ): Options {
  // Validate working directory before creating options
  validateWorkingDirectory(config.cwd);

+  // Build CLAUDE.md auto-loading options if enabled
+  const claudeMdOptions = buildClaudeMdOptions(config);
+
+  // Build MCP-related options
+  const mcpOptions = buildMcpOptions(config);
+
+  // Build thinking options
+  const thinkingOptions = buildThinkingOptions(config.thinkingLevel);
+
+  // For custom options: use explicit allowedTools if provided, otherwise default to readOnly
+  const effectiveAllowedTools = config.allowedTools
+    ? [...config.allowedTools]
+    : [...TOOL_PRESETS.readOnly];
+
  return {
    ...getBaseOptions(),
    model: getModelForUseCase('default', config.model),
    maxTurns: config.maxTurns ?? MAX_TURNS.maximum,
    cwd: config.cwd,
-    allowedTools: config.allowedTools ? [...config.allowedTools] : [...TOOL_PRESETS.readOnly],
-    ...(config.sandbox && { sandbox: config.sandbox }),
-    ...(config.systemPrompt && { systemPrompt: config.systemPrompt }),
+    allowedTools: effectiveAllowedTools,
+    ...claudeMdOptions,
+    ...thinkingOptions,
    ...(config.abortController && { abortController: config.abortController }),
+    ...mcpOptions.mcpServerOptions,
  };
 }
--- a/apps/server/src/lib/secure-fs.ts
+++ b/apps/server/src/lib/secure-fs.ts
@@ -6,6 +6,7 @@
 import { secureFs } from '@automaker/platform';

 export const {
+  // Async methods
  access,
  readFile,
  writeFile,
@@ -20,4 +21,19 @@ export const {
  lstat,
  joinPath,
  resolvePath,
+  // Sync methods
+  existsSync,
+  readFileSync,
+  writeFileSync,
+  mkdirSync,
+  readdirSync,
+  statSync,
+  accessSync,
+  unlinkSync,
+  rmSync,
+  // Throttling configuration and monitoring
+  configureThrottling,
+  getThrottlingConfig,
+  getPendingOperations,
+  getActiveOperations,
 } = secureFs;
--- a/apps/server/src/lib/settings-helpers.ts
+++ b/apps/server/src/lib/settings-helpers.ts
@@ -0,0 +1,730 @@
+/**
+ * Helper utilities for loading settings and context file handling across different parts of the server
+ */
+
+import type { SettingsService } from '../services/settings-service.js';
+import type { ContextFilesResult, ContextFileInfo } from '@automaker/utils';
+import { createLogger } from '@automaker/utils';
+import type {
+  MCPServerConfig,
+  McpServerConfig,
+  PromptCustomization,
+  ClaudeApiProfile,
+  ClaudeCompatibleProvider,
+  PhaseModelKey,
+  PhaseModelEntry,
+  Credentials,
+} from '@automaker/types';
+import { DEFAULT_PHASE_MODELS } from '@automaker/types';
+import {
+  mergeAutoModePrompts,
+  mergeAgentPrompts,
+  mergeBacklogPlanPrompts,
+  mergeEnhancementPrompts,
+  mergeCommitMessagePrompts,
+  mergeTitleGenerationPrompts,
+  mergeIssueValidationPrompts,
+  mergeIdeationPrompts,
+  mergeAppSpecPrompts,
+  mergeContextDescriptionPrompts,
+  mergeSuggestionsPrompts,
+  mergeTaskExecutionPrompts,
+} from '@automaker/prompts';
+
+const logger = createLogger('SettingsHelper');
+
+/**
+ * Resolve the effective autoLoadClaudeMd flag for a project.
+ * A value defined in project settings wins; otherwise the global value is used (default false).
+ * Without a settings service the result is false; load failures are logged and rethrown.
+ *
+ * @param projectPath - Path to the project
+ * @param settingsService - Optional settings service instance
+ * @param logPrefix - Prefix for log messages (e.g., '[DescribeImage]')
+ * @returns Promise resolving to the autoLoadClaudeMd setting value
+ */
+export async function getAutoLoadClaudeMdSetting(
+  projectPath: string,
+  settingsService?: SettingsService | null,
+  logPrefix = '[SettingsHelper]'
+): Promise<boolean> {
+  if (!settingsService) {
+    logger.info(`${logPrefix} SettingsService not available, autoLoadClaudeMd disabled`);
+    return false;
+  }
+
+  try {
+    // A project-level value, when defined, overrides the global one
+    const project = await settingsService.getProjectSettings(projectPath);
+    const projectValue = project.autoLoadClaudeMd;
+    if (projectValue !== undefined) {
+      logger.info(`${logPrefix} autoLoadClaudeMd from project settings: ${projectValue}`);
+      return projectValue;
+    }
+
+    // No project-level value: use the global setting, treating "unset" as false
+    const globalSettings = await settingsService.getGlobalSettings();
+    const globalValue = globalSettings.autoLoadClaudeMd ?? false;
+    logger.info(`${logPrefix} autoLoadClaudeMd from global settings: ${globalValue}`);
+    return globalValue;
+  } catch (error) {
+    logger.error(`${logPrefix} Failed to load autoLoadClaudeMd setting:`, error);
+    throw error;
+  }
+}
+
+/**
+ * Produce the context prompt with CLAUDE.md left out when the SDK will load it itself.
+ *
+ * When autoLoadClaudeMd is false, or there are no context files, the already formatted
+ * prompt is returned as-is. Otherwise files named CLAUDE.md (any letter case) are dropped
+ * and the prompt is rebuilt from the remaining files (CODE_QUALITY.md, CONVENTIONS.md, etc.),
+ * in a layout intended to match the one produced by loadContextFiles.
+ *
+ * @param contextResult - Result from loadContextFiles
+ * @param autoLoadClaudeMd - Whether SDK auto-loading is enabled
+ * @returns The context prompt, or an empty string when only CLAUDE.md files were present
+ */
+export function filterClaudeMdFromContext(
+  contextResult: ContextFilesResult,
+  autoLoadClaudeMd: boolean
+): string {
+  const { files, formattedPrompt } = contextResult;
+  if (!autoLoadClaudeMd || files.length === 0) {
+    return formattedPrompt;
+  }
+
+  // Keep every file whose name is not CLAUDE.md (compared case-insensitively)
+  const sections: string[] = [];
+  for (const file of files) {
+    if (file.name.toLowerCase() !== 'claude.md') {
+      sections.push(formatContextFileEntry(file));
+    }
+  }
+  if (sections.length === 0) {
+    return '';
+  }
+
+  return `# Project Context Files
+
+The following context files provide project-specific rules, conventions, and guidelines.
+Each file serves a specific purpose - use the description to understand when to reference it.
+If you need more details about a context file, you can read the full file at the path provided.
+
+**IMPORTANT**: You MUST follow the rules and conventions specified in these files.
+- Follow ALL commands exactly as shown (e.g., if the project uses \`pnpm\`, NEVER use \`npm\` or \`npx\`)
+- Follow ALL coding conventions, commit message formats, and architectural patterns specified
+- Reference these rules before running ANY shell commands or making commits
+
+---
+
+${sections.join('\n\n---\n\n')}
+
+---
+
+**REMINDER**: Before taking any action, verify you are following the conventions specified above.
+`;
+}
+
+/**
+ * Render one context file as markdown (meant to mirror @automaker/utils/context-loader.ts)
+ */
+function formatContextFileEntry(file: ContextFileInfo): string {
+  const lines = [`## ${file.name}`, `**Path:** \`${file.path}\``];
+  if (file.description) {
+    lines.push(`**Purpose:** ${file.description}`);
+  }
+  return `${lines.join('\n')}\n\n${file.content}`;
+}
+
+/**
+ * Get enabled MCP servers from global settings, converted to SDK format.
+ * Returns an empty object if the settings service is missing, settings cannot be loaded,
+ * or no usable server is configured. A server whose config fails conversion is logged and
+ * skipped so that one bad entry does not discard the valid ones.
+ *
+ * @param settingsService - Optional settings service instance
+ * @param logPrefix - Prefix for log messages (e.g., '[AgentService]')
+ * @returns Promise resolving to MCP servers in SDK format (keyed by name)
+ */
+export async function getMCPServersFromSettings(
+  settingsService?: SettingsService | null,
+  logPrefix = '[SettingsHelper]'
+): Promise<Record<string, McpServerConfig>> {
+  if (!settingsService) {
+    return {};
+  }
+
+  try {
+    const globalSettings = await settingsService.getGlobalSettings();
+    // A server counts as enabled unless `enabled` is explicitly false
+    const enabledServers = (globalSettings.mcpServers ?? []).filter((s) => s.enabled !== false);
+
+    const sdkServers: Record<string, McpServerConfig> = {};
+    for (const server of enabledServers) {
+      try {
+        sdkServers[server.name] = convertToSdkFormat(server);
+      } catch (error) {
+        logger.error(`${logPrefix} Skipping invalid MCP server "${server.name}":`, error);
+      }
+    }
+    const loadedNames = Object.keys(sdkServers);
+    if (loadedNames.length > 0) {
+      logger.info(
+        `${logPrefix} Loaded ${loadedNames.length} MCP server(s): ${loadedNames.join(', ')}`
+      );
+    }
+    return sdkServers;
+  } catch (error) {
+    logger.error(`${logPrefix} Failed to load MCP servers setting:`, error);
+    return {};
+  }
+}
+
+/**
+ * Map a settings-level MCPServerConfig onto the SDK's McpServerConfig shape.
+ *
+ * @param server - Server entry from settings
+ * @returns The SDK-format config (`sse`, `http`, or `stdio` for any other type)
+ * @throws Error naming the server when its url (sse/http) or command (stdio) is missing
+ */
+function convertToSdkFormat(server: MCPServerConfig): McpServerConfig {
+  switch (server.type) {
+    case 'sse': {
+      if (!server.url) {
+        throw new Error(`SSE MCP server "${server.name}" is missing a URL.`);
+      }
+      return {
+        type: 'sse',
+        url: server.url,
+        headers: server.headers,
+      };
+    }
+    case 'http': {
+      if (!server.url) {
+        throw new Error(`HTTP MCP server "${server.name}" is missing a URL.`);
+      }
+      return {
+        type: 'http',
+        url: server.url,
+        headers: server.headers,
+      };
+    }
+    default: {
+      // Any other (or absent) type is treated as stdio
+      if (!server.command) {
+        throw new Error(`Stdio MCP server "${server.name}" is missing a command.`);
+      }
+      return { type: 'stdio', command: server.command, args: server.args, env: server.env };
+    }
+  }
+}
+
+/**
+ * Load prompt customization from global settings and run each category through its merge helper.
+ * If the service is absent or loading fails, an empty customization is merged instead.
+ *
+ * @param settingsService - Optional settings service instance
+ * @param logPrefix - Prefix for log messages
+ * @returns Promise resolving to merged prompts for all categories
+ */
+export async function getPromptCustomization(
+  settingsService?: SettingsService | null,
+  logPrefix = '[PromptHelper]'
+): Promise<{
+  autoMode: ReturnType<typeof mergeAutoModePrompts>;
+  agent: ReturnType<typeof mergeAgentPrompts>;
+  backlogPlan: ReturnType<typeof mergeBacklogPlanPrompts>;
+  enhancement: ReturnType<typeof mergeEnhancementPrompts>;
+  commitMessage: ReturnType<typeof mergeCommitMessagePrompts>;
+  titleGeneration: ReturnType<typeof mergeTitleGenerationPrompts>;
+  issueValidation: ReturnType<typeof mergeIssueValidationPrompts>;
+  ideation: ReturnType<typeof mergeIdeationPrompts>;
+  appSpec: ReturnType<typeof mergeAppSpecPrompts>;
+  contextDescription: ReturnType<typeof mergeContextDescriptionPrompts>;
+  suggestions: ReturnType<typeof mergeSuggestionsPrompts>;
+  taskExecution: ReturnType<typeof mergeTaskExecutionPrompts>;
+}> {
+  let customization: PromptCustomization = {};
+
+  if (!settingsService) {
+    logger.info(`${logPrefix} SettingsService not available, using default prompts`);
+  } else {
+    try {
+      const { promptCustomization } = await settingsService.getGlobalSettings();
+      customization = promptCustomization || {};
+      logger.info(`${logPrefix} Loaded prompt customization from settings`);
+    } catch (error) {
+      logger.error(`${logPrefix} Failed to load prompt customization:`, error);
+      // Not rethrown: customization is still the empty object, so defaults apply
+    }
+  }
+
+  return {
+    autoMode: mergeAutoModePrompts(customization.autoMode),
+    agent: mergeAgentPrompts(customization.agent),
+    backlogPlan: mergeBacklogPlanPrompts(customization.backlogPlan),
+    enhancement: mergeEnhancementPrompts(customization.enhancement),
+    commitMessage: mergeCommitMessagePrompts(customization.commitMessage),
+    titleGeneration: mergeTitleGenerationPrompts(customization.titleGeneration),
+    issueValidation: mergeIssueValidationPrompts(customization.issueValidation),
+    ideation: mergeIdeationPrompts(customization.ideation),
+    appSpec: mergeAppSpecPrompts(customization.appSpec),
+    contextDescription: mergeContextDescriptionPrompts(customization.contextDescription),
+    suggestions: mergeSuggestionsPrompts(customization.suggestions),
+    taskExecution: mergeTaskExecutionPrompts(customization.taskExecution),
+  };
+}
+
+/**
+ * Read the Skills feature configuration from the global settings.
+ * Unset values default to enabled with both the 'user' and 'project' sources.
+ *
+ * @param settingsService - Settings service instance
+ * @returns Skills configuration with enabled state, sources, and tool inclusion flag
+ */
+export async function getSkillsConfiguration(settingsService: SettingsService): Promise<{
+  enabled: boolean;
+  sources: Array<'user' | 'project'>;
+  shouldIncludeInTools: boolean;
+}> {
+  const { enableSkills, skillsSources } = await settingsService.getGlobalSettings();
+
+  const enabled = enableSkills ?? true;
+  const sources = skillsSources ?? ['user', 'project'];
+
+  // The tool inclusion flag requires the feature to be on and at least one source
+  const shouldIncludeInTools = enabled && sources.length > 0;
+
+  return { enabled, sources, shouldIncludeInTools };
+}
+
+/**
+ * Read the Subagents feature configuration from the global settings.
+ * Unset values default to enabled with both the 'user' and 'project' sources.
+ *
+ * @param settingsService - Settings service instance
+ * @returns Subagents configuration with enabled state, sources, and tool inclusion flag
+ */
+export async function getSubagentsConfiguration(settingsService: SettingsService): Promise<{
+  enabled: boolean;
+  sources: Array<'user' | 'project'>;
+  shouldIncludeInTools: boolean;
+}> {
+  const globalSettings = await settingsService.getGlobalSettings();
+
+  // Missing settings mean "enabled" with every source active
+  const enabled = globalSettings.enableSubagents ?? true;
+  const sources = globalSettings.subagentsSources ?? ['user', 'project'];
+
+  // The tool inclusion flag additionally requires a non-empty source list
+  const hasSources = sources.length > 0;
+  return { enabled, sources, shouldIncludeInTools: enabled && hasSources };
+}
+
+/**
+ * Collect the custom subagent definitions that apply, optionally scoped to a project.
+ * Global definitions form the base; when a project path is given, that project's
+ * definitions are layered on top and replace global ones with the same name.
+ *
+ * @param settingsService - Settings service instance
+ * @param projectPath - Optional project path; when omitted only global subagents are used
+ * @returns Record of agent names to definitions, or undefined when the record is empty
+ */
+export async function getCustomSubagents(
+  settingsService: SettingsService,
+  projectPath?: string
+): Promise<Record<string, import('@automaker/types').AgentDefinition> | undefined> {
+  // Global definitions form the base layer
+  const globalSettings = await settingsService.getGlobalSettings();
+  let subagents = globalSettings.customSubagents || {};
+
+  if (projectPath) {
+    // Spread last so project definitions replace same-named global ones
+    const projectSettings = await settingsService.getProjectSettings(projectPath);
+    subagents = {
+      ...subagents,
+      ...(projectSettings.customSubagents || {}),
+    };
+  }
+
+  // An empty record is reported as "none configured"
+  if (Object.keys(subagents).length === 0) {
+    return undefined;
+  }
+
+  return subagents;
+}
+
+/** Result from getActiveClaudeApiProfile */
+export interface ActiveClaudeApiProfileResult {
+  /** The active profile, or undefined if using direct Anthropic API */
+  profile: ClaudeApiProfile | undefined;
+  /** Credentials for resolving 'credentials' apiKeySource (undefined when unavailable) */
+  credentials: Credentials | undefined;
+}
+
+/**
+ * Resolve which Claude API profile is active, together with the stored credentials.
+ * A project-level activeClaudeApiProfileId takes precedence over the global one whenever
+ * it is anything other than undefined; the profile list itself always comes from global settings.
+ *
+ * @deprecated Use getProviderById and getPhaseModelWithOverrides instead for the new provider system.
+ * This function is kept for backward compatibility during migration.
+ *
+ * @param settingsService - Optional settings service instance
+ * @param logPrefix - Prefix for log messages (e.g., '[AgentService]')
+ * @param projectPath - Optional project path for per-project override
+ * @returns Promise resolving to object with profile and credentials
+ */
+export async function getActiveClaudeApiProfile(
+  settingsService?: SettingsService | null,
+  logPrefix = '[SettingsHelper]',
+  projectPath?: string
+): Promise<ActiveClaudeApiProfileResult> {
+  if (!settingsService) {
+    return { profile: undefined, credentials: undefined };
+  }
+
+  try {
+    // Global data is always loaded; the project lookup below is optional
+    const globalSettings = await settingsService.getGlobalSettings();
+    const credentials = await settingsService.getCredentials();
+    const profiles = globalSettings.claudeApiProfiles || [];
+
+    // Project-level value: undefined = inherit the global choice,
+    // null = explicitly no profile, string = a specific profile id
+    let projectProfileId: string | null | undefined;
+    if (projectPath) {
+      const projectSettings = await settingsService.getProjectSettings(projectPath);
+      projectProfileId = projectSettings.activeClaudeApiProfileId;
+    }
+
+    const isProjectOverride = projectProfileId !== undefined;
+    const activeProfileId = isProjectOverride
+      ? projectProfileId
+      : globalSettings.activeClaudeApiProfileId;
+
+    // Null, undefined and the empty string all mean "no profile selected",
+    // which is reported as profile: undefined (direct Anthropic API)
+    if (!activeProfileId) {
+      if (isProjectOverride && activeProfileId === null) {
+        logger.info(`${logPrefix} Project explicitly using Direct Anthropic API`);
+      }
+      return { profile: undefined, credentials };
+    }
+
+    const activeProfile = profiles.find((candidate) => candidate.id === activeProfileId);
+
+    // An id that matches no stored profile also falls back to the direct API
+    if (!activeProfile) {
+      logger.warn(
+        `${logPrefix} Active profile ID "${activeProfileId}" not found, falling back to direct Anthropic API`
+      );
+      return { profile: undefined, credentials };
+    }
+
+    // The log line notes when the choice came from the project settings
+    const overrideSuffix = isProjectOverride ? ' (project override)' : '';
+    logger.info(`${logPrefix} Using Claude API profile: ${activeProfile.name}${overrideSuffix}`);
+    return { profile: activeProfile, credentials };
+  } catch (error) {
+    // A failed settings or credentials read is logged and reported as
+    // "no profile, no credentials" instead of being rethrown
+    logger.error(`${logPrefix} Failed to load Claude API profile:`, error);
+    return { profile: undefined, credentials: undefined };
+  }
+}
+
+// ============================================================================
+// New Provider System Helpers
+// ============================================================================
+
+/** Result from getProviderById (both fields are undefined when the lookup itself fails) */
+export interface ProviderByIdResult {
+  /** The matching provider - returned even when it is disabled - or undefined if not found */
+  provider: ClaudeCompatibleProvider | undefined;
+  /** Credentials for resolving 'credentials' apiKeySource */
+  credentials: Credentials | undefined;
+}
+
+/**
+ * Look up a ClaudeCompatibleProvider by ID in the global settings.
+ * The stored credentials are returned alongside it for API key resolution.
+ *
+ * @param providerId - The provider ID to look up
+ * @param settingsService - Settings service instance
+ * @param logPrefix - Prefix for log messages
+ * @returns Promise resolving to object with provider and credentials
+ */
+export async function getProviderById(
+  providerId: string,
+  settingsService: SettingsService,
+  logPrefix = '[SettingsHelper]'
+): Promise<ProviderByIdResult> {
+  try {
+    const globalSettings = await settingsService.getGlobalSettings();
+    const credentials = await settingsService.getCredentials();
+    const candidates = globalSettings.claudeCompatibleProviders || [];
+    const provider = candidates.find((candidate) => candidate.id === providerId);
+
+    if (!provider) {
+      logger.warn(`${logPrefix} Provider not found: ${providerId}`);
+      return { provider: undefined, credentials };
+    }
+
+    // A disabled provider is still returned; only the log level differs
+    if (provider.enabled === false) {
+      logger.warn(`${logPrefix} Provider "${provider.name}" (${providerId}) is disabled`);
+    } else {
+      logger.debug(`${logPrefix} Found provider: ${provider.name}`);
+    }
+    return { provider, credentials };
+  } catch (error) {
+    logger.error(`${logPrefix} Failed to load provider by ID:`, error);
+    return { provider: undefined, credentials: undefined };
+  }
+}
+
+/** Result from getPhaseModelWithOverrides */
+export interface PhaseModelWithOverridesResult {
+  /** The resolved phase model entry (project override, global setting, or default) */
+  phaseModel: PhaseModelEntry;
+  /** Whether a project override was applied */
+  isProjectOverride: boolean;
+  /** The provider if providerId is set, found, and not disabled */
+  provider: ClaudeCompatibleProvider | undefined;
+  /** Credentials for API key resolution */
+  credentials: Credentials | undefined;
+}
+
+/**
+ * Get the phase model for a phase: project override, else global setting, else per-phase default.
+ * Resolves the provider when a providerId is set. A settings failure yields the per-phase default.
+ *
+ * @param phase - The phase key (e.g., 'enhancementModel', 'specGenerationModel')
+ * @param settingsService - Optional settings service instance (returns defaults if undefined)
+ * @param projectPath - Optional project path for checking overrides
+ * @param logPrefix - Prefix for log messages
+ * @returns Promise resolving to phase model with provider info
+ */
+export async function getPhaseModelWithOverrides(
+  phase: PhaseModelKey,
+  settingsService?: SettingsService | null,
+  projectPath?: string,
+  logPrefix = '[SettingsHelper]'
+): Promise<PhaseModelWithOverridesResult> {
+  // Handle undefined settingsService gracefully
+  if (!settingsService) {
+    logger.info(`${logPrefix} SettingsService not available, using default for ${phase}`);
+    return {
+      phaseModel: DEFAULT_PHASE_MODELS[phase] || { model: 'sonnet' },
+      isProjectOverride: false,
+      provider: undefined,
+      credentials: undefined,
+    };
+  }
+
+  try {
+    const globalSettings = await settingsService.getGlobalSettings();
+    const credentials = await settingsService.getCredentials();
+    const globalPhaseModels = globalSettings.phaseModels || {};
+
+    // Start with global phase model
+    let phaseModel = globalPhaseModels[phase];
+    let isProjectOverride = false;
+
+    // Check for project override
+    if (projectPath) {
+      const projectSettings = await settingsService.getProjectSettings(projectPath);
+      const projectOverrides = projectSettings.phaseModelOverrides || {};
+
+      if (projectOverrides[phase]) {
+        phaseModel = projectOverrides[phase];
+        isProjectOverride = true;
+        logger.debug(`${logPrefix} Using project override for ${phase}`);
+      }
+    }
+
+    // If no phase model found, use per-phase default
+    if (!phaseModel) {
+      phaseModel = DEFAULT_PHASE_MODELS[phase] || { model: 'sonnet' };
+      logger.debug(`${logPrefix} No ${phase} configured, using default: ${phaseModel.model}`);
+    }
+
+    // Resolve provider if providerId is set
+    let provider: ClaudeCompatibleProvider | undefined;
+    if (phaseModel.providerId) {
+      const providers = globalSettings.claudeCompatibleProviders || [];
+      provider = providers.find((p) => p.id === phaseModel.providerId);
+
+      if (provider) {
+        if (provider.enabled === false) {
+          logger.warn(
+            `${logPrefix} Provider "${provider.name}" for ${phase} is disabled, falling back to direct API`
+          );
+          provider = undefined;
+        } else {
+          logger.debug(`${logPrefix} Using provider "${provider.name}" for ${phase}`);
+        }
+      } else {
+        logger.warn(
+          `${logPrefix} Provider ${phaseModel.providerId} not found for ${phase}, falling back to direct API`
+        );
+      }
+    }
+
+    return {
+      phaseModel,
+      isProjectOverride,
+      provider,
+      credentials,
+    };
+  } catch (error) {
+    logger.error(`${logPrefix} Failed to get phase model with overrides:`, error);
+    // Same per-phase default as the other fallback paths, so a load failure keeps the phase's model
+    return {
+      phaseModel: DEFAULT_PHASE_MODELS[phase] || { model: 'sonnet' },
+      isProjectOverride: false,
+      provider: undefined,
+      credentials: undefined,
+    };
+  }
+}
+
+/** Result from getProviderByModelId (every field is undefined when no provider matched) */
+export interface ProviderByModelIdResult {
+  /** The provider that contains this model, or undefined if not found */
+  provider: ClaudeCompatibleProvider | undefined;
+  /** The model configuration if found */
+  modelConfig: import('@automaker/types').ProviderModel | undefined;
+  /** Credentials for API key resolution (only set when a provider was found) */
+  credentials: Credentials | undefined;
+  /** The resolved Claude model ID (from mapsToClaudeModel); undefined when no mapping is set */
+  resolvedModel: string | undefined;
+}
+
+/**
+ * Find the enabled ClaudeCompatibleProvider that lists a given model ID.
+ * Model IDs are compared case-insensitively and the first enabled provider that lists
+ * the model wins. Useful when only a model string from the UI is available.
+ *
+ * When the model declares `mapsToClaudeModel`, that alias is resolved to the Claude model
+ * ID to use for API calls (e.g., "GLM-4.5-Air" -> "claude-haiku-4-5").
+ *
+ * @param modelId - The model ID to search for (e.g., "GLM-4.7", "MiniMax-M2.1")
+ * @param settingsService - Settings service instance
+ * @param logPrefix - Prefix for log messages
+ * @returns Promise resolving to object with provider, model config, credentials, and resolved model
+ */
+export async function getProviderByModelId(
+  modelId: string,
+  settingsService: SettingsService,
+  logPrefix = '[SettingsHelper]'
+): Promise<ProviderByModelIdResult> {
+  try {
+    const globalSettings = await settingsService.getGlobalSettings();
+    const credentials = await settingsService.getCredentials();
+
+    // Disabled providers never take part in the lookup
+    const enabledProviders = (globalSettings.claudeCompatibleProviders || []).filter(
+      (candidate) => candidate.enabled !== false
+    );
+
+    // Comparing lowercased IDs also covers the exact-match case
+    const wantedId = modelId.toLowerCase();
+
+    for (const provider of enabledProviders) {
+      const modelConfig = provider.models?.find((m) => m.id.toLowerCase() === wantedId);
+      if (!modelConfig) {
+        continue;
+      }
+
+      logger.info(`${logPrefix} Found model "${modelId}" in provider "${provider.name}"`);
+
+      // Only models that declare a mapping get a resolved Claude model ID
+      let resolvedModel: string | undefined;
+      if (modelConfig.mapsToClaudeModel) {
+        // Loaded on demand: the resolver is only needed when a mapping exists
+        const { resolveModelString } = await import('@automaker/model-resolver');
+        resolvedModel = resolveModelString(modelConfig.mapsToClaudeModel);
+        logger.info(
+          `${logPrefix} Model "${modelId}" maps to Claude model "${modelConfig.mapsToClaudeModel}" -> "${resolvedModel}"`
+        );
+      }
+
+      // The first enabled provider listing the model wins
+      return { provider, modelConfig, credentials, resolvedModel };
+    }
+
+    // No enabled provider lists this model
+    logger.debug(`${logPrefix} Model "${modelId}" not found in any provider`);
+    return {
+      provider: undefined,
+      modelConfig: undefined,
+      credentials: undefined,
+      resolvedModel: undefined,
+    };
+  } catch (error) {
+    logger.error(`${logPrefix} Failed to find provider by model ID:`, error);
+    return {
+      provider: undefined,
+      modelConfig: undefined,
+      credentials: undefined,
+      resolvedModel: undefined,
+    };
+  }
+}
+
+/**
+ * List the models of every enabled ClaudeCompatibleProvider, e.g. for model dropdowns.
+ *
+ * Providers come from the global settings. A provider counts as enabled unless its
+ * `enabled` flag is explicitly false, and a provider without a `models` list contributes
+ * nothing. The result keeps settings order: provider by provider, each provider's models
+ * in list order.
+ *
+ * A failure while loading the settings is logged and produces an empty array instead of
+ * a rejected promise.
+ *
+ * @param settingsService - Settings service instance
+ * @param logPrefix - Prefix for log messages
+ * @returns Promise resolving to array of provider models with their provider info
+ */
+export async function getAllProviderModels(
+  settingsService: SettingsService,
+  logPrefix = '[SettingsHelper]'
+): Promise<
+  Array<{
+    providerId: string;
+    providerName: string;
+    model: import('@automaker/types').ProviderModel;
+  }>
+> {
+  try {
+    const globalSettings = await settingsService.getGlobalSettings();
+    const providers = globalSettings.claudeCompatibleProviders || [];
+
+    // Only providers that are not explicitly disabled contribute models
+    const enabledProviders = providers.filter((provider) => provider.enabled !== false);
+
+    // Flatten each provider's model list, tagging every model with its owning provider
+    const allModels = enabledProviders.flatMap((provider) =>
+      (provider.models || []).map((model) => ({
+        providerId: provider.id,
+        providerName: provider.name,
+        model,
+      }))
+    );
+
+    // The provider count in this message includes disabled providers
+    logger.debug(
+      `${logPrefix} Found ${allModels.length} models from ${providers.length} providers`
+    );
+    return allModels;
+  } catch (error) {
+    // Failures are logged and reported as "no models" rather than rethrown
+    logger.error(`${logPrefix} Failed to get all provider models:`, error);
+    return [];
+  }
+}
--- a/apps/server/src/lib/version.ts
+++ b/apps/server/src/lib/version.ts
@@ -0,0 +1,36 @@
+/**
+ * Version utility - Reads version from package.json
+ */
+
+import { readFileSync } from 'fs';
+import { fileURLToPath } from 'url';
+import { dirname, join } from 'path';
+import { createLogger } from '@automaker/utils';
+
+const logger = createLogger('Version');
+// Derive this module's file and directory paths from import.meta.url
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = dirname(__filename);
+// Memoized version string; stays null until getVersion reads package.json successfully
+let cachedVersion: string | null = null;
+
+/**
+ * Get the version from package.json ('0.0.0' if unreadable or not a non-empty string).
+ * Caches the result of a successful read for performance.
+ */
+export function getVersion(): string {
+  if (cachedVersion) {
+    return cachedVersion;
+  }
+
+  try {
+    const packageJsonPath = join(__dirname, '..', '..', 'package.json');
+    const declared: unknown = JSON.parse(readFileSync(packageJsonPath, 'utf-8'))?.version;
+    // JSON.parse is untyped: only a real, non-empty string is accepted as the version
+    cachedVersion = typeof declared === 'string' && declared ? declared : '0.0.0';
+    return cachedVersion;
+  } catch (error) {
+    logger.warn('Failed to read version from package.json:', error);
+    return '0.0.0';
+  }
+}
--- a/apps/server/src/lib/worktree-metadata.ts
+++ b/apps/server/src/lib/worktree-metadata.ts
@@ -5,22 +5,24 @@

 import * as secureFs from './secure-fs.js';
 import * as path from 'path';
+import type { PRState, WorktreePRInfo } from '@automaker/types';
+
+// Re-export types for backwards compatibility
+export type { PRState, WorktreePRInfo };

 /** Maximum length for sanitized branch names in filesystem paths */
 const MAX_SANITIZED_BRANCH_PATH_LENGTH = 200;

-export interface WorktreePRInfo {
-  number: number;
-  url: string;
-  title: string;
-  state: string;
-  createdAt: string;
-}
-
 export interface WorktreeMetadata {
  branch: string;
  createdAt: string;
  pr?: WorktreePRInfo;
+  /** Whether the init script has been executed for this worktree */
+  initScriptRan?: boolean;
+  /** Status of the init script execution */
+  initScriptStatus?: 'running' | 'success' | 'failed';
+  /** Error message if init script failed */
+  initScriptError?: string;
 }

 /**
--- a/apps/server/src/lib/xml-extractor.ts
+++ b/apps/server/src/lib/xml-extractor.ts
@@ -0,0 +1,611 @@
+/**
+ * XML Extraction Utilities
+ *
+ * Robust XML parsing utilities for extracting and updating sections
+ * from app_spec.txt XML content. Uses regex-based parsing which is
+ * sufficient for our controlled XML structure.
+ *
+ * Note: If more complex XML parsing is needed in the future, consider
+ * using a library like 'fast-xml-parser' or 'xml2js'.
+ */
+
+import { createLogger } from '@automaker/utils';
+import type { SpecOutput } from '@automaker/types';
+
+const logger = createLogger('XmlExtractor');
+
+/**
+ * One <feature> entry of the <implemented_features> section (file_locations is optional)
+ */
+export interface ImplementedFeature {
+  name: string;
+  description: string;
+  file_locations?: string[];
+}
+
+/**
+ * Minimal logger contract for callers supplying their own logger: debug is required, warn optional
+ */
+export interface XmlExtractorLogger {
+  debug: (message: string, ...args: unknown[]) => void;
+  warn?: (message: string, ...args: unknown[]) => void;
+}
+
+/**
+ * Options shared by the XML extraction and update functions in this module
+ */
+export interface ExtractXmlOptions {
+  /** Custom logger; when omitted, the module-level 'XmlExtractor' logger is used */
+  logger?: XmlExtractorLogger;
+}
+
+/**
+ * Escape the five XML special characters (& < > " ') as entity references.
+ * Null and undefined inputs are converted to an empty string.
+ */
+export function escapeXml(str: string | undefined | null): string {
+  const entities: Record<string, string> = {
+    '&': '&amp;',
+    '<': '&lt;',
+    '>': '&gt;',
+    '"': '&quot;',
+    "'": '&apos;',
+  };
+  // `?? ''` maps null/undefined to the empty string; one pass then swaps each special character
+  return (str ?? '').replace(/[&<>"']/g, (ch) => entities[ch] ?? ch);
+}
+
+/**
+ * Unescape the five predefined XML entities (&apos; &quot; &gt; &lt; &amp;) in a string
+ */
+export function unescapeXml(str: string): string {
+  const characters: Record<string, string> = { apos: "'", quot: '"', gt: '>', lt: '<', amp: '&' };
+  // Single pass: the "&" produced from "&amp;" is never decoded a second time
+  return str.replace(
+    /&(apos|quot|gt|lt|amp);/g,
+    (entity, name: string) => characters[name] ?? entity
+  );
+}
+
+/**
+ * Extract the raw content of the first occurrence of an XML section (case-insensitive tags).
+ * The tag name is matched literally: regex metacharacters in it (e.g. ".") are escaped.
+ * @param xmlContent - The full XML content
+ * @param tagName - The tag name to extract (e.g., 'implemented_features')
+ * @param options - Optional extraction options
+ * @returns The content between the tags, or null if not found
+ */
+export function extractXmlSection(
+  xmlContent: string,
+  tagName: string,
+  options: ExtractXmlOptions = {}
+): string | null {
+  const log = options.logger || logger;
+  const safeTag = tagName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
+  const regex = new RegExp(`<${safeTag}>([\\s\\S]*?)<\\/${safeTag}>`, 'i');
+  const match = xmlContent.match(regex);
+
+  if (match) {
+    log.debug(`Extracted <${tagName}> section`);
+    return match[1];
+  }
+
+  log.debug(`Section <${tagName}> not found`);
+  return null;
+}
+
+/**
+ * Extract all values from repeated XML elements (case-sensitive, literal tag name match)
+ *
+ * @param xmlContent - The XML content to search
+ * @param tagName - The tag name to extract values from
+ * @param options - Optional extraction options
+ * @returns Array of extracted values (trimmed, then unescaped), in document order
+ */
+export function extractXmlElements(
+  xmlContent: string,
+  tagName: string,
+  options: ExtractXmlOptions = {}
+): string[] {
+  const log = options.logger || logger;
+  const values: string[] = [];
+  // Escape regex metacharacters so the tag name only ever matches literally
+  const safeTag = tagName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
+  const regex = new RegExp(`<${safeTag}>([\\s\\S]*?)<\\/${safeTag}>`, 'g');
+
+  for (const match of xmlContent.matchAll(regex)) {
+    values.push(unescapeXml(match[1].trim()));
+  }
+
+  log.debug(`Extracted ${values.length} <${tagName}> elements`);
+  return values;
+}
+
+/**
+ * Extract implemented features from app_spec.txt XML content
+ *
+ * Reads each <feature> block inside the <implemented_features> section and returns its
+ * unescaped, trimmed <name> and <description>, plus the <location> entries of an optional
+ * <file_locations> block. Features without a name are skipped, and file_locations is only
+ * set when at least one location was found.
+ *
+ * @param specContent - The full XML content of app_spec.txt
+ * @param options - Optional extraction options
+ * @returns Array of implemented features with name, description, and optional file_locations
+ */
+export function extractImplementedFeatures(
+  specContent: string,
+  options: ExtractXmlOptions = {}
+): ImplementedFeature[] {
+  const log = options.logger || logger;
+
+  // Only features inside <implemented_features>...</implemented_features> are considered
+  const section = extractXmlSection(specContent, 'implemented_features', options);
+  if (!section) {
+    log.debug('No implemented_features section found');
+    return [];
+  }
+
+  // Trimmed, unescaped body of the first match in a block, or '' when there is none
+  const firstText = (block: string, pattern: RegExp): string => {
+    const hit = block.match(pattern);
+    return hit ? unescapeXml(hit[1].trim()) : '';
+  };
+
+  const features: ImplementedFeature[] = [];
+  for (const [, block] of section.matchAll(/<feature>([\s\S]*?)<\/feature>/g)) {
+    const name = firstText(block, /<name>([\s\S]*?)<\/name>/);
+    const description = firstText(block, /<description>([\s\S]*?)<\/description>/);
+
+    // <file_locations> is optional; an empty list is treated like a missing one
+    const locationsXml = extractXmlSection(block, 'file_locations', options);
+    const locations = locationsXml ? extractXmlElements(locationsXml, 'location', options) : [];
+
+    // Blocks without a name are skipped
+    if (!name) {
+      continue;
+    }
+
+    const feature: ImplementedFeature = { name, description };
+    if (locations.length > 0) {
+      feature.file_locations = locations;
+    }
+    features.push(feature);
+  }
+
+  log.debug(`Extracted ${features.length} implemented features`);
+  return features;
+}
+
+/**
+ * Extract only the feature names from implemented_features section
+ * (a projection over extractImplementedFeatures that keeps its order)
+ *
+ * @param specContent - The full XML content of app_spec.txt
+ * @param options - Optional extraction options
+ * @returns Array of feature names
+ */
+export function extractImplementedFeatureNames(
+  specContent: string,
+  options: ExtractXmlOptions = {}
+): string[] {
+  return extractImplementedFeatures(specContent, options).map(({ name }) => name);
+}
+
+/**
+ * Serialize one implemented feature as a <feature> XML block with escaped text content
+ *
+ * @param feature - The feature to convert to XML
+ * @param indent - One indentation unit (default: 2 spaces); <feature> sits two units deep
+ * @returns XML string for the feature, without a trailing newline
+ */
+export function featureToXml(feature: ImplementedFeature, indent: string = '  '): string {
+  const [outer, inner, deepest] = [2, 3, 4].map((depth) => indent.repeat(depth));
+  const locations = feature.file_locations || [];
+
+  const lines = [
+    `${outer}<feature>`,
+    `${inner}<name>${escapeXml(feature.name)}</name>`,
+    `${inner}<description>${escapeXml(feature.description)}</description>`,
+  ];
+
+  // The <file_locations> wrapper is only written when there is something to list
+  if (locations.length > 0) {
+    lines.push(`${inner}<file_locations>`);
+    for (const location of locations) {
+      lines.push(`${deepest}<location>${escapeXml(location)}</location>`);
+    }
+    lines.push(`${inner}</file_locations>`);
+  }
+  lines.push(`${outer}</feature>`);
+  return lines.join('\n');
+}
+
+/**
+ * Serialize a list of implemented features, one <feature> block per entry
+ * @param features - Array of features to convert to XML, emitted in array order
+ * @param indent - Indentation unit handed through to featureToXml (default: 2 spaces)
+ * @returns The <feature> blocks joined by newlines ('' for an empty array)
+ */
+export function featuresToXml(features: ImplementedFeature[], indent: string = '  '): string {
+  const blocks = features.map((feature) => featureToXml(feature, indent));
+  return blocks.join('\n');
+}
+
+/**
+ * Replace the implemented_features section, or insert one after </core_capabilities> or,
+ * failing that, before </project_specification>; otherwise the content is returned unchanged.
+ * @param specContent - The full XML content
+ * @param newFeatures - The new features to set
+ * @param options - Optional extraction options
+ * @returns Updated XML content with the new implemented_features section
+ */
+export function updateImplementedFeaturesSection(
+  specContent: string,
+  newFeatures: ImplementedFeature[],
+  options: ExtractXmlOptions = {}
+): string {
+  const log = options.logger || logger;
+  const indent = '  ';
+
+  // Generate new section content
+  const newSectionContent = featuresToXml(newFeatures, indent);
+
+  // Build the new section
+  const newSection = `<implemented_features>
+${newSectionContent}
+${indent}</implemented_features>`;
+
+  // An existing section is replaced in place. The replacer is a function because a plain
+  // replacement string would expand "$&", "$1", "$$" etc. occurring in feature text.
+  const sectionRegex = /<implemented_features>[\s\S]*?<\/implemented_features>/;
+  if (sectionRegex.test(specContent)) {
+    log.debug('Replacing existing implemented_features section');
+    return specContent.replace(sectionRegex, () => newSection);
+  }
+
+  // If section doesn't exist, try to insert after core_capabilities
+  const coreCapabilitiesEnd = '</core_capabilities>';
+  const insertIndex = specContent.indexOf(coreCapabilitiesEnd);
+
+  if (insertIndex !== -1) {
+    const insertPosition = insertIndex + coreCapabilitiesEnd.length;
+    log.debug('Inserting implemented_features after core_capabilities');
+    return (
+      specContent.slice(0, insertPosition) +
+      '\n\n' +
+      indent +
+      newSection +
+      specContent.slice(insertPosition)
+    );
+  }
+
+  // As a fallback, insert before </project_specification>
+  const projectSpecEnd = '</project_specification>';
+  const fallbackIndex = specContent.indexOf(projectSpecEnd);
+
+  if (fallbackIndex !== -1) {
+    log.debug('Inserting implemented_features before </project_specification>');
+    return (
+      specContent.slice(0, fallbackIndex) +
+      indent +
+      newSection +
+      '\n' +
+      specContent.slice(fallbackIndex)
+    );
+  }
+
+  log.warn?.('Could not find appropriate insertion point for implemented_features');
+  log.debug('Could not find appropriate insertion point for implemented_features');
+  return specContent;
+}
+
+/**
+ * Add a new feature to the implemented_features section
+ *
+ * The current features are parsed from the spec, the new one is appended, and the whole
+ * section is regenerated. If a feature with the same name (ignoring case) already exists,
+ * the content is returned unchanged.
+ *
+ * @param specContent - The full XML content
+ * @param newFeature - The feature to add
+ * @param options - Optional extraction options
+ * @returns Updated XML content, or the original content when the name is already present
+ */
+export function addImplementedFeature(
+  specContent: string,
+  newFeature: ImplementedFeature,
+  options: ExtractXmlOptions = {}
+): string {
+  const log = options.logger || logger;
+  const current = extractImplementedFeatures(specContent, options);
+
+  // Names are compared case-insensitively
+  const hasSameName = (candidate: ImplementedFeature): boolean =>
+    candidate.name.toLowerCase() === newFeature.name.toLowerCase();
+
+  if (current.some(hasSameName)) {
+    log.debug(`Feature "${newFeature.name}" already exists, skipping`);
+    return specContent;
+  }
+
+  log.debug(`Adding feature "${newFeature.name}"`);
+  // The new feature goes last; the section is then regenerated from the full list
+  const updatedFeatures = [...current, newFeature];
+  return updateImplementedFeaturesSection(specContent, updatedFeatures, options);
+}
+
+/**
+ * Remove a feature from the implemented_features section by name
+ *
+ * Names are compared case-insensitively and every feature with a matching name is dropped.
+ * When nothing matches, the content is returned unchanged.
+ * @param specContent - The full XML content
+ * @param featureName - The name of the feature to remove
+ * @param options - Optional extraction options
+ * @returns Updated XML content with the feature removed
+ */
+export function removeImplementedFeature(
+  specContent: string,
+  featureName: string,
+  options: ExtractXmlOptions = {}
+): string {
+  const log = options.logger || logger;
+  const current = extractImplementedFeatures(specContent, options);
+
+  const isTarget = (candidate: ImplementedFeature): boolean =>
+    candidate.name.toLowerCase() === featureName.toLowerCase();
+
+  // Nothing to remove: hand back the input untouched
+  if (!current.some(isTarget)) {
+    log.debug(`Feature "${featureName}" not found, no changes made`);
+    return specContent;
+  }
+
+  log.debug(`Removing feature "${featureName}"`);
+  const remaining = current.filter((candidate) => !isTarget(candidate));
+  return updateImplementedFeaturesSection(specContent, remaining, options);
+}
+
+/**
+ * Update an existing feature in the implemented_features section
+ *
+ * The feature is located by name (ignoring case); when no feature matches, the content is
+ * returned unchanged. The name itself only changes when `updates.name` is provided.
+ * @param specContent - The full XML content
+ * @param featureName - The name of the feature to update
+ * @param updates - Partial updates to apply to the feature
+ * @param options - Optional extraction options
+ * @returns Updated XML content with the feature modified
+ */
+export function updateImplementedFeature(
+  specContent: string,
+  featureName: string,
+  updates: Partial<ImplementedFeature>,
+  options: ExtractXmlOptions = {}
+): string {
+  const log = options.logger || logger;
+  const features = extractImplementedFeatures(specContent, options);
+
+  const isTarget = (candidate: ImplementedFeature): boolean =>
+    candidate.name.toLowerCase() === featureName.toLowerCase();
+
+  if (!features.some(isTarget)) {
+    log.debug(`Feature "${featureName}" not found, no changes made`);
+    return specContent;
+  }
+
+  // Every feature whose name matches receives the updates; all others pass through as-is
+  const updatedFeatures = features.map((feature) =>
+    isTarget(feature)
+      ? {
+          ...feature,
+          ...updates,
+          // An undefined updates.name must not erase the existing name
+          name: updates.name ?? feature.name,
+        }
+      : feature
+  );
+
+  log.debug(`Updating feature "${featureName}"`);
+  return updateImplementedFeaturesSection(specContent, updatedFeatures, options);
+}
+
+/**
+ * Check if a feature exists in the implemented_features section (name compared ignoring case)
+ * @param specContent - The full XML content
+ * @param featureName - The name of the feature to check
+ * @param options - Optional extraction options
+ * @returns True if the feature exists
+ */
+export function hasImplementedFeature(
+  specContent: string,
+  featureName: string,
+  options: ExtractXmlOptions = {}
+): boolean {
+  return extractImplementedFeatures(specContent, options).some(
+    ({ name }) => name.toLowerCase() === featureName.toLowerCase()
+  );
+}
+
+/**
+ * Convert extracted features to SpecOutput.implemented_features format
+ *
+ * name and description are copied as-is; file_locations is carried over only when it
+ * holds at least one entry, otherwise the key is omitted from the result.
+ * @param features - Array of extracted features
+ * @returns Features in SpecOutput format
+ */
+export function toSpecOutputFeatures(
+  features: ImplementedFeature[]
+): SpecOutput['implemented_features'] {
+  return features.map(({ name, description, file_locations }) =>
+    file_locations && file_locations.length > 0
+      ? { name, description, file_locations }
+      : { name, description }
+  );
+}
+
+/**
+ * Convert SpecOutput.implemented_features to ImplementedFeature format
+ *
+ * Mirror of toSpecOutputFeatures: name and description are copied, and file_locations
+ * is kept only when the list is present and non-empty.
+ * @param specFeatures - Features from SpecOutput
+ * @returns Features in ImplementedFeature format
+ */
+export function fromSpecOutputFeatures(
+  specFeatures: SpecOutput['implemented_features']
+): ImplementedFeature[] {
+  return specFeatures.map(({ name, description, file_locations }) =>
+    file_locations && file_locations.length > 0
+      ? { name, description, file_locations }
+      : { name, description }
+  );
+}
+
+/**
+ * One <phase> entry of the implementation_roadmap section, as produced by extractRoadmapPhases
+ */
+export interface RoadmapPhase {
+  name: string; // trimmed, XML-unescaped <name> text (never empty in extracted results)
+  status: string; // <status> text; the extractor falls back to 'pending' when the tag is absent
+  description?: string; // <description> text; undefined when the phase has none
+}
+
+/**
+ * List the technologies declared in the technology_stack section of app_spec.txt XML
+ *
+ * @param specContent - The full XML content
+ * @param options - Optional extraction options (a custom logger may be supplied)
+ * @returns The <technology> entries of the section, or [] when the section is missing
+ */
+export function extractTechnologyStack(
+  specContent: string,
+  options: ExtractXmlOptions = {}
+): string[] {
+  const log = options.logger || logger;
+  const section = extractXmlSection(specContent, 'technology_stack', options);
+
+  if (section) {
+    const names = extractXmlElements(section, 'technology', options);
+    log.debug(`Extracted ${names.length} technologies`);
+    return names;
+  }
+
+  log.debug('No technology_stack section found');
+  return [];
+}
+
+/**
+ * Replace the technology_stack section in XML content with a new technology list
+ *
+ * Only the first block is rewritten; a document without the section is returned unchanged.
+ *
+ * @param specContent - The full XML content
+ * @param technologies - The new technology list (each entry is XML-escaped)
+ * @param options - Optional extraction options
+ * @returns Updated XML content
+ */
+export function updateTechnologyStack(
+  specContent: string,
+  technologies: string[],
+  options: ExtractXmlOptions = {}
+): string {
+  const log = options.logger || logger;
+  const indent = '  ';
+  const i2 = indent.repeat(2);
+
+  const techXml = technologies
+    .map((t) => `${i2}<technology>${escapeXml(t)}</technology>`)
+    .join('\n');
+  const newSection = `<technology_stack>\n${techXml}\n${indent}</technology_stack>`;
+
+  const sectionRegex = /<technology_stack>[\s\S]*?<\/technology_stack>/;
+  if (!sectionRegex.test(specContent)) {
+    log.debug('No technology_stack section found to update');
+    return specContent;
+  }
+
+  log.debug('Replacing existing technology_stack section');
+  // Function replacer: a plain string would expand "$&", "$1" or "$$" found in technology names
+  return specContent.replace(sectionRegex, () => newSection);
+}
+
+/**
+ * Read the phases of the implementation_roadmap section of app_spec.txt XML content
+ *
+ * Each <phase> block contributes its <name>, <status> and <description> text (trimmed and
+ * XML-unescaped). A missing <status> becomes 'pending', a missing <description> stays
+ * undefined, and phases whose name is missing or empty are left out.
+ *
+ * @param specContent - The full XML content
+ * @param options - Optional extraction options
+ * @returns Array of roadmap phases, empty when the section is missing
+ */
+export function extractRoadmapPhases(
+  specContent: string,
+  options: ExtractXmlOptions = {}
+): RoadmapPhase[] {
+  const log = options.logger || logger;
+
+  const roadmapSection = extractXmlSection(specContent, 'implementation_roadmap', options);
+  if (!roadmapSection) {
+    log.debug('No implementation_roadmap section found');
+    return [];
+  }
+
+  // Trimmed, unescaped text of the first <tag>...</tag> inside a phase block, if present
+  const readTag = (block: string, tag: string): string | undefined => {
+    const found = block.match(new RegExp(`<${tag}>([\\s\\S]*?)<\\/${tag}>`));
+    return found ? unescapeXml(found[1].trim()) : undefined;
+  };
+
+  const phases: RoadmapPhase[] = [];
+  for (const [, phaseContent] of roadmapSection.matchAll(/<phase>([\s\S]*?)<\/phase>/g)) {
+    const name = readTag(phaseContent, 'name') ?? '';
+    const status = readTag(phaseContent, 'status') ?? 'pending';
+    const description = readTag(phaseContent, 'description');
+
+    // Nameless phases are skipped, as are names that trim down to nothing
+    if (name) {
+      phases.push({ name, status, description });
+    }
+  }
+
+  log.debug(`Extracted ${phases.length} roadmap phases`);
+  return phases;
+}
+
+/**
+ * Update a roadmap phase status in XML content (phase name is matched case-insensitively)
+ * @param specContent - The full XML content
+ * @param phaseName - The name of the phase to update; compared literally after XML-escaping
+ * @param newStatus - The new status value (XML-escaped before it is written)
+ * @param options - Optional extraction options
+ * @returns Updated XML content, or the input unchanged when no matching phase is found
+ */
+export function updateRoadmapPhaseStatus(
+  specContent: string,
+  phaseName: string,
+  newStatus: string,
+  options: ExtractXmlOptions = {}
+): string {
+  const log = options.logger || logger;
+  // Escape regex metacharacters so a name like "Phase 1 (MVP)" or "C++" is matched literally
+  const namePattern = escapeXml(phaseName).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
+  const phaseRegex = new RegExp(
+    `(<phase>\\s*<name>\\s*${namePattern}\\s*<\\/name>\\s*<status>)[\\s\\S]*?(<\\/status>)`,
+    'i'
+  );
+
+  if (!phaseRegex.test(specContent)) {
+    log.debug(`Phase "${phaseName}" not found`);
+    return specContent;
+  }
+
+  log.debug(`Updating phase "${phaseName}" status to "${newStatus}"`);
+  // Replacer function: "$" sequences in the status must not be expanded as patterns
+  const status = escapeXml(newStatus);
+  return specContent.replace(phaseRegex, (_m, open, close) => `${open}${status}${close}`);
+}
--- a/apps/server/src/middleware/require-json-content-type.ts
+++ b/apps/server/src/middleware/require-json-content-type.ts
@@ -0,0 +1,50 @@
+/**
+ * Middleware to enforce Content-Type: application/json for request bodies
+ *
+ * This security middleware prevents malicious requests by requiring proper
+ * Content-Type headers for all POST, PUT, and PATCH requests.
+ *
+ * Rejecting requests without proper Content-Type helps prevent:
+ * - CSRF attacks via form submissions (which use application/x-www-form-urlencoded)
+ * - Content-type confusion attacks
+ * - Malformed request exploitation
+ */
+
+import type { Request, Response, NextFunction } from 'express';
+
+// HTTP methods that typically include request bodies
+const METHODS_REQUIRING_JSON = ['POST', 'PUT', 'PATCH'];
+
+/**
+ * Middleware that requires Content-Type: application/json for POST/PUT/PATCH requests
+ *
+ * Returns 415 Unsupported Media Type if:
+ * - The request method is POST, PUT, or PATCH
+ * - AND the Content-Type media type is missing or is not exactly application/json
+ *
+ * Allows requests to pass through for any other method (GET, DELETE, OPTIONS, HEAD, etc.),
+ * or when the media type is application/json (parameters such as charset are ignored).
+ *
+ * Only the text before the first ";" is compared, so "text/plain; application/json" is rejected.
+ */
+export function requireJsonContentType(req: Request, res: Response, next: NextFunction): void {
+  // Skip validation for methods that don't require a body
+  if (!METHODS_REQUIRING_JSON.includes(req.method)) {
+    next();
+    return;
+  }
+
+  // Keep only the media type: "Application/JSON; charset=utf-8" -> "Application/JSON"
+  const [essence = ''] = (req.headers['content-type'] ?? '').split(';');
+
+  if (essence.trim().toLowerCase() !== 'application/json') {
+    res.status(415).json({
+      success: false,
+      error: 'Unsupported Media Type',
+      message: 'Content-Type header must be application/json',
+    });
+    return;
+  }
+
+  next();
+}
--- a/apps/server/src/middleware/validate-paths.ts
+++ b/apps/server/src/middleware/validate-paths.ts
@@ -8,12 +8,28 @@ import type { Request, Response, NextFunction } from 'express';
 import { validatePath, PathNotAllowedError } from '@automaker/platform';

 /**
- * Creates a middleware that validates specified path parameters in req.body
+ * Helper to get parameter value from request (checks body first, then query)
+ */
+function getParamValue(req: Request, paramName: string): unknown {
+  // Body parameters win (POST/PUT/PATCH requests)
+  const fromBody = req.body ? req.body[paramName] : undefined;
+  if (fromBody !== undefined) {
+    return fromBody;
+  }
+
+  // Otherwise use the query string (GET requests); undefined when absent there too
+  const query = req.query;
+  return query ? query[paramName] : undefined;
+}
+
+/**
+ * Creates a middleware that validates specified path parameters in req.body or req.query
 * @param paramNames - Names of parameters to validate (e.g., 'projectPath', 'worktreePath')
 * @example
 * router.post('/create', validatePathParams('projectPath'), handler);
 * router.post('/delete', validatePathParams('projectPath', 'worktreePath'), handler);
 * router.post('/send', validatePathParams('workingDirectory?', 'imagePaths[]'), handler);
+ * router.get('/logs', validatePathParams('worktreePath'), handler); // Works with query params too
 *
 * Special syntax:
 * - 'paramName?' - Optional parameter (only validated if present)
@@ -26,8 +42,8 @@ export function validatePathParams(...paramNames: string[]) {
        // Handle optional parameters (paramName?)
        if (paramName.endsWith('?')) {
          const actualName = paramName.slice(0, -1);
-          const value = req.body[actualName];
-          if (value) {
+          const value = getParamValue(req, actualName);
+          if (value && typeof value === 'string') {
            validatePath(value);
          }
          continue;
@@ -36,18 +52,20 @@ export function validatePathParams(...paramNames: string[]) {
        // Handle array parameters (paramName[])
        if (paramName.endsWith('[]')) {
          const actualName = paramName.slice(0, -2);
-          const values = req.body[actualName];
+          const values = getParamValue(req, actualName);
          if (Array.isArray(values) && values.length > 0) {
            for (const value of values) {
-              validatePath(value);
+              if (typeof value === 'string') {
+                validatePath(value);
+              }
            }
          }
          continue;
        }

        // Handle regular parameters
-        const value = req.body[paramName];
-        if (value) {
+        const value = getParamValue(req, paramName);
+        if (value && typeof value === 'string') {
          validatePath(value);
        }
      }
--- a/apps/server/src/providers/claude-provider.ts
+++ b/apps/server/src/providers/claude-provider.ts
@@ -7,6 +7,24 @@

 import { query, type Options } from '@anthropic-ai/claude-agent-sdk';
 import { BaseProvider } from './base-provider.js';
+import { classifyError, getUserFriendlyErrorMessage, createLogger } from '@automaker/utils';
+
+const logger = createLogger('ClaudeProvider');
+import {
+  getThinkingTokenBudget,
+  validateBareModelId,
+  type ClaudeApiProfile,
+  type ClaudeCompatibleProvider,
+  type Credentials,
+} from '@automaker/types';
+
+/**
+ * ProviderConfig - either flavour of alternative-endpoint configuration
+ *
+ * buildEnv reads the connection fields (name, baseUrl, apiKey, apiKeySource, useAuthToken,
+ * timeoutMs) from both members; modelMappings is read only from the legacy ClaudeApiProfile.
+ */
+type ProviderConfig = ClaudeApiProfile | ClaudeCompatibleProvider;
 import type {
  ExecuteOptions,
  ProviderMessage,
@@ -14,6 +32,160 @@ import type {
  ModelDefinition,
 } from './types.js';

+// Explicit allowlist of environment variables to pass to the SDK - nothing else should leak.
+// NOTE(review): the visible buildEnv assigns these keys by name and never reads this array.
+const ALLOWED_ENV_VARS = [
+  // Authentication
+  'ANTHROPIC_API_KEY',
+  'ANTHROPIC_AUTH_TOKEN',
+  // Endpoint configuration
+  'ANTHROPIC_BASE_URL',
+  'API_TIMEOUT_MS',
+  // Model mappings
+  'ANTHROPIC_DEFAULT_HAIKU_MODEL',
+  'ANTHROPIC_DEFAULT_SONNET_MODEL',
+  'ANTHROPIC_DEFAULT_OPUS_MODEL',
+  // Traffic control
+  'CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC',
+  // System vars (always from process.env) - duplicates SYSTEM_ENV_VARS below, keep in sync
+  'PATH',
+  'HOME',
+  'SHELL',
+  'TERM',
+  'USER',
+  'LANG',
+  'LC_ALL',
+];
+
+// System vars: buildEnv copies each one from process.env (when set) with or without a provider
+const SYSTEM_ENV_VARS = ['PATH', 'HOME', 'SHELL', 'TERM', 'USER', 'LANG', 'LC_ALL'];
+
+/** Type guard for the new provider system: the config owns a `models` property holding an array */
+function isClaudeCompatibleProvider(config: ProviderConfig): config is ClaudeCompatibleProvider {
+  if (!('models' in config)) {
+    return false;
+  }
+  return Array.isArray(config.models);
+}
+
+/**
+ * Build the environment object handed to the SDK, containing only explicitly chosen variables.
+ * With a provider/profile: auth, base URL, timeout, model mappings and traffic control come from
+ * that config (clean switch); with apiKeySource 'env' the key is still read from process.env.
+ * Without one: the Anthropic key comes from credentials first, then process.env, and
+ * ANTHROPIC_AUTH_TOKEN / ANTHROPIC_BASE_URL are passed through from process.env when set.
+ * System vars (SYSTEM_ENV_VARS) are copied from process.env in both cases.
+ *
+ * @param providerConfig - Optional provider configuration (ClaudeCompatibleProvider or legacy ClaudeApiProfile)
+ * @param credentials - Optional credentials; used for apiKeySource 'credentials' and the no-provider key
+ * @returns Env map; the auth entry may hold undefined when no API key could be resolved
+ */
+function buildEnv(
+  providerConfig?: ProviderConfig,
+  credentials?: Credentials
+): Record<string, string | undefined> {
+  const env: Record<string, string | undefined> = {};
+
+  if (providerConfig) {
+    // Use provider configuration (clean switch - don't inherit non-system vars from process.env)
+    logger.debug('[buildEnv] Using provider configuration:', {
+      name: providerConfig.name,
+      baseUrl: providerConfig.baseUrl,
+      apiKeySource: providerConfig.apiKeySource ?? 'inline',
+      isNewProvider: isClaudeCompatibleProvider(providerConfig),
+    });
+
+    // Resolve API key based on source strategy (an unrecognised source leaves it undefined)
+    let apiKey: string | undefined;
+    const source = providerConfig.apiKeySource ?? 'inline'; // Default to inline for backwards compat
+
+    switch (source) {
+      case 'inline':
+        apiKey = providerConfig.apiKey;
+        break;
+      case 'env':
+        apiKey = process.env.ANTHROPIC_API_KEY;
+        break;
+      case 'credentials':
+        apiKey = credentials?.apiKeys?.anthropic;
+        break;
+    }
+
+    // Warn (but carry on) if no API key found; the auth var below is then set to undefined
+    if (!apiKey) {
+      logger.warn(`No API key found for provider "${providerConfig.name}" with source "${source}"`);
+    }
+
+    // Authentication: exactly one of the two auth vars is set, chosen by useAuthToken
+    if (providerConfig.useAuthToken) {
+      env['ANTHROPIC_AUTH_TOKEN'] = apiKey;
+    } else {
+      env['ANTHROPIC_API_KEY'] = apiKey;
+    }
+
+    // Endpoint configuration
+    env['ANTHROPIC_BASE_URL'] = providerConfig.baseUrl;
+    logger.debug(`[buildEnv] Set ANTHROPIC_BASE_URL to: ${providerConfig.baseUrl}`);
+
+    if (providerConfig.timeoutMs) {
+      env['API_TIMEOUT_MS'] = String(providerConfig.timeoutMs);
+    }
+
+    // Model mappings - only for legacy ClaudeApiProfile
+    // For ClaudeCompatibleProvider, the model is passed directly (no mapping needed)
+    if (!isClaudeCompatibleProvider(providerConfig) && providerConfig.modelMappings) {
+      if (providerConfig.modelMappings.haiku) {
+        env['ANTHROPIC_DEFAULT_HAIKU_MODEL'] = providerConfig.modelMappings.haiku;
+      }
+      if (providerConfig.modelMappings.sonnet) {
+        env['ANTHROPIC_DEFAULT_SONNET_MODEL'] = providerConfig.modelMappings.sonnet;
+      }
+      if (providerConfig.modelMappings.opus) {
+        env['ANTHROPIC_DEFAULT_OPUS_MODEL'] = providerConfig.modelMappings.opus;
+      }
+    }
+
+    // Traffic control
+    if (providerConfig.disableNonessentialTraffic) {
+      env['CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC'] = '1';
+    }
+  } else {
+    // Use direct Anthropic API - pass through credentials or environment variables
+    // This supports:
+    // 1. API Key mode: ANTHROPIC_API_KEY from credentials (UI settings) or env
+    // 2. Claude Max plan: Uses CLI OAuth auth (SDK handles this automatically)
+    // 3. Custom endpoints via ANTHROPIC_BASE_URL env var (backward compatibility)
+    //
+    // Priority: credentials file (UI settings) -> environment variable
+    // Note: Only auth and endpoint vars are passed. Model mappings and traffic
+    // control are NOT passed (those require a profile for explicit configuration).
+    if (credentials?.apiKeys?.anthropic) {
+      env['ANTHROPIC_API_KEY'] = credentials.apiKeys.anthropic;
+    } else if (process.env.ANTHROPIC_API_KEY) {
+      env['ANTHROPIC_API_KEY'] = process.env.ANTHROPIC_API_KEY;
+    }
+    // If using Claude Max plan via CLI auth, the SDK handles auth automatically
+    // when no API key is provided. We don't set ANTHROPIC_AUTH_TOKEN here
+    // unless it was explicitly set in process.env (rare edge case).
+    if (process.env.ANTHROPIC_AUTH_TOKEN) {
+      env['ANTHROPIC_AUTH_TOKEN'] = process.env.ANTHROPIC_AUTH_TOKEN;
+    }
+    // Pass through ANTHROPIC_BASE_URL if set in environment (backward compatibility)
+    if (process.env.ANTHROPIC_BASE_URL) {
+      env['ANTHROPIC_BASE_URL'] = process.env.ANTHROPIC_BASE_URL;
+    }
+  }
+
+  // Always add system vars from process.env (unset or empty ones are skipped)
+  for (const key of SYSTEM_ENV_VARS) {
+    if (process.env[key]) {
+      env[key] = process.env[key];
+    }
+  }
+
+  return env;
+}
+
 export class ClaudeProvider extends BaseProvider {
  getName(): string {
    return 'claude';
@@ -23,6 +195,10 @@ export class ClaudeProvider extends BaseProvider {
   * Execute a query using Claude Agent SDK
   */
  async *executeQuery(options: ExecuteOptions): AsyncGenerator<ProviderMessage> {
+    // Validate that model doesn't have a provider prefix
+    // AgentService should strip prefixes before passing to providers
+    validateBareModelId(options.model, 'ClaudeProvider');
+
    const {
      prompt,
      model,
@@ -33,28 +209,49 @@ export class ClaudeProvider extends BaseProvider {
      abortController,
      conversationHistory,
      sdkSessionId,
+      thinkingLevel,
+      claudeApiProfile,
+      claudeCompatibleProvider,
+      credentials,
    } = options;

-    // Build Claude SDK options
-    const defaultTools = ['Read', 'Write', 'Edit', 'Glob', 'Grep', 'Bash', 'WebSearch', 'WebFetch'];
-    const toolsToUse = allowedTools || defaultTools;
+    // Determine which provider config to use
+    // claudeCompatibleProvider takes precedence over claudeApiProfile
+    const providerConfig = claudeCompatibleProvider || claudeApiProfile;

+    // Convert thinking level to token budget
+    const maxThinkingTokens = getThinkingTokenBudget(thinkingLevel);
+
+    // Build Claude SDK options
    const sdkOptions: Options = {
      model,
      systemPrompt,
      maxTurns,
      cwd,
-      allowedTools: toolsToUse,
-      permissionMode: 'acceptEdits',
-      sandbox: {
-        enabled: true,
-        autoAllowBashIfSandboxed: true,
-      },
+      // Pass only explicitly allowed environment variables to SDK
+      // When a provider is active, uses provider settings (clean switch)
+      // When no provider, uses direct Anthropic API (from process.env or CLI OAuth)
+      env: buildEnv(providerConfig, credentials),
+      // Pass through allowedTools if provided by caller (decided by sdk-options.ts)
+      ...(allowedTools && { allowedTools }),
+      // AUTONOMOUS MODE: Always bypass permissions for fully autonomous operation
+      permissionMode: 'bypassPermissions',
+      allowDangerouslySkipPermissions: true,
      abortController,
      // Resume existing SDK session if we have a session ID
      ...(sdkSessionId && conversationHistory && conversationHistory.length > 0
        ? { resume: sdkSessionId }
        : {}),
+      // Forward settingSources for CLAUDE.md file loading
+      ...(options.settingSources && { settingSources: options.settingSources }),
+      // Forward MCP servers configuration
+      ...(options.mcpServers && { mcpServers: options.mcpServers }),
+      // Extended thinking configuration
+      ...(maxThinkingTokens && { maxThinkingTokens }),
+      // Subagents configuration for specialized task delegation
+      ...(options.agents && { agents: options.agents }),
+      // Pass through outputFormat for structured JSON outputs
+      ...(options.outputFormat && { outputFormat: options.outputFormat }),
    };

    // Build prompt payload
@@ -79,6 +276,18 @@ export class ClaudeProvider extends BaseProvider {
      promptPayload = prompt;
    }

+    // Log the environment being passed to the SDK for debugging
+    const envForSdk = sdkOptions.env as Record<string, string | undefined>;
+    logger.debug('[ClaudeProvider] SDK Configuration:', {
+      model: sdkOptions.model,
+      baseUrl: envForSdk?.['ANTHROPIC_BASE_URL'] || '(default Anthropic API)',
+      hasApiKey: !!envForSdk?.['ANTHROPIC_API_KEY'],
+      hasAuthToken: !!envForSdk?.['ANTHROPIC_AUTH_TOKEN'],
+      providerName: providerConfig?.name || '(direct Anthropic)',
+      maxTurns: sdkOptions.maxTurns,
+      maxThinkingTokens: sdkOptions.maxThinkingTokens,
+    });
+
    // Execute via Claude Agent SDK
    try {
      const stream = query({ prompt: promptPayload, options: sdkOptions });
@@ -88,8 +297,32 @@ export class ClaudeProvider extends BaseProvider {
        yield msg as ProviderMessage;
      }
    } catch (error) {
-      console.error('[ClaudeProvider] executeQuery() error during execution:', error);
-      throw error;
+      // Enhance error with user-friendly message and classification
+      const errorInfo = classifyError(error);
+      const userMessage = getUserFriendlyErrorMessage(error);
+
+      logger.error('executeQuery() error during execution:', {
+        type: errorInfo.type,
+        message: errorInfo.message,
+        isRateLimit: errorInfo.isRateLimit,
+        retryAfter: errorInfo.retryAfter,
+        stack: (error as Error).stack,
+      });
+
+      // Build enhanced error message with additional guidance for rate limits
+      const message = errorInfo.isRateLimit
+        ? `${userMessage}\n\nTip: If you're running multiple features in auto-mode, consider reducing concurrency (maxConcurrency setting) to avoid hitting rate limits.`
+        : userMessage;
+
+      const enhancedError = new Error(message);
+      (enhancedError as any).originalError = error;
+      (enhancedError as any).type = errorInfo.type;
+
+      if (errorInfo.isRateLimit) {
+        (enhancedError as any).retryAfter = errorInfo.retryAfter;
+      }
+
+      throw enhancedError;
    }
  }

--- a/apps/server/src/providers/cli-provider.ts
+++ b/apps/server/src/providers/cli-provider.ts
@@ -0,0 +1,625 @@
+/**
+ * CliProvider - Abstract base class for CLI-based AI providers
+ *
+ * Provides common infrastructure for CLI tools that spawn subprocesses
+ * and stream JSONL output. Handles:
+ * - Platform-specific CLI detection (PATH, common locations)
+ * - Windows execution strategies (WSL, npx, direct, cmd)
+ * - JSONL subprocess spawning and streaming
+ * - Error mapping infrastructure
+ *
+ * @example
+ * ```typescript
+ * class CursorProvider extends CliProvider {
+ *   getCliName(): string { return 'cursor-agent'; }
+ *   getSpawnConfig(): CliSpawnConfig {
+ *     return {
+ *       windowsStrategy: 'wsl',
+ *       commonPaths: {
+ *         linux: ['~/.local/bin/cursor-agent'],
+ *         darwin: ['~/.local/bin/cursor-agent'],
+ *       }
+ *     };
+ *   }
+ *   // ... implement abstract methods
+ * }
+ * ```
+ */
+
+import {
+  createWslCommand,
+  findCliInWsl,
+  isWslAvailable,
+  spawnJSONLProcess,
+  windowsToWslPath,
+  type SubprocessOptions,
+  type WslCliResult,
+} from '@automaker/platform';
+import { calculateReasoningTimeout } from '@automaker/types';
+import { createLogger, isAbortError } from '@automaker/utils';
+import { execSync } from 'child_process';
+import * as fs from 'fs';
+import * as os from 'os';
+import * as path from 'path';
+import { BaseProvider } from './base-provider.js';
+import type { ExecuteOptions, ProviderConfig, ProviderMessage } from './types.js';
+
+/**
+ * Spawn strategy for CLI tools on Windows
+ *
+ * The visible code consults it only when process.platform is 'win32'. Strategies:
+ * - 'wsl': Requires WSL, CLI only available on Linux/macOS (e.g., cursor-agent)
+ * - 'npx': Installed globally via npm/npx, use `npx <package>` to run
+ * - 'direct': Native Windows binary, can spawn directly
+ * - 'cmd': Windows batch file (.cmd/.bat), needs cmd.exe shell
+ */
+export type SpawnStrategy = 'wsl' | 'npx' | 'direct' | 'cmd';
+
+/**
+ * Configuration for CLI tool spawning, supplied by each subclass through getSpawnConfig()
+ */
+export interface CliSpawnConfig {
+  /** How to spawn on Windows */
+  windowsStrategy: SpawnStrategy;
+
+  /** NPX package name (for the 'npx' strategy; the install hint falls back to the CLI name) */
+  npxPackage?: string;
+
+  /** Preferred WSL distribution (if windowsStrategy is 'wsl'); passed to findCliInWsl */
+  wslDistribution?: string;
+
+  /**
+   * Common installation paths per platform, looked up by process.platform
+   * Use ~ for home directory (will be expanded)
+   * Keys: 'linux', 'darwin', 'win32'
+   */
+  commonPaths: Record<string, string[]>;
+
+  /** Version check command (defaults to --version) */
+  versionCommand?: string;
+}
+
+/**
+ * CLI error information for consistent error handling (the shape returned by mapError)
+ */
+export interface CliErrorInfo {
+  code: string; // category, e.g. 'NOT_AUTHENTICATED', 'RATE_LIMITED', 'UNKNOWN_ERROR'
+  message: string; // human-readable summary of the failure
+  recoverable: boolean; // the default mapError sets false only for 'UNKNOWN_ERROR'
+  suggestion?: string; // remediation hint; the default 'UNKNOWN_ERROR' result has none
+}
+
+/**
+ * Detection result from CLI path finding (returned by detectCli)
+ */
+export interface CliDetectionResult {
+  /** Path to the CLI ('wsl.exe' when found via WSL, or 'npx' for npx strategy) */
+  cliPath: string | null;
+  /** Whether using WSL mode */
+  useWsl: boolean;
+  /** Path of the CLI inside WSL, if using WSL */
+  wslCliPath?: string;
+  /** WSL distribution if using WSL */
+  wslDistribution?: string;
+  /** Detected strategy used */
+  strategy: SpawnStrategy | 'native';
+}
+
+// Module-wide logger for CLI operations, created with the 'CliProvider' label
+const cliLogger = createLogger('CliProvider');
+
+/**
+ * Base timeout for CLI operations: two minutes, expressed in milliseconds.
+ * CLI tools have longer startup and processing times compared to direct API calls,
+ * so we use a higher base timeout (120s) than the default provider timeout (30s).
+ * This is multiplied by reasoning effort multipliers when applicable.
+ * @see calculateReasoningTimeout from @automaker/types
+ */
+const CLI_BASE_TIMEOUT_MS = 2 * 60 * 1000;
+
+/**
+ * Abstract base class for CLI-based providers
+ *
+ * Subclasses must implement:
+ * - getCliName(): CLI executable name
+ * - getSpawnConfig(): Platform-specific spawn configuration
+ * - buildCliArgs(): Convert ExecuteOptions to CLI arguments
+ * - normalizeEvent(): Convert CLI output to ProviderMessage
+ */
+export abstract class CliProvider extends BaseProvider {
+  // CLI detection results (cached after first detection); fields mirror CliDetectionResult
+  protected cliPath: string | null = null; // null until a CLI path is stored
+  protected useWsl: boolean = false;
+  protected wslCliPath: string | null = null;
+  protected wslDistribution: string | undefined = undefined;
+  protected detectedStrategy: SpawnStrategy | 'native' = 'native'; // 'native' is the default
+
+  // NPX args (used when strategy is 'npx')
+  protected npxArgs: string[] = [];
+
+  constructor(config: ProviderConfig = {}) {
+    super(config);
+    // Only forwards config here; detection happens lazily on first use
+  }
+
+  // ==========================================================================
+  // Abstract methods - must be implemented by subclasses
+  // ==========================================================================
+
+  /**
+   * Get the CLI executable name (e.g., 'cursor-agent', 'aider'); used for lookup and messages
+   */
+  abstract getCliName(): string;
+
+  /**
+   * Get spawn configuration for this CLI (Windows strategy, common install paths, ...)
+   */
+  abstract getSpawnConfig(): CliSpawnConfig;
+
+  /**
+   * Build CLI arguments from execution options
+   * @param options Execution options
+   * @returns Array of CLI arguments
+   */
+  abstract buildCliArgs(options: ExecuteOptions): string[];
+
+  /**
+   * Normalize a raw CLI event to ProviderMessage format
+   * @param event Raw event from CLI JSONL output (untyped, so implementations must narrow it)
+   * @returns Normalized ProviderMessage or null to skip
+   */
+  abstract normalizeEvent(event: unknown): ProviderMessage | null;
+
+  // ==========================================================================
+  // Optional overrides
+  // ==========================================================================
+
+  /**
+   * Map CLI stderr/exit code to error info
+   * Override to provide CLI-specific error mapping
+   *
+   * stderr is searched case-insensitively; the first matching category wins, in this order:
+   * - NOT_AUTHENTICATED: "not authenticated", "please log in", "unauthorized"
+   * - RATE_LIMITED: "rate limit", "too many requests", "429"
+   * - NETWORK_ERROR: "network", "connection", "econnrefused", "timeout"
+   * - PROCESS_CRASHED: exit code 137, or "killed" / "sigterm" in stderr
+   * - UNKNOWN_ERROR: everything else (the only result marked non-recoverable)
+   *
+   * @param stderr - Text the CLI wrote to stderr
+   * @param exitCode - Exit code of the process, or null
+   * @returns Error code, message, recoverability flag and, for known categories, a suggestion
+   */
+  protected mapError(stderr: string, exitCode: number | null): CliErrorInfo {
+    const text = stderr.toLowerCase();
+    const mentions = (...needles: string[]): boolean =>
+      needles.some((needle) => text.includes(needle));
+
+    // Common authentication errors
+    if (mentions('not authenticated', 'please log in', 'unauthorized')) {
+      return {
+        code: 'NOT_AUTHENTICATED',
+        message: `${this.getCliName()} is not authenticated`,
+        recoverable: true,
+        suggestion: `Run "${this.getCliName()} login" to authenticate`,
+      };
+    }
+
+    // Rate limiting
+    if (mentions('rate limit', 'too many requests', '429')) {
+      return {
+        code: 'RATE_LIMITED',
+        message: 'API rate limit exceeded',
+        recoverable: true,
+        suggestion: 'Wait a few minutes and try again',
+      };
+    }
+
+    // Network errors
+    if (mentions('network', 'connection', 'econnrefused', 'timeout')) {
+      return {
+        code: 'NETWORK_ERROR',
+        message: 'Network connection error',
+        recoverable: true,
+        suggestion: 'Check your internet connection and try again',
+      };
+    }
+
+    // Process killed
+    if (exitCode === 137 || mentions('killed', 'sigterm')) {
+      return {
+        code: 'PROCESS_CRASHED',
+        message: 'Process was terminated',
+        recoverable: true,
+        suggestion: 'The process may have run out of memory. Try a simpler task.',
+      };
+    }
+
+    // Generic error
+    return {
+      code: 'UNKNOWN_ERROR',
+      message: stderr || `Process exited with code ${exitCode}`,
+      recoverable: false,
+    };
+  }
+
+  /**
+   * Get installation instructions for this CLI (Windows hints depend on windowsStrategy)
+   * Override to provide CLI-specific instructions
+   */
+  protected getInstallInstructions(): string {
+    const cliName = this.getCliName();
+    const { windowsStrategy, npxPackage } = this.getSpawnConfig();
+    const generic =
+      `${cliName} is not installed. Check the documentation for installation instructions.`;
+
+    if (process.platform !== 'win32') {
+      return generic;
+    }
+    if (windowsStrategy === 'wsl') {
+      return `${cliName} requires WSL on Windows. Install WSL, then run inside WSL to install.`;
+    }
+    if (windowsStrategy === 'npx') {
+      return `Install with: npm install -g ${npxPackage || cliName}`;
+    }
+    // 'cmd' and 'direct' share the generic message
+    return generic;
+  }
+
+  // ==========================================================================
+  // CLI Detection
+  // ==========================================================================
+
+  /**
+   * Expand a leading "~", "~/" or "~\" to the home directory; "~user/..." is returned unchanged
+   */
+  private expandPath(p: string): string {
+    if (p === '~' || p.startsWith('~/') || p.startsWith('~\\')) {
+      return path.join(os.homedir(), p.slice(1));
+    }
+    return p;
+  }
+
+  /**
+   * Find CLI in PATH using 'which' (Unix) or 'where' (Windows); returns the first hit or null
+   */
+  private findCliInPath(): string | null {
+    const cliName = this.getCliName();
+
+    try {
+      const command = process.platform === 'win32' ? 'where' : 'which';
+      const output = execSync(`${command} ${cliName}`, {
+        encoding: 'utf8',
+        timeout: 5000,
+        stdio: ['pipe', 'pipe', 'pipe'],
+        windowsHide: true,
+      });
+      // 'where' can print several CRLF-separated matches: keep the first, without its "\r"
+      const result = output.trim().split(/\r?\n/)[0].trim();
+
+      if (result && fs.existsSync(result)) {
+        cliLogger.debug(`Found ${cliName} in PATH: ${result}`);
+        return result;
+      }
+    } catch {
+      // Lookup command failed or timed out: treat as not in PATH
+    }
+
+    return null;
+  }
+
+  /**
+   * Probe the platform-specific list of well-known install locations.
+   *
+   * Candidates come from `getSpawnConfig().commonPaths`, keyed by
+   * `process.platform`; each one is passed through `expandPath` and the first
+   * that exists on disk wins.
+   *
+   * @returns The first existing candidate path, or `null` when none exists.
+   */
+  private findCliInCommonPaths(): string | null {
+    const { commonPaths } = this.getSpawnConfig();
+    const cliName = this.getCliName();
+    const platformKey = process.platform as 'linux' | 'darwin' | 'win32';
+
+    for (const candidate of commonPaths[platformKey] || []) {
+      const resolved = this.expandPath(candidate);
+      if (!fs.existsSync(resolved)) {
+        continue;
+      }
+      cliLogger.debug(`Found ${cliName} at: ${resolved}`);
+      return resolved;
+    }
+
+    return null;
+  }
+
+  /**
+   * Detect CLI installation using appropriate strategy
+   *
+   * On Windows the configured `windowsStrategy` decides how the CLI is located:
+   * - `wsl`: look the CLI up inside WSL; on success `cliPath` is `wsl.exe` and
+   *   the in-WSL path is reported separately via `wslCliPath`.
+   * - `npx`: no lookup is performed; `cliPath` is simply `npx`.
+   * - `direct` / `cmd`: search PATH first, then the configured common paths.
+   *
+   * On every other platform (and for a Windows strategy that matches none of
+   * the cases) PATH and the common paths are searched and the strategy is
+   * reported as `native`.
+   *
+   * @returns The detection result; `cliPath` is `null` when nothing was found.
+   */
+  protected detectCli(): CliDetectionResult {
+    const config = this.getSpawnConfig();
+    const cliName = this.getCliName();
+    // Route the WSL helpers' log output through this module's debug logger.
+    const wslLogger = (msg: string) => cliLogger.debug(msg);
+
+    // Windows - use configured strategy
+    if (process.platform === 'win32') {
+      switch (config.windowsStrategy) {
+        case 'wsl': {
+          // Check WSL for CLI
+          if (isWslAvailable({ logger: wslLogger })) {
+            const wslResult: WslCliResult | null = findCliInWsl(cliName, {
+              logger: wslLogger,
+              distribution: config.wslDistribution,
+            });
+            if (wslResult) {
+              cliLogger.debug(
+                `Using ${cliName} via WSL (${wslResult.distribution || 'default'}): ${wslResult.wslPath}`
+              );
+              return {
+                cliPath: 'wsl.exe',
+                useWsl: true,
+                wslCliPath: wslResult.wslPath,
+                wslDistribution: wslResult.distribution,
+                strategy: 'wsl',
+              };
+            }
+          }
+          // Either WSL itself is missing or the CLI is not installed inside it.
+          cliLogger.debug(`${cliName} not found (WSL not available or CLI not installed in WSL)`);
+          return { cliPath: null, useWsl: false, strategy: 'wsl' };
+        }
+
+        case 'npx': {
+          // For npx, we don't need to find the CLI, just return npx
+          cliLogger.debug(`Using ${cliName} via npx (package: ${config.npxPackage})`);
+          return {
+            cliPath: 'npx',
+            useWsl: false,
+            strategy: 'npx',
+          };
+        }
+
+        case 'direct':
+        case 'cmd': {
+          // Native Windows - check PATH and common paths
+          const pathResult = this.findCliInPath();
+          if (pathResult) {
+            return { cliPath: pathResult, useWsl: false, strategy: config.windowsStrategy };
+          }
+
+          const commonResult = this.findCliInCommonPaths();
+          if (commonResult) {
+            return { cliPath: commonResult, useWsl: false, strategy: config.windowsStrategy };
+          }
+
+          cliLogger.debug(`${cliName} not found on Windows`);
+          return { cliPath: null, useWsl: false, strategy: config.windowsStrategy };
+        }
+      }
+    }
+
+    // Linux/macOS - native execution
+    // PATH takes precedence over the configured common install locations.
+    const pathResult = this.findCliInPath();
+    if (pathResult) {
+      return { cliPath: pathResult, useWsl: false, strategy: 'native' };
+    }
+
+    const commonResult = this.findCliInCommonPaths();
+    if (commonResult) {
+      return { cliPath: commonResult, useWsl: false, strategy: 'native' };
+    }
+
+    cliLogger.debug(`${cliName} not found`);
+    return { cliPath: null, useWsl: false, strategy: 'native' };
+  }
+
+  /**
+   * Run CLI detection on first use and cache the outcome on the instance.
+   *
+   * Detection counts as done once a path is known or the recorded strategy is
+   * anything other than 'native'. A native lookup that found nothing leaves
+   * both conditions false, so the next call searches again.
+   */
+  protected ensureCliDetected(): void {
+    const alreadyDetected = this.cliPath !== null || this.detectedStrategy !== 'native';
+    if (alreadyDetected) {
+      return;
+    }
+
+    const { cliPath, useWsl, wslCliPath, wslDistribution, strategy } = this.detectCli();
+    this.cliPath = cliPath;
+    this.useWsl = useWsl;
+    this.wslCliPath = wslCliPath || null;
+    this.wslDistribution = wslDistribution;
+    this.detectedStrategy = strategy;
+
+    // npx needs the package name as its first argument.
+    const { npxPackage } = this.getSpawnConfig();
+    if (strategy === 'npx' && npxPackage) {
+      this.npxArgs = [npxPackage];
+    }
+  }
+
+  /**
+   * Report whether the CLI could be located.
+   *
+   * Triggers lazy detection first, then checks whether a path was recorded.
+   *
+   * @returns `true` when a CLI path is known, `false` otherwise.
+   */
+  async isInstalled(): Promise<boolean> {
+    this.ensureCliDetected();
+    const located = this.cliPath !== null;
+    return located;
+  }
+
+  // ==========================================================================
+  // Subprocess Spawning
+  // ==========================================================================
+
+  /**
+   * Assemble the spawn options for the detected execution strategy.
+   *
+   * The working directory, environment, abort controller and timeout are the
+   * same for every strategy; only `command` and `args` differ between the
+   * WSL, npx and direct variants.
+   *
+   * @param options - Execution options (cwd, reasoning effort, abort controller).
+   * @param cliArgs - Arguments to hand to the CLI itself.
+   * @returns Options ready to pass to the subprocess spawner.
+   * @throws Error when the CLI has not been found.
+   */
+  protected buildSubprocessOptions(options: ExecuteOptions, cliArgs: string[]): SubprocessOptions {
+    this.ensureCliDetected();
+
+    const resolvedCliPath = this.cliPath;
+    if (!resolvedCliPath) {
+      throw new Error(`${this.getCliName()} CLI not found. ${this.getInstallInstructions()}`);
+    }
+
+    const workingDir = options.cwd || process.cwd();
+
+    // process.env values may be undefined; the spawn env must be string-only.
+    const env: Record<string, string> = {};
+    Object.entries(process.env).forEach(([name, value]) => {
+      if (value !== undefined) {
+        env[name] = value;
+      }
+    });
+
+    // Timeout scales with reasoning effort (GitHub issue #530: 'xhigh' effort
+    // reasoning models would otherwise time out).
+    const timeout = calculateReasoningTimeout(options.reasoningEffort, CLI_BASE_TIMEOUT_MS);
+
+    const shared = {
+      cwd: workingDir,
+      env,
+      abortController: options.abortController,
+      timeout,
+    };
+
+    // WSL strategy
+    if (this.useWsl && this.wslCliPath) {
+      const wslCwd = windowsToWslPath(workingDir);
+      const { command } = createWslCommand(this.wslCliPath, cliArgs, {
+        distribution: this.wslDistribution,
+      });
+
+      // `--cd` switches directory inside WSL; `-d` selects the distribution.
+      const distributionArgs = this.wslDistribution ? ['-d', this.wslDistribution] : [];
+      const args = [...distributionArgs, '--cd', wslCwd, this.wslCliPath, ...cliArgs];
+
+      cliLogger.debug(`WSL spawn: ${command} ${args.slice(0, 6).join(' ')}...`);
+
+      // `cwd` stays the Windows path: it is used for the spawn itself.
+      return { command, args, ...shared };
+    }
+
+    // NPX strategy
+    if (this.detectedStrategy === 'npx') {
+      const args = [...this.npxArgs, ...cliArgs];
+      cliLogger.debug(`NPX spawn: npx ${args.slice(0, 6).join(' ')}...`);
+
+      return { command: 'npx', args, ...shared };
+    }
+
+    // Direct strategy (native Unix or Windows direct/cmd)
+    cliLogger.debug(`Direct spawn: ${resolvedCliPath} ${cliArgs.slice(0, 6).join(' ')}...`);
+
+    return { command: resolvedCliPath, args: cliArgs, ...shared };
+  }
+
+  /**
+   * Run a query through the CLI and stream its JSONL output as provider messages.
+   *
+   * Default pipeline:
+   * 1. Fold any system prompt into the prompt itself.
+   * 2. Build the CLI arguments and subprocess options for the detected strategy.
+   * 3. Spawn the process and yield each event that `normalizeEvent` accepts.
+   *
+   * An abort ends the stream silently. Errors carrying a `stderr` property are
+   * translated through `mapError` and rethrown with `code`, `recoverable` and
+   * `suggestion` attached; all other errors propagate untouched.
+   *
+   * Subclasses can override for custom behavior.
+   */
+  async *executeQuery(options: ExecuteOptions): AsyncGenerator<ProviderMessage> {
+    this.ensureCliDetected();
+
+    if (!this.cliPath) {
+      throw new Error(`${this.getCliName()} CLI not found. ${this.getInstallInstructions()}`);
+    }
+
+    // Many CLI providers have no separate "system" channel, so critical
+    // formatting/schema instructions must travel inside the prompt.
+    const promptReadyOptions = this.embedSystemPromptIntoPrompt(options);
+
+    const spawnOptions = this.buildSubprocessOptions(
+      promptReadyOptions,
+      this.buildCliArgs(promptReadyOptions)
+    );
+
+    try {
+      for await (const rawEvent of spawnJSONLProcess(spawnOptions)) {
+        const message = this.normalizeEvent(rawEvent);
+        if (!message) {
+          continue;
+        }
+        yield message;
+      }
+    } catch (error) {
+      if (isAbortError(error)) {
+        cliLogger.debug('Query aborted');
+        return;
+      }
+
+      // Only errors that expose stderr are mapped to structured CLI errors.
+      if (!(error instanceof Error) || !('stderr' in error)) {
+        throw error;
+      }
+
+      const stderrText = (error as { stderr?: string }).stderr || error.message;
+      const exitCode = (error as { exitCode?: number | null }).exitCode ?? null;
+      const info = this.mapError(stderrText, exitCode);
+
+      const mapped = new Error(info.message) as Error & CliErrorInfo;
+      mapped.code = info.code;
+      mapped.recoverable = info.recoverable;
+      mapped.suggestion = info.suggestion;
+      throw mapped;
+    }
+  }
+
+  /**
+   * Fold `options.systemPrompt` into `options.prompt` for CLI providers.
+   *
+   * The CLIs integrated here accept a single prompt, so a system prompt is
+   * prepended to the user prompt and `systemPrompt` is cleared to prevent
+   * subclasses from injecting it a second time.
+   *
+   * - String system prompts are embedded verbatim.
+   * - Preset objects are provider-specific; only their optional `append` text
+   *   is embedded.
+   * - String prompts get the system text followed by a `---` divider; array
+   *   prompts get an extra leading text block.
+   *
+   * @param options - Original execution options.
+   * @returns `options` itself when no system prompt is set, otherwise a copy
+   *          with `systemPrompt` cleared and the prompt adjusted as needed.
+   */
+  protected embedSystemPromptIntoPrompt(options: ExecuteOptions): ExecuteOptions {
+    const { systemPrompt, prompt } = options;
+    if (!systemPrompt) {
+      return options;
+    }
+
+    let systemText = '';
+    if (typeof systemPrompt === 'string') {
+      systemText = systemPrompt;
+    } else if (systemPrompt.append) {
+      systemText = systemPrompt.append;
+    }
+
+    const withoutSystemPrompt: ExecuteOptions = { ...options, systemPrompt: undefined };
+
+    if (!systemText) {
+      return withoutSystemPrompt;
+    }
+
+    if (typeof prompt === 'string') {
+      return { ...withoutSystemPrompt, prompt: `${systemText}\n\n---\n\n${prompt}` };
+    }
+
+    if (Array.isArray(prompt)) {
+      return {
+        ...withoutSystemPrompt,
+        prompt: [{ type: 'text', text: systemText }, ...prompt],
+      };
+    }
+
+    // Should be unreachable given the ExecuteOptions typing; kept as a safe default.
+    return withoutSystemPrompt;
+  }
+}
--- a/apps/server/src/providers/codex-config-manager.ts
+++ b/apps/server/src/providers/codex-config-manager.ts
@@ -0,0 +1,85 @@
+/**
+ * Codex Config Manager - Writes MCP server configuration for Codex CLI
+ */
+
+import path from 'path';
+import type { McpServerConfig } from '@automaker/types';
+import * as secureFs from '../lib/secure-fs.js';
+
+// Directory (relative to the working directory) that holds the Codex config file.
+const CODEX_CONFIG_DIR = '.codex';
+// Name of the TOML file written inside CODEX_CONFIG_DIR.
+const CODEX_CONFIG_FILENAME = 'config.toml';
+// TOML table under which each MCP server gets its own sub-table.
+const CODEX_MCP_SECTION = 'mcp_servers';
+
+/**
+ * Render a value as a double-quoted string literal for the TOML output.
+ * Quoting and escaping are delegated to `JSON.stringify`.
+ */
+function formatTomlString(value: string): string {
+  const quoted = JSON.stringify(value);
+  return quoted;
+}
+
+/**
+ * Render a list of strings as a single-line TOML array, e.g. `["a", "b"]`.
+ * An empty list renders as `[]`.
+ */
+function formatTomlArray(values: string[]): string {
+  const items: string[] = [];
+  for (const value of values) {
+    items.push(formatTomlString(value));
+  }
+  return '[' + items.join(', ') + ']';
+}
+
+/**
+ * Render a string-to-string map as a TOML inline table, e.g. `{ A = "1", B = "2" }`.
+ *
+ * Keys made only of ASCII letters, digits, `_` and `-` are valid TOML bare
+ * keys and are emitted as-is. Any other key (containing dots, spaces, quotes,
+ * ...) is emitted as a quoted key, because an unquoted dot would be read as a
+ * nested table and other characters would make the file unparseable.
+ *
+ * @param values - Entries to serialise (e.g. environment variables or headers).
+ * @returns The inline-table literal.
+ */
+function formatTomlInlineTable(values: Record<string, string>): string {
+  const bareKeyPattern = /^[A-Za-z0-9_-]+$/;
+  const entries = Object.entries(values).map(([key, value]) => {
+    const tomlKey = bareKeyPattern.test(key) ? key : JSON.stringify(key);
+    return `${tomlKey} = ${formatTomlString(value)}`;
+  });
+  return `{ ${entries.join(', ')} }`;
+}
+
+/**
+ * Render a key as a quoted TOML key.
+ *
+ * `JSON.stringify` escapes backslashes and control characters as well as
+ * double quotes. Escaping only `"` would leave a key such as `a\b` to be read
+ * as containing a backspace escape, and a key ending in `\` would swallow the
+ * closing quote.
+ *
+ * @param key - Raw key text (e.g. an MCP server name).
+ * @returns The key wrapped in double quotes with all required escapes.
+ */
+function formatTomlKey(key: string): string {
+  return JSON.stringify(key);
+}
+
+/**
+ * Produce the TOML lines describing one MCP server.
+ *
+ * The first line is the `[mcp_servers."<name>"]` table header; it is followed
+ * by one `key = value` line for each of `type`, `command`, `args`, `env`,
+ * `url` and `headers` that is present and non-empty on `server`.
+ *
+ * @param name - Server name, used as the quoted sub-table key.
+ * @param server - Server configuration to serialise.
+ * @returns The lines of the block, without a trailing blank line.
+ */
+function buildServerBlock(name: string, server: McpServerConfig): string[] {
+  const lines = [`[${CODEX_MCP_SECTION}.${formatTomlKey(name)}]`];
+  const addEntry = (key: string, renderedValue: string): void => {
+    lines.push(`${key} = ${renderedValue}`);
+  };
+
+  if (server.type) {
+    addEntry('type', formatTomlString(server.type));
+  }
+  if ('command' in server && server.command) {
+    addEntry('command', formatTomlString(server.command));
+  }
+  if ('args' in server && server.args && server.args.length > 0) {
+    addEntry('args', formatTomlArray(server.args));
+  }
+  if ('env' in server && server.env && Object.keys(server.env).length > 0) {
+    addEntry('env', formatTomlInlineTable(server.env));
+  }
+  if ('url' in server && server.url) {
+    addEntry('url', formatTomlString(server.url));
+  }
+  if ('headers' in server && server.headers && Object.keys(server.headers).length > 0) {
+    addEntry('headers', formatTomlInlineTable(server.headers));
+  }
+
+  return lines;
+}
+
+/**
+ * Writes the MCP server section of a project-local Codex configuration file.
+ */
+export class CodexConfigManager {
+  /**
+   * Write `<cwd>/.codex/config.toml` with one table per MCP server.
+   *
+   * The directory is created if needed. The file is written as a whole from
+   * the generated blocks; when `mcpServers` yields no content, nothing is
+   * written and any existing file is left untouched.
+   *
+   * @param cwd - Project directory that should contain the `.codex` folder.
+   * @param mcpServers - Server configurations keyed by server name.
+   */
+  async configureMcpServers(
+    cwd: string,
+    mcpServers: Record<string, McpServerConfig>
+  ): Promise<void> {
+    const configDir = path.join(cwd, CODEX_CONFIG_DIR);
+    const configPath = path.join(configDir, CODEX_CONFIG_FILENAME);
+
+    await secureFs.mkdir(configDir, { recursive: true });
+
+    // Each server block is followed by an empty line to separate the tables.
+    const rendered: string[] = [];
+    Object.entries(mcpServers).forEach(([name, server]) => {
+      rendered.push(...buildServerBlock(name, server), '');
+    });
+
+    const content = rendered.join('\n').trim();
+    if (!content) {
+      return;
+    }
+
+    await secureFs.writeFile(configPath, `${content}\n`, 'utf-8');
+  }
+}
--- a/apps/server/src/providers/codex-models.ts
+++ b/apps/server/src/providers/codex-models.ts
@@ -0,0 +1,111 @@
+/**
+ * Codex Model Definitions
+ *
+ * Official Codex CLI models as documented at https://developers.openai.com/codex/models/
+ */
+
+import { CODEX_MODEL_MAP } from '@automaker/types';
+import type { ModelDefinition } from './types.js';
+
+// Shared sizes referenced by the model definitions in this module.
+// Used for each model's `contextWindow` field (presumably counted in tokens — confirm).
+const CONTEXT_WINDOW_256K = 256000;
+const CONTEXT_WINDOW_128K = 128000;
+// Used for each model's `maxOutputTokens` field.
+const MAX_OUTPUT_32K = 32000;
+const MAX_OUTPUT_16K = 16000;
+
+/**
+ * All available Codex models with their specifications
+ * Based on https://developers.openai.com/codex/models/
+ *
+ * `id` and `modelString` of every entry both come from the same
+ * `CODEX_MODEL_MAP` key. Exactly one entry (GPT-5.2-Codex) is flagged
+ * `default: true`.
+ */
+export const CODEX_MODELS: ModelDefinition[] = [
+  // ========== Recommended Codex Models ==========
+  {
+    id: CODEX_MODEL_MAP.gpt52Codex,
+    name: 'GPT-5.2-Codex',
+    modelString: CODEX_MODEL_MAP.gpt52Codex,
+    provider: 'openai',
+    description:
+      'Most advanced agentic coding model for complex software engineering (default for ChatGPT users).',
+    contextWindow: CONTEXT_WINDOW_256K,
+    maxOutputTokens: MAX_OUTPUT_32K,
+    supportsVision: true,
+    supportsTools: true,
+    tier: 'premium' as const,
+    default: true,
+    hasReasoning: true,
+  },
+  {
+    id: CODEX_MODEL_MAP.gpt51CodexMax,
+    name: 'GPT-5.1-Codex-Max',
+    modelString: CODEX_MODEL_MAP.gpt51CodexMax,
+    provider: 'openai',
+    description: 'Optimized for long-horizon, agentic coding tasks in Codex.',
+    contextWindow: CONTEXT_WINDOW_256K,
+    maxOutputTokens: MAX_OUTPUT_32K,
+    supportsVision: true,
+    supportsTools: true,
+    tier: 'premium' as const,
+    hasReasoning: true,
+  },
+  {
+    // The only entry with the smaller context/output limits and no reasoning.
+    id: CODEX_MODEL_MAP.gpt51CodexMini,
+    name: 'GPT-5.1-Codex-Mini',
+    modelString: CODEX_MODEL_MAP.gpt51CodexMini,
+    provider: 'openai',
+    description: 'Smaller, more cost-effective version for faster workflows.',
+    contextWindow: CONTEXT_WINDOW_128K,
+    maxOutputTokens: MAX_OUTPUT_16K,
+    supportsVision: true,
+    supportsTools: true,
+    tier: 'basic' as const,
+    hasReasoning: false,
+  },
+
+  // ========== General-Purpose GPT Models ==========
+  {
+    id: CODEX_MODEL_MAP.gpt52,
+    name: 'GPT-5.2',
+    modelString: CODEX_MODEL_MAP.gpt52,
+    provider: 'openai',
+    description: 'Best general agentic model for tasks across industries and domains.',
+    contextWindow: CONTEXT_WINDOW_256K,
+    maxOutputTokens: MAX_OUTPUT_32K,
+    supportsVision: true,
+    supportsTools: true,
+    tier: 'standard' as const,
+    hasReasoning: true,
+  },
+  {
+    id: CODEX_MODEL_MAP.gpt51,
+    name: 'GPT-5.1',
+    modelString: CODEX_MODEL_MAP.gpt51,
+    provider: 'openai',
+    description: 'Great for coding and agentic tasks across domains.',
+    contextWindow: CONTEXT_WINDOW_256K,
+    maxOutputTokens: MAX_OUTPUT_32K,
+    supportsVision: true,
+    supportsTools: true,
+    tier: 'standard' as const,
+    hasReasoning: true,
+  },
+];
+
+/**
+ * Look up a model definition by identifier.
+ *
+ * @param modelId - Value to compare against each model's `id` and `modelString`.
+ * @returns The first matching definition, or `undefined` when none matches.
+ */
+export function getCodexModelById(modelId: string): ModelDefinition | undefined {
+  for (const model of CODEX_MODELS) {
+    if (model.id === modelId || model.modelString === modelId) {
+      return model;
+    }
+  }
+  return undefined;
+}
+
+/**
+ * List the models whose `hasReasoning` flag is set, in declaration order.
+ */
+export function getReasoningModels(): ModelDefinition[] {
+  const reasoningModels: ModelDefinition[] = [];
+  for (const model of CODEX_MODELS) {
+    if (model.hasReasoning) {
+      reasoningModels.push(model);
+    }
+  }
+  return reasoningModels;
+}
+
+/**
+ * List the models belonging to the given tier, in declaration order.
+ *
+ * @param tier - Tier to select.
+ * @returns Every model whose `tier` equals the argument (possibly empty).
+ */
+export function getModelsByTier(tier: 'premium' | 'standard' | 'basic'): ModelDefinition[] {
+  const inTier: ModelDefinition[] = [];
+  for (const model of CODEX_MODELS) {
+    if (model.tier === tier) {
+      inTier.push(model);
+    }
+  }
+  return inTier;
+}
--- a/apps/server/src/providers/codex-provider.ts
+++ b/apps/server/src/providers/codex-provider.ts
--- a/apps/server/src/providers/codex-sdk-client.ts
+++ b/apps/server/src/providers/codex-sdk-client.ts
@@ -0,0 +1,173 @@
+/**
+ * Codex SDK client - Executes Codex queries via official @openai/codex-sdk
+ *
+ * Used for programmatic control of Codex from within the application.
+ * Provides cleaner integration than spawning CLI processes.
+ */
+
+import { Codex } from '@openai/codex-sdk';
+import { formatHistoryAsText, classifyError, getUserFriendlyErrorMessage } from '@automaker/utils';
+import { supportsReasoningEffort } from '@automaker/types';
+import type { ExecuteOptions, ProviderMessage } from './types.js';
+
+// Name of the environment variable the API key is read from.
+const OPENAI_API_KEY_ENV = 'OPENAI_API_KEY';
+// Prefix placed before the current prompt in the assembled prompt text.
+const SDK_HISTORY_HEADER = 'Current request:\n';
+// Text used when the SDK run returns no final response.
+const DEFAULT_RESPONSE_TEXT = '';
+// Label introducing the raw error text after the user-friendly message.
+const SDK_ERROR_DETAILS_LABEL = 'Details:';
+
+/**
+ * Loose shape of one entry of an array prompt. Only blocks with
+ * `type === 'text'` and a string `text` are read in this module; `source`
+ * is declared but not consumed here.
+ */
+type PromptBlock = {
+  type: string;
+  text?: string;
+  source?: {
+    type?: string;
+    media_type?: string;
+    data?: string;
+  };
+};
+
+/**
+ * Read the OpenAI API key from the environment.
+ *
+ * @returns The non-empty key.
+ * @throws Error when the variable is unset or empty.
+ */
+function resolveApiKey(): string {
+  const { [OPENAI_API_KEY_ENV]: apiKey } = process.env;
+  if (apiKey) {
+    return apiKey;
+  }
+  throw new Error('OPENAI_API_KEY is not set.');
+}
+
+/**
+ * Present a prompt as a list of blocks: array prompts pass through unchanged,
+ * string prompts are wrapped in a single text block.
+ */
+function normalizePromptBlocks(prompt: ExecuteOptions['prompt']): PromptBlock[] {
+  return Array.isArray(prompt) ? (prompt as PromptBlock[]) : [{ type: 'text', text: prompt }];
+}
+
+/**
+ * Flatten system prompt, conversation history and current prompt into one string.
+ *
+ * Sections are joined by blank lines in this order: `System: <prompt>` (when
+ * given), the formatted history (when non-empty), then the current request
+ * introduced by the history header. Only non-blank text blocks of the prompt
+ * are used.
+ *
+ * @param options - Execution options providing `prompt` and `conversationHistory`.
+ * @param systemPrompt - Optional system prompt text.
+ * @returns The assembled prompt text.
+ * @throws Error when the prompt contains no non-blank text.
+ */
+function buildPromptText(options: ExecuteOptions, systemPrompt: string | null): string {
+  const history = options.conversationHistory;
+  const historyText = history && history.length > 0 ? formatHistoryAsText(history) : '';
+
+  const promptContent = normalizePromptBlocks(options.prompt)
+    .filter(
+      (block) => block.type === 'text' && typeof block.text === 'string' && block.text.trim()
+    )
+    .map((block) => block.text as string)
+    .join('\n\n');
+
+  if (!promptContent.trim()) {
+    throw new Error('Codex SDK prompt is empty.');
+  }
+
+  const sections = [
+    systemPrompt ? `System: ${systemPrompt}` : '',
+    historyText,
+    `${SDK_HISTORY_HEADER}${promptContent}`,
+  ].filter(Boolean);
+
+  return sections.join('\n\n');
+}
+
+/**
+ * Merge the raw error text with its user-friendly counterpart.
+ *
+ * - No raw text: the user message alone.
+ * - No user message, or both identical: the raw text alone.
+ * - Otherwise: user message, blank line, then the details label and raw text.
+ */
+function buildSdkErrorMessage(rawMessage: string, userMessage: string): string {
+  if (!rawMessage) {
+    return userMessage;
+  }
+  const userMessageAddsNothing = !userMessage || userMessage === rawMessage;
+  return userMessageAddsNothing
+    ? rawMessage
+    : `${userMessage}\n\n${SDK_ERROR_DETAILS_LABEL} ${rawMessage}`;
+}
+
+/**
+ * Execute a query using the official Codex SDK
+ *
+ * The SDK provides a cleaner interface than spawning CLI processes:
+ * - Handles authentication automatically
+ * - Provides TypeScript types
+ * - Supports thread management and resumption
+ * - Better error handling
+ *
+ * The run is awaited to completion, so on success exactly two messages are
+ * yielded: one `assistant` message with the final response text, then one
+ * `result` message. Any error raised inside the body (including a missing API
+ * key or an empty prompt) is logged and yielded as a single `error` message
+ * instead of being thrown.
+ *
+ * @param options - Execution options (prompt, history, model, session id, abort controller, reasoning effort).
+ * @param systemPrompt - Optional system prompt text prepended to the prompt.
+ */
+export async function* executeCodexSdkQuery(
+  options: ExecuteOptions,
+  systemPrompt: string | null
+): AsyncGenerator<ProviderMessage> {
+  try {
+    const apiKey = resolveApiKey();
+    const codex = new Codex({ apiKey });
+
+    // Resume existing thread or start new one
+    let thread;
+    if (options.sdkSessionId) {
+      try {
+        thread = codex.resumeThread(options.sdkSessionId);
+      } catch {
+        // If resume fails, start a new thread
+        thread = codex.startThread();
+      }
+    } else {
+      thread = codex.startThread();
+    }
+
+    const promptText = buildPromptText(options, systemPrompt);
+
+    // Build run options with reasoning effort if supported
+    const runOptions: {
+      signal?: AbortSignal;
+      reasoning?: { effort: string };
+    } = {
+      signal: options.abortController?.signal,
+    };
+
+    // Add reasoning effort if model supports it and reasoningEffort is specified
+    if (
+      options.reasoningEffort &&
+      supportsReasoningEffort(options.model) &&
+      options.reasoningEffort !== 'none'
+    ) {
+      runOptions.reasoning = { effort: options.reasoningEffort };
+    }
+
+    // Run the query
+    const result = await thread.run(promptText, runOptions);
+
+    // Extract response text (from finalResponse property)
+    const outputText = result.finalResponse ?? DEFAULT_RESPONSE_TEXT;
+
+    // Get thread ID (may be null if not populated yet)
+    const threadId = thread.id ?? undefined;
+
+    // Yield assistant message
+    yield {
+      type: 'assistant',
+      session_id: threadId,
+      message: {
+        role: 'assistant',
+        content: [{ type: 'text', text: outputText }],
+      },
+    };
+
+    // Yield result
+    yield {
+      type: 'result',
+      subtype: 'success',
+      session_id: threadId,
+      result: outputText,
+    };
+  } catch (error) {
+    // Classify once for logging, and derive a user-facing message to yield.
+    const errorInfo = classifyError(error);
+    const userMessage = getUserFriendlyErrorMessage(error);
+    const combinedMessage = buildSdkErrorMessage(errorInfo.message, userMessage);
+    console.error('[CodexSDK] executeQuery() error during execution:', {
+      type: errorInfo.type,
+      message: errorInfo.message,
+      isRateLimit: errorInfo.isRateLimit,
+      retryAfter: errorInfo.retryAfter,
+      stack: error instanceof Error ? error.stack : undefined,
+    });
+    // Report the failure in-band rather than rejecting the generator.
+    yield { type: 'error', error: combinedMessage };
+  }
+}
--- a/apps/server/src/providers/codex-tool-mapping.ts
+++ b/apps/server/src/providers/codex-tool-mapping.ts
@@ -0,0 +1,436 @@
+/**
+ * Result of mapping a shell command onto a tool call: the tool's name plus
+ * the input object to present for it.
+ */
+export type CodexToolResolution = {
+  name: string;
+  input: Record<string, unknown>;
+};
+
+/**
+ * One entry of a todo list extracted from a Codex item.
+ */
+export type CodexTodoItem = {
+  content: string;
+  status: 'pending' | 'in_progress' | 'completed';
+  activeForm?: string;
+};
+
+// Tool names reported in CodexToolResolution.name.
+const TOOL_NAME_BASH = 'Bash';
+const TOOL_NAME_READ = 'Read';
+const TOOL_NAME_EDIT = 'Edit';
+const TOOL_NAME_WRITE = 'Write';
+const TOOL_NAME_GREP = 'Grep';
+const TOOL_NAME_GLOB = 'Glob';
+const TOOL_NAME_TODO = 'TodoWrite';
+const TOOL_NAME_DELETE = 'Delete';
+const TOOL_NAME_LS = 'Ls';
+
+// Keys used in CodexToolResolution.input.
+const INPUT_KEY_COMMAND = 'command';
+const INPUT_KEY_FILE_PATH = 'file_path';
+const INPUT_KEY_PATTERN = 'pattern';
+
+// Shell invocations that merely wrap the real command; capture group 1 is the
+// wrapped command. The bash/sh forms require a quoted argument, the Windows
+// forms accept it with or without quotes.
+const SHELL_WRAPPER_PATTERNS = [
+  /^\/bin\/bash\s+-lc\s+["']([\s\S]+)["']$/,
+  /^bash\s+-lc\s+["']([\s\S]+)["']$/,
+  /^\/bin\/sh\s+-lc\s+["']([\s\S]+)["']$/,
+  /^sh\s+-lc\s+["']([\s\S]+)["']$/,
+  /^cmd\.exe\s+\/c\s+["']?([\s\S]+)["']?$/i,
+  /^powershell(?:\.exe)?\s+-Command\s+["']?([\s\S]+)["']?$/i,
+  /^pwsh(?:\.exe)?\s+-Command\s+["']?([\s\S]+)["']?$/i,
+] as const;
+
+// Splits a command line on `&&`, `||` and `;`.
+const COMMAND_SEPARATOR_PATTERN = /\s*(?:&&|\|\||;)\s*/;
+// Segments starting with these prefixes are setup steps, not the primary command.
+const SEGMENT_SKIP_PREFIXES = ['cd ', 'export ', 'set ', 'pushd '] as const;
+// Leading tokens dropped before the command name is read.
+const WRAPPER_COMMANDS = new Set(['sudo', 'env', 'command']);
+// Command names grouped by the tool they map to in resolveCodexToolCall.
+const READ_COMMANDS = new Set(['cat', 'sed', 'head', 'tail', 'less', 'more', 'bat', 'stat', 'wc']);
+const SEARCH_COMMANDS = new Set(['rg', 'grep', 'ag', 'ack']);
+// NOTE: 'ls' is also in LIST_COMMANDS, which resolveCodexToolCall checks first.
+const GLOB_COMMANDS = new Set(['ls', 'find', 'fd', 'tree']);
+const DELETE_COMMANDS = new Set(['rm', 'del', 'erase', 'remove', 'unlink']);
+const LIST_COMMANDS = new Set(['ls', 'dir', 'll', 'la']);
+const WRITE_COMMANDS = new Set(['tee', 'touch', 'mkdir']);
+const APPLY_PATCH_COMMAND = 'apply_patch';
+const APPLY_PATCH_PATTERN = /\bapply_patch\b/;
+// Captures the token following `>` or `>>`.
+const REDIRECTION_TARGET_PATTERN = /(?:>>|>)\s*([^\s]+)/;
+const SED_IN_PLACE_FLAGS = new Set(['-i', '--in-place']);
+// Matches any text containing `-` followed somewhere later by `i`.
+const PERL_IN_PLACE_FLAG = /-.*i/;
+// Search flags whose next token is the pattern itself.
+const SEARCH_PATTERN_FLAGS = new Set(['-e', '--regexp']);
+// Search flags whose next token is a value to skip when looking for the pattern.
+const SEARCH_VALUE_FLAGS = new Set([
+  '-g',
+  '--glob',
+  '--iglob',
+  '--type',
+  '--type-add',
+  '--type-clear',
+  '--encoding',
+]);
+// Search flags that turn the command into a file listing (mapped to Glob).
+const SEARCH_FILE_LIST_FLAGS = new Set(['--files']);
+// A `-` or `*` bullet with an optional `[ ]`, `[x]` or `[~]` checkbox, then the text.
+const TODO_LINE_PATTERN = /^[-*]\s*(?:\[(?<status>[ x~])\]\s*)?(?<content>.+)$/;
+const TODO_STATUS_COMPLETED = 'completed';
+const TODO_STATUS_IN_PROGRESS = 'in_progress';
+const TODO_STATUS_PENDING = 'pending';
+// Line prefixes inside a patch body that are followed by a file path.
+const PATCH_FILE_MARKERS = [
+  '*** Update File: ',
+  '*** Add File: ',
+  '*** Delete File: ',
+  '*** Move to: ',
+] as const;
+
+/**
+ * Peel a recognised shell wrapper (`bash -lc "..."`, `cmd.exe /c ...`, ...)
+ * off a command and return the wrapped command with `\"` / `\'` unescaped.
+ * Commands without a recognised wrapper are returned trimmed.
+ */
+function stripShellWrapper(command: string): string {
+  const trimmed = command.trim();
+
+  for (const wrapperPattern of SHELL_WRAPPER_PATTERNS) {
+    const inner = wrapperPattern.exec(trimmed)?.[1];
+    if (inner) {
+      return unescapeCommand(inner.trim());
+    }
+  }
+
+  return trimmed;
+}
+
+/**
+ * Drop the backslash in front of every escaped single or double quote.
+ * Other backslash sequences are left untouched.
+ */
+function unescapeCommand(command: string): string {
+  const escapedQuote = /\\(["'])/g;
+  return command.replace(escapedQuote, (_whole: string, quote: string) => quote);
+}
+
+/**
+ * Pick the first segment of a compound command that is not a setup step.
+ *
+ * The command is split on `&&`, `||` and `;`; segments starting with one of
+ * the skip prefixes (`cd `, `export `, ...) are passed over. When every
+ * segment is skipped, the whole trimmed command is returned.
+ */
+function extractPrimarySegment(command: string): string {
+  const firstMeaningful = command
+    .split(COMMAND_SEPARATOR_PATTERN)
+    .map((part) => part.trim())
+    .filter((part) => part.length > 0)
+    .find((part) => !SEGMENT_SKIP_PREFIXES.some((prefix) => part.startsWith(prefix)));
+
+  return firstMeaningful ?? command.trim();
+}
+
+/**
+ * Split a command line into tokens using POSIX-shell-like quoting rules.
+ *
+ * - Unquoted whitespace separates tokens.
+ * - Inside single quotes every character, including `\`, is literal.
+ * - Inside double quotes a backslash only escapes `"`, `\`, `$` and a
+ *   backtick; before any other character the backslash is kept.
+ * - Outside quotes a backslash escapes the next character.
+ *
+ * Treating the backslash as an escape everywhere would strip it from quoted
+ * regular expressions such as `'a\.b'`, changing the pattern.
+ *
+ * Quote characters themselves are removed. Empty tokens (e.g. `''`) are not
+ * emitted, and a trailing lone backslash is dropped.
+ *
+ * @param command - Command line to split.
+ * @returns The tokens in order of appearance.
+ */
+function tokenizeCommand(command: string): string[] {
+  const doubleQuoteEscapable = '"\\$`';
+  const tokens: string[] = [];
+  let current = '';
+  let inSingleQuote = false;
+  let inDoubleQuote = false;
+  let isEscaped = false;
+
+  for (const char of command) {
+    if (isEscaped) {
+      // Within double quotes the backslash survives unless it escapes a special char.
+      if (inDoubleQuote && !doubleQuoteEscapable.includes(char)) {
+        current += '\\';
+      }
+      current += char;
+      isEscaped = false;
+      continue;
+    }
+
+    // A backslash inside single quotes is an ordinary character.
+    if (char === '\\' && !inSingleQuote) {
+      isEscaped = true;
+      continue;
+    }
+
+    if (char === "'" && !inDoubleQuote) {
+      inSingleQuote = !inSingleQuote;
+      continue;
+    }
+
+    if (char === '"' && !inSingleQuote) {
+      inDoubleQuote = !inDoubleQuote;
+      continue;
+    }
+
+    if (!inSingleQuote && !inDoubleQuote && /\s/.test(char)) {
+      if (current) {
+        tokens.push(current);
+        current = '';
+      }
+      continue;
+    }
+
+    current += char;
+  }
+
+  if (current) {
+    tokens.push(current);
+  }
+
+  return tokens;
+}
+
+/**
+ * Drop leading wrapper commands (`sudo`, `env`, `command`, case-insensitive)
+ * so the first remaining token is the actual command name.
+ * Returns an empty list when every token is a wrapper.
+ */
+function stripWrapperTokens(tokens: string[]): string[] {
+  const firstRealIndex = tokens.findIndex((token) => !WRAPPER_COMMANDS.has(token.toLowerCase()));
+  return firstRealIndex === -1 ? [] : tokens.slice(firstRealIndex);
+}
+
+/**
+ * Return the last argument that is not a flag.
+ *
+ * The command name (`tokens[0]`) is never considered, and arguments that are
+ * empty or start with `-` are ignored.
+ *
+ * @returns The last non-flag argument, or `null` when there is none.
+ */
+function extractFilePathFromTokens(tokens: string[]): string | null {
+  for (let position = tokens.length - 1; position >= 1; position -= 1) {
+    const token = tokens[position];
+    if (token && !token.startsWith('-')) {
+      return token;
+    }
+  }
+  return null;
+}
+
+/**
+ * Find the search pattern among the arguments of a grep-like command.
+ *
+ * Arguments are scanned left to right, ignoring the command name:
+ * - `--` or a pattern flag (`-e`, `--regexp`): the next argument is the pattern.
+ * - A value flag (`--glob`, `--type`, ...): the flag and its value are skipped.
+ * - Any other flag is skipped.
+ * - The first remaining argument is the pattern.
+ *
+ * @returns The pattern, or `null` when none is found.
+ */
+function extractSearchPattern(tokens: string[]): string | null {
+  const args = tokens.slice(1);
+  let cursor = 0;
+
+  while (cursor < args.length) {
+    const arg = args[cursor];
+
+    if (arg === '--' || SEARCH_PATTERN_FLAGS.has(arg)) {
+      return args[cursor + 1] ?? null;
+    }
+
+    if (SEARCH_VALUE_FLAGS.has(arg)) {
+      cursor += 2; // skip the flag and its value
+      continue;
+    }
+
+    if (!arg.startsWith('-')) {
+      return arg;
+    }
+
+    cursor += 1;
+  }
+
+  return null;
+}
+
+/**
+ * Find the file that a `tee` invocation writes to.
+ *
+ * Flags following `tee` (for example `-a` in `tee -a out.log`) are skipped;
+ * inspecting only the token right after `tee` would lose the target whenever
+ * a flag is present. A literal `--` marks the end of flags, so the token
+ * after it is taken as the target.
+ *
+ * @param tokens - Tokenised command.
+ * @returns The first non-flag argument after `tee`, or `null` when `tee` is
+ *          absent or has no file argument.
+ */
+function extractTeeTarget(tokens: string[]): string | null {
+  const teeIndex = tokens.indexOf('tee');
+  if (teeIndex < 0) return null;
+
+  for (let index = teeIndex + 1; index < tokens.length; index += 1) {
+    const token = tokens[index];
+    if (token === '--') {
+      return tokens[index + 1] ?? null;
+    }
+    if (token && !token.startsWith('-')) {
+      return token;
+    }
+  }
+
+  return null;
+}
+
+/**
+ * Find the file an output redirection (`>` or `>>`) writes to.
+ *
+ * Redirections that do not create or modify a real file are ignored so the
+ * command is not misreported as a file write:
+ * - descriptor duplication such as `2>&1` or `>&2` (target starts with `&`);
+ * - the null device (`/dev/null`, or `nul` on Windows).
+ *
+ * @param command - Command segment to inspect.
+ * @returns The first real redirection target, or `null` when there is none.
+ */
+function extractRedirectionTarget(command: string): string | null {
+  // Global copy of the shared pattern so every redirection can be visited.
+  const pattern = new RegExp(REDIRECTION_TARGET_PATTERN.source, 'g');
+
+  let match = pattern.exec(command);
+  while (match !== null) {
+    const target = match[1];
+    const isDescriptorDup = target.startsWith('&');
+    const isNullDevice = target === '/dev/null' || target.toLowerCase() === 'nul';
+    if (!isDescriptorDup && !isNullDevice) {
+      return target;
+    }
+    match = pattern.exec(command);
+  }
+
+  return null;
+}
+
+/**
+ * Return the first argument of a delete command that is not a flag,
+ * e.g. `file.txt` for `rm -v file.txt other.txt`.
+ *
+ * @returns The first non-empty argument not starting with `-`, or `null`.
+ */
+function extractFilePathFromDeleteTokens(tokens: string[]): string | null {
+  const firstTarget = tokens
+    .slice(1)
+    .find((token) => Boolean(token) && !token.startsWith('-'));
+  return firstTarget ?? null;
+}
+
+/**
+ * Tell whether a `sed` invocation edits its input file in place.
+ *
+ * Recognises `-i`, `-i<SUFFIX>` (e.g. `-i.bak`), `--in-place` and
+ * `--in-place=<SUFFIX>`.
+ */
+function hasSedInPlaceFlag(tokens: string[]): boolean {
+  return tokens.some(
+    (token) =>
+      SED_IN_PLACE_FLAGS.has(token) || token.startsWith('-i') || token.startsWith('--in-place=')
+  );
+}
+
+/**
+ * Tell whether a `perl` invocation carries an in-place edit flag (`-i`,
+ * `-pi`, `-i.bak`, ...).
+ *
+ * Only tokens that are short-option clusters (a single leading dash) are
+ * tested. Without that restriction any argument containing a dash followed
+ * by an `i`, such as the script path `my-script.pl`, would be mistaken for
+ * the flag.
+ */
+function hasPerlInPlaceFlag(tokens: string[]): boolean {
+  return tokens.some(
+    (token) => token.startsWith('-') && !token.startsWith('--') && PERL_IN_PLACE_FLAG.test(token)
+  );
+}
+
+/**
+ * Pull the target file path out of an `apply_patch` body.
+ *
+ * Markers are tried in their declared order (update, add, delete, move); for
+ * the first marker that occurs, the rest of that line is taken as the path.
+ * A marker followed by nothing but whitespace is skipped.
+ *
+ * @returns The trimmed path, or `null` when no marker yields one.
+ */
+function extractPatchFilePath(command: string): string | null {
+  for (const marker of PATCH_FILE_MARKERS) {
+    const markerAt = command.indexOf(marker);
+    if (markerAt === -1) {
+      continue;
+    }
+
+    const afterMarker = command.slice(markerAt + marker.length);
+    const newlineAt = afterMarker.indexOf('\n');
+    const lineRemainder = newlineAt === -1 ? afterMarker : afterMarker.slice(0, newlineAt);
+
+    const candidate = lineRemainder.trim();
+    if (candidate) {
+      return candidate;
+    }
+  }
+
+  return null;
+}
+
+/**
+ * Build a tool input holding the file path, or an empty input when the path
+ * is missing or empty.
+ */
+function buildInputWithFilePath(filePath: string | null): Record<string, unknown> {
+  if (!filePath) {
+    return {};
+  }
+  return { [INPUT_KEY_FILE_PATH]: filePath };
+}
+
+/**
+ * Build a tool input holding the pattern, or an empty input when the pattern
+ * is missing or empty.
+ */
+function buildInputWithPattern(pattern: string | null): Record<string, unknown> {
+  if (!pattern) {
+    return {};
+  }
+  return { [INPUT_KEY_PATTERN]: pattern };
+}
+
+/**
+ * Map a raw shell command onto the tool call it most closely represents.
+ *
+ * The command is unwrapped (`bash -lc "..."` etc.), reduced to its primary
+ * segment, tokenised, and stripped of leading wrappers such as `sudo`. The
+ * first matching rule wins, in this order:
+ * 1. Output redirection -> Write
+ * 2. `apply_patch` -> Edit
+ * 3. `sed` / `perl` with an in-place flag -> Edit
+ * 4. `tee`, `touch`, `mkdir` -> Write
+ * 5. Search commands -> Glob (with `--files`) or Grep
+ * 6. Delete commands -> Delete, or Bash for recursive/forced deletes
+ * 7. Listing commands -> Ls
+ * 8. `find`, `fd`, `tree` -> Glob
+ * 9. Read commands -> Read
+ * 10. Everything else -> Bash with the unwrapped command
+ *
+ * @param command - Command line as issued by Codex.
+ * @returns The tool name and its input object.
+ */
+export function resolveCodexToolCall(command: string): CodexToolResolution {
+  const normalized = stripShellWrapper(command);
+  const primarySegment = extractPrimarySegment(normalized);
+  const tokens = stripWrapperTokens(tokenizeCommand(primarySegment));
+  const commandToken = tokens[0]?.toLowerCase() ?? '';
+
+  const redirectionTarget = extractRedirectionTarget(primarySegment);
+  if (redirectionTarget) {
+    return {
+      name: TOOL_NAME_WRITE,
+      input: buildInputWithFilePath(redirectionTarget),
+    };
+  }
+
+  if (commandToken === APPLY_PATCH_COMMAND || APPLY_PATCH_PATTERN.test(primarySegment)) {
+    return {
+      name: TOOL_NAME_EDIT,
+      input: buildInputWithFilePath(extractPatchFilePath(primarySegment)),
+    };
+  }
+
+  if (commandToken === 'sed' && hasSedInPlaceFlag(tokens)) {
+    return {
+      name: TOOL_NAME_EDIT,
+      input: buildInputWithFilePath(extractFilePathFromTokens(tokens)),
+    };
+  }
+
+  if (commandToken === 'perl' && hasPerlInPlaceFlag(tokens)) {
+    return {
+      name: TOOL_NAME_EDIT,
+      input: buildInputWithFilePath(extractFilePathFromTokens(tokens)),
+    };
+  }
+
+  if (WRITE_COMMANDS.has(commandToken)) {
+    const filePath =
+      commandToken === 'tee' ? extractTeeTarget(tokens) : extractFilePathFromTokens(tokens);
+    return {
+      name: TOOL_NAME_WRITE,
+      input: buildInputWithFilePath(filePath),
+    };
+  }
+
+  if (SEARCH_COMMANDS.has(commandToken)) {
+    if (tokens.some((token) => SEARCH_FILE_LIST_FLAGS.has(token))) {
+      return {
+        name: TOOL_NAME_GLOB,
+        input: buildInputWithPattern(extractFilePathFromTokens(tokens)),
+      };
+    }
+
+    return {
+      name: TOOL_NAME_GREP,
+      input: buildInputWithPattern(extractSearchPattern(tokens)),
+    };
+  }
+
+  // Handle Delete commands (rm, del, erase, remove, unlink)
+  if (DELETE_COMMANDS.has(commandToken)) {
+    // Recursive or forced deletes stay a Bash call. Any short-option cluster
+    // containing r, R or f counts (-r, -f, -rf, -fr, -R, -rfv, ...), as do the
+    // long forms --recursive and --force.
+    const recursiveOrForceCluster = /^-[A-Za-z]*[rRf][A-Za-z]*$/;
+    const isRecursiveOrForced = tokens
+      .slice(1)
+      .some(
+        (token) =>
+          token === '--recursive' || token === '--force' || recursiveOrForceCluster.test(token)
+      );
+    if (isRecursiveOrForced) {
+      return {
+        name: TOOL_NAME_BASH,
+        input: { [INPUT_KEY_COMMAND]: normalized },
+      };
+    }
+    // Simple file deletion - extract the file path
+    const filePath = extractFilePathFromDeleteTokens(tokens);
+    if (filePath) {
+      return {
+        name: TOOL_NAME_DELETE,
+        input: { path: filePath },
+      };
+    }
+    // Fall back to bash if we can't determine the file path
+    return {
+      name: TOOL_NAME_BASH,
+      input: { [INPUT_KEY_COMMAND]: normalized },
+    };
+  }
+
+  // Handle simple Ls commands (just listing, not find/glob)
+  if (LIST_COMMANDS.has(commandToken)) {
+    const filePath = extractFilePathFromTokens(tokens);
+    return {
+      name: TOOL_NAME_LS,
+      input: { path: filePath || '.' },
+    };
+  }
+
+  if (GLOB_COMMANDS.has(commandToken)) {
+    return {
+      name: TOOL_NAME_GLOB,
+      input: buildInputWithPattern(extractFilePathFromTokens(tokens)),
+    };
+  }
+
+  if (READ_COMMANDS.has(commandToken)) {
+    return {
+      name: TOOL_NAME_READ,
+      input: buildInputWithFilePath(extractFilePathFromTokens(tokens)),
+    };
+  }
+
+  // No specialised mapping: report the unwrapped command as a Bash call.
+  return {
+    name: TOOL_NAME_BASH,
+    input: { [INPUT_KEY_COMMAND]: normalized },
+  };
+}
+
+/**
+ * Turn markdown-style bullet lines into todo items.
+ *
+ * Lines that do not match the todo bullet pattern are ignored. The checkbox
+ * decides the status: `[x]` is completed, `[~]` is in progress, and an empty
+ * or missing checkbox is pending.
+ */
+function parseTodoLines(lines: string[]): CodexTodoItem[] {
+  const statusByMarker: Record<string, CodexTodoItem['status']> = {
+    x: TODO_STATUS_COMPLETED,
+    '~': TODO_STATUS_IN_PROGRESS,
+  };
+
+  return lines.reduce<CodexTodoItem[]>((todos, line) => {
+    const groups = line.match(TODO_LINE_PATTERN)?.groups;
+    if (!groups?.content) {
+      return todos;
+    }
+
+    const marker = groups.status;
+    const status = (marker !== undefined && statusByMarker[marker]) || TODO_STATUS_PENDING;
+
+    todos.push({ content: groups.content.trim(), status });
+    return todos;
+  }, []);
+}
+
+/**
+ * Convert a loosely-typed array into todo items.
+ *
+ * - A string entry becomes a pending item with that text.
+ * - An object entry takes its text from the first of `content`, `text`,
+ *   `title` that is a string; entries whose text is missing or empty are
+ *   dropped. An unrecognised `status` falls back to pending, and
+ *   `activeForm` is carried over only when it is a string.
+ * - Every other entry is dropped.
+ */
+function extractTodoFromArray(value: unknown[]): CodexTodoItem[] {
+  const knownStatuses: unknown[] = [
+    TODO_STATUS_COMPLETED,
+    TODO_STATUS_IN_PROGRESS,
+    TODO_STATUS_PENDING,
+  ];
+  const todos: CodexTodoItem[] = [];
+
+  for (const entry of value) {
+    if (typeof entry === 'string') {
+      todos.push({ content: entry, status: TODO_STATUS_PENDING });
+      continue;
+    }
+    if (!entry || typeof entry !== 'object') {
+      continue;
+    }
+
+    const record = entry as Record<string, unknown>;
+    const content = [record.content, record.text, record.title].find(
+      (candidate): candidate is string => typeof candidate === 'string'
+    );
+    if (!content) {
+      continue;
+    }
+
+    const status = knownStatuses.includes(record.status)
+      ? (record.status as CodexTodoItem['status'])
+      : TODO_STATUS_PENDING;
+    const activeForm = typeof record.activeForm === 'string' ? record.activeForm : undefined;
+
+    todos.push({ content, status, activeForm });
+  }
+
+  return todos;
+}
+
+/**
+ * Extract a todo list from a Codex item, trying several shapes in turn.
+ *
+ * 1. `item.todos` as an array
+ * 2. `item.items` as an array
+ * 3. `item.text`, else `item.content`, as bullet-list text
+ *
+ * The first shape that is present decides the outcome; there is no fallback
+ * to a later shape when it yields nothing.
+ *
+ * @returns The extracted items, or `null` when none were found.
+ */
+export function extractCodexTodoItems(item: Record<string, unknown>): CodexTodoItem[] | null {
+  const orNull = (todos: CodexTodoItem[]): CodexTodoItem[] | null =>
+    todos.length > 0 ? todos : null;
+
+  if (Array.isArray(item.todos)) {
+    return orNull(extractTodoFromArray(item.todos));
+  }
+
+  if (Array.isArray(item.items)) {
+    return orNull(extractTodoFromArray(item.items));
+  }
+
+  let text: string | null = null;
+  if (typeof item.text === 'string') {
+    text = item.text;
+  } else if (typeof item.content === 'string') {
+    text = item.content;
+  }
+  if (!text) return null;
+
+  const nonBlankLines = text
+    .split('\n')
+    .map((line) => line.trim())
+    .filter((line) => line.length > 0);
+
+  return orNull(parseTodoLines(nonBlankLines));
+}
+
+/**
+ * Name of the tool under which todo-list updates are reported.
+ */
+export function getCodexTodoToolName(): string {
+  const todoToolName: string = TOOL_NAME_TODO;
+  return todoToolName;
+}
--- a/apps/server/src/providers/copilot-provider.ts
+++ b/apps/server/src/providers/copilot-provider.ts
@@ -0,0 +1,942 @@
+/**
+ * Copilot Provider - Executes queries using the GitHub Copilot SDK
+ *
+ * Uses the official @github/copilot-sdk for:
+ * - Session management and streaming responses
+ * - GitHub OAuth authentication (via gh CLI)
+ * - Tool call handling and permission management
+ * - Runtime model discovery
+ *
+ * Based on https://github.com/github/copilot-sdk
+ */
+
+import { execSync } from 'child_process';
+import * as fs from 'fs/promises';
+import * as path from 'path';
+import * as os from 'os';
+import { CliProvider, type CliSpawnConfig, type CliErrorInfo } from './cli-provider.js';
+import type {
+  ProviderConfig,
+  ExecuteOptions,
+  ProviderMessage,
+  InstallationStatus,
+  ModelDefinition,
+} from './types.js';
+// Note: validateBareModelId is not used because Copilot's bare model IDs
+// legitimately contain prefixes like claude-, gemini-, gpt-
+import {
+  COPILOT_MODEL_MAP,
+  type CopilotAuthStatus,
+  type CopilotRuntimeModel,
+} from '@automaker/types';
+import { createLogger, isAbortError } from '@automaker/utils';
+import { CopilotClient, type PermissionRequest } from '@github/copilot-sdk';
+import {
+  normalizeTodos,
+  normalizeFilePathInput,
+  normalizeCommandInput,
+  normalizePatternInput,
+} from './tool-normalization.js';
+
+// Create logger for this module
+const logger = createLogger('CopilotProvider');
+
+// Default bare model (without copilot- prefix) for SDK calls.
+// Used when the caller does not specify a model, and named in the
+// "model unavailable" suggestion text.
+const DEFAULT_BARE_MODEL = 'claude-sonnet-4.5';
+
+// =============================================================================
+// SDK Event Types (from @github/copilot-sdk)
+// =============================================================================
+
+/**
+ * SDK session event data types
+ *
+ * Minimal shape shared by every event handled in this module: a string
+ * `type` discriminator plus an optional, untyped `data` payload. The more
+ * specific event interfaces below narrow both fields.
+ */
+interface SdkEvent {
+  type: string;
+  data?: unknown;
+}
+
+/**
+ * `assistant.message` event. `data.content` holds the message text that
+ * normalizeEvent forwards as a single text content block.
+ */
+interface SdkMessageEvent extends SdkEvent {
+  type: 'assistant.message';
+  data: {
+    content: string;
+  };
+}
+
+// Note: SdkMessageDeltaEvent is not used - we skip delta events to reduce noise
+// The final assistant.message event contains the complete content
+
+/**
+ * `tool.execution_start` event. normalizeEvent maps `toolName` to a standard
+ * tool name, forwards `toolCallId` as the tool_use id, and normalizes
+ * `input` (treated as `{}` when absent).
+ */
+interface SdkToolExecutionStartEvent extends SdkEvent {
+  type: 'tool.execution_start';
+  data: {
+    toolName: string;
+    toolCallId: string;
+    input?: Record<string, unknown>;
+  };
+}
+
+/**
+ * `tool.execution_end` event. A truthy `error` is reported as the tool
+ * result prefixed with "[ERROR] "; otherwise `result` (or an empty string)
+ * is used. `toolCallId` links the result to its tool_use block.
+ */
+interface SdkToolExecutionEndEvent extends SdkEvent {
+  type: 'tool.execution_end';
+  data: {
+    toolName: string;
+    toolCallId: string;
+    result?: string;
+    error?: string;
+  };
+}
+
+/**
+ * `session.idle` event. Treated by this provider as the end of a query and
+ * normalized to a successful result message.
+ */
+interface SdkSessionIdleEvent extends SdkEvent {
+  type: 'session.idle';
+}
+
+/**
+ * `session.error` event. `data.message` becomes the error text surfaced to
+ * callers; `code` is optional and is not read in this file.
+ */
+interface SdkSessionErrorEvent extends SdkEvent {
+  type: 'session.error';
+  data: {
+    message: string;
+    code?: string;
+  };
+}
+
+// =============================================================================
+// Error Codes
+// =============================================================================
+
+/**
+ * Error codes attached to CopilotError instances.
+ *
+ * NOT_INSTALLED is raised by executeQuery when no CLI path was detected.
+ * NOT_AUTHENTICATED, RATE_LIMITED, MODEL_UNAVAILABLE, NETWORK_ERROR,
+ * PROCESS_CRASHED and UNKNOWN are produced by mapError from error text.
+ * NOTE(review): TIMEOUT, CLI_ERROR and SDK_ERROR are not referenced by the
+ * provider code in this file - confirm whether other modules use them.
+ */
+export enum CopilotErrorCode {
+  NOT_INSTALLED = 'COPILOT_NOT_INSTALLED',
+  NOT_AUTHENTICATED = 'COPILOT_NOT_AUTHENTICATED',
+  RATE_LIMITED = 'COPILOT_RATE_LIMITED',
+  MODEL_UNAVAILABLE = 'COPILOT_MODEL_UNAVAILABLE',
+  NETWORK_ERROR = 'COPILOT_NETWORK_ERROR',
+  PROCESS_CRASHED = 'COPILOT_PROCESS_CRASHED',
+  TIMEOUT = 'COPILOT_TIMEOUT',
+  CLI_ERROR = 'COPILOT_CLI_ERROR',
+  SDK_ERROR = 'COPILOT_SDK_ERROR',
+  UNKNOWN = 'COPILOT_UNKNOWN_ERROR',
+}
+
+/**
+ * Error shape thrown by CopilotProvider: a standard Error extended with a
+ * machine-readable `code`, a `recoverable` flag, and an optional
+ * human-readable `suggestion` for resolving the problem.
+ */
+export interface CopilotError extends Error {
+  code: CopilotErrorCode;
+  recoverable: boolean;
+  suggestion?: string;
+}
+
+// =============================================================================
+// Tool Name Normalization
+// =============================================================================
+
+/**
+ * Copilot SDK tool name to standard tool name mapping
+ *
+ * Maps Copilot CLI tool names to our standard tool names for consistent UI display.
+ * Tool names are case-insensitive (normalized to lowercase before lookup),
+ * so every key in this table must be written in lowercase.
+ */
+const COPILOT_TOOL_NAME_MAP: Record<string, string> = {
+  // File operations
+  read_file: 'Read',
+  read: 'Read',
+  view: 'Read', // Copilot uses 'view' for reading files
+  read_many_files: 'Read',
+  write_file: 'Write',
+  write: 'Write',
+  create_file: 'Write',
+  edit_file: 'Edit',
+  edit: 'Edit',
+  replace: 'Edit',
+  patch: 'Edit',
+  // Shell operations
+  run_shell: 'Bash',
+  run_shell_command: 'Bash',
+  shell: 'Bash',
+  bash: 'Bash',
+  execute: 'Bash',
+  terminal: 'Bash',
+  // Search operations
+  search: 'Grep',
+  grep: 'Grep',
+  search_file_content: 'Grep',
+  find_files: 'Glob',
+  glob: 'Glob',
+  list_dir: 'Ls',
+  list_directory: 'Ls',
+  ls: 'Ls',
+  // Web operations
+  web_fetch: 'WebFetch',
+  fetch: 'WebFetch',
+  web_search: 'WebSearch',
+  search_web: 'WebSearch',
+  google_web_search: 'WebSearch',
+  // Todo operations
+  todo_write: 'TodoWrite',
+  write_todos: 'TodoWrite',
+  update_todos: 'TodoWrite',
+  // Planning/intent operations (Copilot-specific)
+  report_intent: 'ReportIntent', // Keep as-is, it's a planning tool
+  think: 'Think',
+  plan: 'Plan',
+};
+
+/**
+ * Normalize Copilot tool names to standard tool names
+ *
+ * The lookup is case-insensitive. Only the map's own keys are consulted, so
+ * tool names that collide with Object.prototype members (e.g. "constructor",
+ * "toString") are returned unchanged instead of resolving to an inherited
+ * non-string value.
+ *
+ * @param copilotToolName - Tool name as reported by the Copilot SDK
+ * @returns The mapped standard tool name, or the original name when unmapped
+ */
+function normalizeCopilotToolName(copilotToolName: string): string {
+  const lowerName = copilotToolName.toLowerCase();
+  const mapped = Object.prototype.hasOwnProperty.call(COPILOT_TOOL_NAME_MAP, lowerName)
+    ? COPILOT_TOOL_NAME_MAP[lowerName]
+    : undefined;
+  return mapped || copilotToolName;
+}
+
+/**
+ * Translate a Copilot tool's input object into our standard parameter shape.
+ *
+ * The tool name is normalized first; the matching shared normalizer from
+ * tool-normalization.ts is then applied. Inputs for tools without a
+ * dedicated normalizer are returned untouched.
+ *
+ * @param toolName - Tool name as reported by Copilot (any casing)
+ * @param input - Raw tool input from the SDK event
+ * @returns The normalized input object
+ */
+function normalizeCopilotToolInput(
+  toolName: string,
+  input: Record<string, unknown>
+): Record<string, unknown> {
+  switch (normalizeCopilotToolName(toolName)) {
+    case 'TodoWrite':
+      // Only rewrite when a todos array is actually present
+      return Array.isArray(input.todos) ? { todos: normalizeTodos(input.todos) } : input;
+
+    case 'Read':
+    case 'Write':
+    case 'Edit':
+      // File tools share the same path parameter normalization
+      return normalizeFilePathInput(input);
+
+    case 'Bash':
+      return normalizeCommandInput(input);
+
+    case 'Grep':
+      return normalizePatternInput(input);
+
+    default:
+      return input;
+  }
+}
+
+/**
+ * CopilotProvider - Integrates GitHub Copilot SDK as an AI provider
+ *
+ * Features:
+ * - GitHub OAuth authentication
+ * - SDK-based session management
+ * - Runtime model discovery
+ * - Tool call normalization
+ * - Per-execution working directory support
+ */
+export class CopilotProvider extends CliProvider {
+  private runtimeModels: CopilotRuntimeModel[] | null = null;
+
+  /**
+   * Create the provider and run CLI detection immediately.
+   *
+   * @param config - Provider configuration forwarded to CliProvider (defaults to `{}`)
+   */
+  constructor(config: ProviderConfig = {}) {
+    super(config);
+    // Detect the CLI eagerly at construction time
+    this.ensureCliDetected();
+  }
+
+  // ==========================================================================
+  // CliProvider Abstract Method Implementations
+  // ==========================================================================
+
+  /**
+   * Provider identifier.
+   *
+   * @returns The literal string 'copilot'
+   */
+  getName(): string {
+    const providerName = 'copilot';
+    return providerName;
+  }
+
+  /**
+   * Name of the CLI executable this provider looks for.
+   *
+   * @returns The literal string 'copilot'
+   */
+  getCliName(): string {
+    const cliName = 'copilot';
+    return cliName;
+  }
+
+  /**
+   * Describe how to locate and launch the Copilot CLI.
+   *
+   * @returns Spawn configuration with the Windows strategy, the npx package
+   *   name, and per-platform candidate install paths
+   */
+  getSpawnConfig(): CliSpawnConfig {
+    const home = os.homedir();
+
+    // Candidate locations shared by the Unix-like platforms
+    const userLocalBin = path.join(home, '.local/bin/copilot');
+    const systemBin = '/usr/local/bin/copilot';
+    const npmGlobalBin = path.join(home, '.npm-global/bin/copilot');
+
+    const commonPaths = {
+      linux: [userLocalBin, systemBin, npmGlobalBin],
+      darwin: [userLocalBin, systemBin, '/opt/homebrew/bin/copilot', npmGlobalBin],
+      win32: [
+        path.join(home, 'AppData', 'Roaming', 'npm', 'copilot.cmd'),
+        path.join(home, '.npm-global', 'copilot.cmd'),
+      ],
+    };
+
+    return {
+      windowsStrategy: 'npx', // Copilot CLI can be run via npx
+      npxPackage: '@github/copilot', // Official GitHub Copilot CLI package
+      commonPaths,
+    };
+  }
+
+  /**
+   * Flatten the prompt in ExecuteOptions to a single text string.
+   *
+   * A string prompt is returned as-is. An array prompt must contain only
+   * text parts; their non-empty texts are joined with newlines.
+   *
+   * Note: CopilotProvider does not yet support vision/image inputs.
+   *
+   * @param options - Execution options carrying the prompt
+   * @returns The prompt text
+   * @throws Error when the prompt contains a non-text part or is neither a
+   *   string nor an array
+   */
+  private extractPromptText(options: ExecuteOptions): string {
+    const { prompt } = options;
+
+    if (typeof prompt === 'string') {
+      return prompt;
+    }
+    if (!Array.isArray(prompt)) {
+      throw new Error('Invalid prompt format');
+    }
+
+    // Reject images and any other non-text parts up front
+    for (const part of prompt) {
+      if (part.type !== 'text') {
+        throw new Error(
+          'CopilotProvider does not yet support non-text prompt parts (e.g., images). ' +
+            'Please use text-only prompts or switch to a provider that supports vision.'
+        );
+      }
+    }
+
+    const texts: Array<string | undefined> = [];
+    for (const part of prompt) {
+      if (part.type === 'text' && part.text) {
+        texts.push(part.text);
+      }
+    }
+    return texts.join('\n');
+  }
+
+  /**
+   * CLI argument builder required by the CliProvider contract.
+   *
+   * Queries go through the SDK rather than a spawned CLI command line, so
+   * there are never any arguments to build.
+   *
+   * @param _options - Ignored
+   * @returns Always an empty array
+   */
+  buildCliArgs(_options: ExecuteOptions): string[] {
+    const noArgs: string[] = [];
+    return noArgs;
+  }
+
+  /**
+   * Convert SDK event to AutoMaker ProviderMessage format
+   *
+   * Mapping:
+   * - `assistant.message`       -> assistant message with one text block
+   * - `assistant.message_delta` -> dropped (the final message has the full text)
+   * - `tool.execution_start`    -> assistant message with a `tool_use` block
+   * - `tool.execution_end`      -> assistant message with a `tool_result` block
+   * - `session.idle`            -> successful `result`
+   * - `session.error`           -> `error`
+   * - anything else             -> dropped
+   *
+   * @param event - Raw event from the SDK session
+   * @returns The normalized message, or `null` when the event is dropped
+   */
+  normalizeEvent(event: unknown): ProviderMessage | null {
+    // The parameter is `unknown`: never dereference null/primitive payloads
+    if (event === null || typeof event !== 'object') {
+      logger.debug(`Ignoring non-object Copilot SDK event: ${String(event)}`);
+      return null;
+    }
+
+    const sdkEvent = event as SdkEvent;
+
+    switch (sdkEvent.type) {
+      case 'assistant.message': {
+        const messageEvent = sdkEvent as SdkMessageEvent;
+        return {
+          type: 'assistant',
+          message: {
+            role: 'assistant',
+            content: [{ type: 'text', text: messageEvent.data.content }],
+          },
+        };
+      }
+
+      case 'assistant.message_delta': {
+        // Skip delta events - they create too much noise
+        // The final assistant.message event has the complete content
+        return null;
+      }
+
+      case 'tool.execution_start': {
+        const toolEvent = sdkEvent as SdkToolExecutionStartEvent;
+        const normalizedName = normalizeCopilotToolName(toolEvent.data.toolName);
+        // A missing input is reported as an empty object
+        const normalizedInput = toolEvent.data.input
+          ? normalizeCopilotToolInput(toolEvent.data.toolName, toolEvent.data.input)
+          : {};
+
+        return {
+          type: 'assistant',
+          message: {
+            role: 'assistant',
+            content: [
+              {
+                type: 'tool_use',
+                name: normalizedName,
+                tool_use_id: toolEvent.data.toolCallId,
+                input: normalizedInput,
+              },
+            ],
+          },
+        };
+      }
+
+      case 'tool.execution_end': {
+        const toolResultEvent = sdkEvent as SdkToolExecutionEndEvent;
+        // Any truthy error takes precedence over the result text
+        const isError = !!toolResultEvent.data.error;
+        const content = isError
+          ? `[ERROR] ${toolResultEvent.data.error}`
+          : toolResultEvent.data.result || '';
+
+        return {
+          type: 'assistant',
+          message: {
+            role: 'assistant',
+            content: [
+              {
+                type: 'tool_result',
+                tool_use_id: toolResultEvent.data.toolCallId,
+                content,
+              },
+            ],
+          },
+        };
+      }
+
+      case 'session.idle': {
+        logger.debug('Copilot session idle');
+        return {
+          type: 'result',
+          subtype: 'success',
+        };
+      }
+
+      case 'session.error': {
+        const errorEvent = sdkEvent as SdkSessionErrorEvent;
+        return {
+          type: 'error',
+          error: errorEvent.data.message || 'Unknown error',
+        };
+      }
+
+      default:
+        logger.debug(`Unknown Copilot SDK event type: ${sdkEvent.type}`);
+        return null;
+    }
+  }
+
+  // ==========================================================================
+  // CliProvider Overrides
+  // ==========================================================================
+
+  /**
+   * Classify Copilot error text into a structured CliErrorInfo.
+   *
+   * Matching is case-insensitive substring search, evaluated in this order:
+   * authentication, rate limiting, model availability, network, process
+   * termination. The first category that matches wins; anything else is
+   * reported as UNKNOWN and non-recoverable.
+   *
+   * @param stderr - Error text to classify
+   * @param exitCode - Process exit code, or `null` when not available
+   * @returns Error code, message, recoverability flag and optional suggestion
+   */
+  protected mapError(stderr: string, exitCode: number | null): CliErrorInfo {
+    const haystack = stderr.toLowerCase();
+    const mentionsAny = (needles: string[]): boolean =>
+      needles.some((needle) => haystack.includes(needle));
+
+    const isAuthFailure = mentionsAny([
+      'not authenticated',
+      'please log in',
+      'unauthorized',
+      'login required',
+      'authentication required',
+      'github login',
+    ]);
+    if (isAuthFailure) {
+      return {
+        code: CopilotErrorCode.NOT_AUTHENTICATED,
+        message: 'GitHub Copilot is not authenticated',
+        recoverable: true,
+        suggestion: 'Run "gh auth login" or "copilot auth login" to authenticate with GitHub',
+      };
+    }
+
+    const isRateLimited = mentionsAny([
+      'rate limit',
+      'too many requests',
+      '429',
+      'quota exceeded',
+    ]);
+    if (isRateLimited) {
+      return {
+        code: CopilotErrorCode.RATE_LIMITED,
+        message: 'Copilot API rate limit exceeded',
+        recoverable: true,
+        suggestion: 'Wait a few minutes and try again',
+      };
+    }
+
+    // A bare "not found" only counts when accompanied by a 404
+    const isModelProblem =
+      mentionsAny(['model not available', 'invalid model', 'unknown model', 'model not found']) ||
+      (haystack.includes('not found') && haystack.includes('404'));
+    if (isModelProblem) {
+      return {
+        code: CopilotErrorCode.MODEL_UNAVAILABLE,
+        message: 'Requested model is not available',
+        recoverable: true,
+        suggestion: `Try using "${DEFAULT_BARE_MODEL}" or select a different model`,
+      };
+    }
+
+    const isNetworkProblem = mentionsAny(['network', 'connection', 'econnrefused', 'timeout']);
+    if (isNetworkProblem) {
+      return {
+        code: CopilotErrorCode.NETWORK_ERROR,
+        message: 'Network connection error',
+        recoverable: true,
+        suggestion: 'Check your internet connection and try again',
+      };
+    }
+
+    const wasTerminated = exitCode === 137 || mentionsAny(['killed', 'sigterm']);
+    if (wasTerminated) {
+      return {
+        code: CopilotErrorCode.PROCESS_CRASHED,
+        message: 'Copilot CLI process was terminated',
+        recoverable: true,
+        suggestion: 'The process may have run out of memory. Try a simpler task.',
+      };
+    }
+
+    return {
+      code: CopilotErrorCode.UNKNOWN,
+      message: stderr || `Copilot CLI exited with code ${exitCode}`,
+      recoverable: false,
+    };
+  }
+
+  /**
+   * Copilot-specific installation hint shown when the CLI is missing.
+   *
+   * @returns A one-line install instruction
+   */
+  protected getInstallInstructions(): string {
+    const instructions =
+      'Install with: npm install -g @github/copilot (or visit https://github.com/github/copilot)';
+    return instructions;
+  }
+
+  /**
+   * Execute a prompt using Copilot SDK with real-time streaming
+   *
+   * Creates a new CopilotClient for each execution with the correct working directory.
+   * Streams tool execution events in real-time for UI display.
+   *
+   * Resource handling: the session and the client are released in `finally`
+   * blocks, so they are torn down on normal completion, on errors, and when
+   * the consumer stops iterating early (which resumes the generator with a
+   * return and therefore bypasses both straight-line code and `catch`).
+   *
+   * @param options - Prompt, bare model ID (no `copilot-` prefix) and working directory
+   * @yields Normalized provider messages, each tagged with the SDK session id
+   * @throws Error when the model ID still carries the `copilot-` prefix or the
+   *   prompt is not text-only
+   * @throws CopilotError when the CLI is not installed or the SDK reports a failure
+   */
+  async *executeQuery(options: ExecuteOptions): AsyncGenerator<ProviderMessage> {
+    this.ensureCliDetected();
+
+    // Note: We don't use validateBareModelId here because Copilot's model IDs
+    // legitimately contain prefixes like claude-, gemini-, gpt- which are the
+    // actual model names from the Copilot CLI. We only need to ensure the
+    // copilot- prefix has been stripped by the ProviderFactory.
+    if (options.model?.startsWith('copilot-')) {
+      throw new Error(
+        `[CopilotProvider] Model ID should not have 'copilot-' prefix. Got: '${options.model}'. ` +
+          `The ProviderFactory should strip this prefix before passing to the provider.`
+      );
+    }
+
+    if (!this.cliPath) {
+      throw this.createError(
+        CopilotErrorCode.NOT_INSTALLED,
+        'Copilot CLI is not installed',
+        true,
+        this.getInstallInstructions()
+      );
+    }
+
+    const promptText = this.extractPromptText(options);
+    const bareModel = options.model || DEFAULT_BARE_MODEL;
+    const workingDirectory = options.cwd || process.cwd();
+
+    logger.debug(
+      `CopilotProvider.executeQuery called with model: "${bareModel}", cwd: "${workingDirectory}"`
+    );
+    logger.debug(`Prompt length: ${promptText.length} characters`);
+
+    // Create a client for this execution with the correct working directory
+    const client = new CopilotClient({
+      logLevel: 'warning',
+      autoRestart: false,
+      cwd: workingDirectory,
+    });
+
+    // Use an async queue to bridge callback-based SDK events to async generator
+    const eventQueue: SdkEvent[] = [];
+    let resolveWaiting: (() => void) | null = null;
+    let sessionComplete = false;
+    let sessionError: Error | null = null;
+
+    const pushEvent = (event: SdkEvent) => {
+      eventQueue.push(event);
+      // Wake the consumer loop if it is parked in waitForEvent()
+      if (resolveWaiting) {
+        resolveWaiting();
+        resolveWaiting = null;
+      }
+    };
+
+    const waitForEvent = (): Promise<void> => {
+      if (eventQueue.length > 0 || sessionComplete) {
+        return Promise.resolve();
+      }
+      return new Promise((resolve) => {
+        resolveWaiting = resolve;
+      });
+    };
+
+    try {
+      await client.start();
+      logger.debug(`CopilotClient started with cwd: ${workingDirectory}`);
+
+      // Create session with streaming enabled for real-time events
+      const session = await client.createSession({
+        model: bareModel,
+        streaming: true,
+        // AUTONOMOUS MODE: Auto-approve all permission requests.
+        // AutoMaker is designed for fully autonomous AI agent operation.
+        // Security boundary is provided by Docker containerization (see CLAUDE.md).
+        // User is warned about this at app startup.
+        onPermissionRequest: async (
+          request: PermissionRequest
+        ): Promise<{ kind: 'approved' } | { kind: 'denied-interactively-by-user' }> => {
+          logger.debug(`Permission request: ${request.kind}`);
+          return { kind: 'approved' };
+        },
+      });
+
+      try {
+        const sessionId = session.sessionId;
+        logger.debug(`Session created: ${sessionId}`);
+
+        // Set up event handler to push events to queue
+        session.on((event: SdkEvent) => {
+          logger.debug(`SDK event: ${event.type}`);
+
+          if (event.type === 'session.idle') {
+            sessionComplete = true;
+          } else if (event.type === 'session.error') {
+            const errorEvent = event as SdkSessionErrorEvent;
+            sessionError = new Error(errorEvent.data.message);
+            sessionComplete = true;
+          }
+
+          // Every event is queued, including the terminal idle/error ones
+          pushEvent(event);
+        });
+
+        // Send the prompt (non-blocking)
+        await session.send({ prompt: promptText });
+
+        // Process events as they arrive
+        while (!sessionComplete || eventQueue.length > 0) {
+          await waitForEvent();
+
+          // Check for errors first (before processing events to avoid race condition)
+          if (sessionError) {
+            throw sessionError;
+          }
+
+          // Process all queued events
+          while (eventQueue.length > 0) {
+            const event = eventQueue.shift()!;
+            const normalized = this.normalizeEvent(event);
+            if (normalized) {
+              // Add session_id if not present
+              if (!normalized.session_id) {
+                normalized.session_id = sessionId;
+              }
+              yield normalized;
+            }
+          }
+        }
+      } finally {
+        // Best-effort: a failed destroy must not mask the query outcome
+        try {
+          await session.destroy();
+        } catch (cleanupError) {
+          logger.debug(`Failed to destroy session during cleanup: ${cleanupError}`);
+        }
+      }
+    } catch (error) {
+      if (isAbortError(error)) {
+        logger.debug('Query aborted');
+        return;
+      }
+
+      // Map errors to CopilotError
+      if (error instanceof Error) {
+        logger.error(`Copilot SDK error: ${error.message}`);
+        const errorInfo = this.mapError(error.message, null);
+        throw this.createError(
+          errorInfo.code as CopilotErrorCode,
+          errorInfo.message,
+          errorInfo.recoverable,
+          errorInfo.suggestion
+        );
+      }
+      throw error;
+    } finally {
+      // Always stop the client exactly once, whatever the exit path
+      try {
+        await client.stop();
+        logger.debug('CopilotClient stopped successfully');
+      } catch (cleanupError) {
+        // Log but don't throw cleanup errors - the original error is more important
+        logger.debug(`Failed to stop client during cleanup: ${cleanupError}`);
+      }
+    }
+  }
+
+  // ==========================================================================
+  // Copilot-Specific Methods
+  // ==========================================================================
+
+  /**
+   * Build a CopilotError: a plain Error decorated with Copilot metadata.
+   *
+   * @param code - Machine-readable error code
+   * @param message - Human-readable error message
+   * @param recoverable - Whether the caller may reasonably retry (default `false`)
+   * @param suggestion - Optional hint for resolving the problem
+   * @returns The decorated error, with `name` set to 'CopilotError'
+   */
+  private createError(
+    code: CopilotErrorCode,
+    message: string,
+    recoverable: boolean = false,
+    suggestion?: string
+  ): CopilotError {
+    const copilotError = Object.assign(new Error(message), {
+      code,
+      recoverable,
+      suggestion,
+    }) as CopilotError;
+    copilotError.name = 'CopilotError';
+    return copilotError;
+  }
+
+  /**
+   * Query the Copilot CLI for its version string.
+   *
+   * Runs `<cliPath> --version` synchronously with a 5 second timeout.
+   *
+   * @returns The trimmed command output, or `null` when the CLI is not
+   *   detected or the command fails
+   */
+  async getVersion(): Promise<string | null> {
+    this.ensureCliDetected();
+
+    const cliPath = this.cliPath;
+    if (!cliPath) {
+      return null;
+    }
+
+    try {
+      const stdout = execSync(`"${cliPath}" --version`, {
+        encoding: 'utf8',
+        timeout: 5000,
+        stdio: 'pipe',
+      });
+      return stdout.trim();
+    } catch {
+      // Any failure (non-zero exit, timeout, spawn error) means "unknown"
+      return null;
+    }
+  }
+
+  /**
+   * Check authentication status
+   *
+   * Uses GitHub CLI (gh) to check Copilot authentication status.
+   * The Copilot CLI relies on gh auth for authentication.
+   *
+   * Checks are tried in order and the first positive result wins:
+   *  1. `gh auth status --hostname github.com`
+   *  2. `<cliPath> auth status`
+   *  3. the GITHUB_TOKEN environment variable
+   *  4. an oauth_token entry for github.com in ~/.config/gh/hosts.yml
+   *
+   * Output of the Copilot status command is matched case-insensitively, and
+   * negative phrasings ("not authenticated", "unauthenticated",
+   * "not logged in") are never treated as a positive result.
+   *
+   * @returns The detected authentication status; `authenticated: false` with
+   *   method 'none' when the CLI is missing or no credentials are found
+   */
+  async checkAuth(): Promise<CopilotAuthStatus> {
+    this.ensureCliDetected();
+    if (!this.cliPath) {
+      logger.debug('checkAuth: CLI not found');
+      return { authenticated: false, method: 'none' };
+    }
+
+    logger.debug('checkAuth: Starting credential check');
+
+    // Try to check GitHub CLI authentication status first
+    // The Copilot CLI uses gh auth for authentication
+    try {
+      const ghStatus = execSync('gh auth status --hostname github.com', {
+        encoding: 'utf8',
+        timeout: 10000,
+        stdio: 'pipe',
+      });
+
+      logger.debug(`checkAuth: gh auth status output: ${ghStatus.substring(0, 200)}`);
+
+      // Parse gh auth status output
+      const loggedInMatch = ghStatus.match(/Logged in to github\.com account (\S+)/);
+      if (loggedInMatch) {
+        return {
+          authenticated: true,
+          method: 'oauth',
+          login: loggedInMatch[1],
+          host: 'github.com',
+        };
+      }
+
+      // Check for token auth
+      if (ghStatus.includes('Logged in') || ghStatus.includes('Token:')) {
+        return {
+          authenticated: true,
+          method: 'oauth',
+          host: 'github.com',
+        };
+      }
+    } catch (ghError) {
+      logger.debug(`checkAuth: gh auth status failed: ${ghError}`);
+    }
+
+    // Try Copilot-specific auth check if gh is not available
+    try {
+      const result = execSync(`"${this.cliPath}" auth status`, {
+        encoding: 'utf8',
+        timeout: 10000,
+        stdio: 'pipe',
+      });
+
+      logger.debug(`checkAuth: copilot auth status output: ${result.substring(0, 200)}`);
+
+      // "authenticated" is a substring of "not authenticated"/"unauthenticated"
+      // and "logged in" of "not logged in", so rule out the negatives explicitly
+      const normalized = result.toLowerCase();
+      const deniesAuth =
+        normalized.includes('not authenticated') ||
+        normalized.includes('unauthenticated') ||
+        normalized.includes('not logged in');
+      const affirmsAuth = normalized.includes('authenticated') || normalized.includes('logged in');
+
+      if (affirmsAuth && !deniesAuth) {
+        return {
+          authenticated: true,
+          method: 'cli',
+        };
+      }
+    } catch (copilotError) {
+      logger.debug(`checkAuth: copilot auth status failed: ${copilotError}`);
+    }
+
+    // Check for GITHUB_TOKEN environment variable
+    if (process.env.GITHUB_TOKEN) {
+      logger.debug('checkAuth: Found GITHUB_TOKEN environment variable');
+      return {
+        authenticated: true,
+        method: 'oauth',
+        statusMessage: 'Using GITHUB_TOKEN environment variable',
+      };
+    }
+
+    // Check for gh config file
+    const ghConfigPath = path.join(os.homedir(), '.config', 'gh', 'hosts.yml');
+    try {
+      await fs.access(ghConfigPath);
+      const content = await fs.readFile(ghConfigPath, 'utf8');
+      if (content.includes('github.com') && content.includes('oauth_token')) {
+        logger.debug('checkAuth: Found gh config with oauth_token');
+        return {
+          authenticated: true,
+          method: 'oauth',
+          host: 'github.com',
+        };
+      }
+    } catch {
+      logger.debug('checkAuth: No gh config found');
+    }
+
+    // No credentials found
+    logger.debug('checkAuth: No valid credentials found');
+    return {
+      authenticated: false,
+      method: 'none',
+      error:
+        'No authentication configured. Run "gh auth login" or install GitHub Copilot extension.',
+    };
+  }
+
+  /**
+   * Fetch available models from the CLI at runtime
+   *
+   * Runs `<cliPath> models list --format json` and caches the result for
+   * getAvailableModels(). The output must be a JSON array; entries without a
+   * string `id` are discarded because the id is required to build the
+   * `copilot-` prefixed model ID. Any failure (command error, invalid JSON,
+   * non-array payload) clears the cache.
+   *
+   * @returns The discovered models, or an empty array when the CLI is not
+   *   detected or the lookup fails
+   */
+  async fetchRuntimeModels(): Promise<CopilotRuntimeModel[]> {
+    this.ensureCliDetected();
+    if (!this.cliPath) {
+      return [];
+    }
+
+    try {
+      // Try to list models using the CLI
+      const result = execSync(`"${this.cliPath}" models list --format json`, {
+        encoding: 'utf8',
+        timeout: 15000,
+        stdio: 'pipe',
+      });
+
+      // Validate before caching: JSON.parse can yield any JSON value
+      const parsed: unknown = JSON.parse(result);
+      if (!Array.isArray(parsed)) {
+        throw new Error('Expected a JSON array of models from Copilot CLI');
+      }
+
+      const models = parsed.filter(
+        (entry): entry is CopilotRuntimeModel =>
+          entry !== null &&
+          typeof entry === 'object' &&
+          typeof (entry as { id?: unknown }).id === 'string'
+      );
+
+      this.runtimeModels = models;
+      logger.debug(`Fetched ${models.length} runtime models from Copilot CLI`);
+      return models;
+    } catch (error) {
+      // Clear cache on failure to avoid returning stale data
+      this.runtimeModels = null;
+      logger.debug(`Failed to fetch runtime models: ${error}`);
+      return [];
+    }
+  }
+
+  /**
+   * Report installation and authentication state (required by BaseProvider).
+   *
+   * The version is only queried when the CLI is installed; authentication is
+   * always checked.
+   *
+   * @returns Installation status with version, path and auth flag
+   */
+  async detectInstallation(): Promise<InstallationStatus> {
+    const installed = await this.isInstalled();
+
+    let version: string | null | undefined;
+    if (installed) {
+      version = await this.getVersion();
+    }
+
+    const { authenticated } = await this.checkAuth();
+
+    return {
+      installed,
+      version: version || undefined,
+      path: this.cliPath || undefined,
+      method: 'cli',
+      authenticated,
+    };
+  }
+
+  /**
+   * Public accessor for the detected CLI path (used by status endpoints).
+   *
+   * Runs CLI detection before reading the path.
+   *
+   * @returns The CLI path, or `null` when no CLI was found
+   */
+  getCliPath(): string | null {
+    this.ensureCliDetected();
+    const detectedPath = this.cliPath;
+    return detectedPath;
+  }
+
+  /**
+   * List the Copilot models this provider can offer.
+   *
+   * Static definitions from COPILOT_MODEL_MAP come first, followed by any
+   * cached runtime-discovered models whose `copilot-` prefixed ID is not
+   * already in the list.
+   *
+   * @returns Static and runtime model definitions, without duplicate IDs
+   */
+  getAvailableModels(): ModelDefinition[] {
+    const models: ModelDefinition[] = [];
+
+    // Static definitions
+    for (const [id, config] of Object.entries(COPILOT_MODEL_MAP)) {
+      models.push({
+        id, // Full model ID with copilot- prefix
+        name: config.label,
+        modelString: id.replace('copilot-', ''), // Bare model for CLI
+        provider: 'copilot',
+        description: config.description,
+        supportsTools: config.supportsTools,
+        supportsVision: config.supportsVision,
+        contextWindow: config.contextWindow,
+      });
+    }
+
+    // Runtime models discovered via the CLI, skipping IDs already listed
+    const knownIds = new Set(models.map((model) => model.id));
+    for (const runtimeModel of this.runtimeModels ?? []) {
+      const fullId = `copilot-${runtimeModel.id}`;
+      if (knownIds.has(fullId)) {
+        continue;
+      }
+      knownIds.add(fullId);
+
+      const displayName = runtimeModel.name || runtimeModel.id;
+      models.push({
+        id: fullId,
+        name: displayName,
+        modelString: runtimeModel.id,
+        provider: 'copilot',
+        description: `Dynamic model: ${displayName}`,
+        supportsTools: true,
+        supportsVision: runtimeModel.capabilities?.supportsVision ?? false,
+        contextWindow: runtimeModel.capabilities?.maxInputTokens,
+      });
+    }
+
+    return models;
+  }
+
+  /**
+   * Report whether this provider supports a named feature.
+   *
+   * Supported: 'tools', 'text', 'streaming'.
+   *
+   * Note: Vision is NOT currently supported - the SDK doesn't handle image inputs yet.
+   * This may change in future versions of the Copilot SDK.
+   *
+   * @param feature - Feature name to test
+   * @returns `true` only for the supported feature names
+   */
+  supportsFeature(feature: string): boolean {
+    switch (feature) {
+      case 'tools':
+      case 'text':
+      case 'streaming':
+        return true;
+      default:
+        return false;
+    }
+  }
+
+  /**
+   * Tell whether a non-empty runtime model list is cached.
+   *
+   * @returns `false` when the cache is cleared or holds an empty list
+   */
+  hasCachedModels(): boolean {
+    const cachedCount = this.runtimeModels?.length ?? 0;
+    return cachedCount > 0;
+  }
+
+  /**
+   * Drop the cached runtime models so only static models are reported until
+   * the next successful fetch.
+   */
+  clearModelCache(): void {
+    // null (rather than []) marks "nothing cached"
+    this.runtimeModels = null;
+    logger.debug('Cleared Copilot model cache');
+  }
+
+  /**
+   * Re-query the CLI for runtime models, then return the combined list.
+   *
+   * @returns Static models plus whatever the refresh discovered
+   */
+  async refreshModels(): Promise<ModelDefinition[]> {
+    logger.debug('Refreshing Copilot models from CLI');
+
+    // Updates the runtime cache; the return value itself is not needed here
+    await this.fetchRuntimeModels();
+
+    const allModels = this.getAvailableModels();
+    return allModels;
+  }
+}
--- a/apps/server/src/providers/cursor-config-manager.ts
+++ b/apps/server/src/providers/cursor-config-manager.ts
@@ -0,0 +1,197 @@
+/**
+ * Cursor CLI Configuration Manager
+ *
+ * Manages Cursor CLI configuration stored in .automaker/cursor-config.json
+ */
+
+import * as fs from 'fs';
+import * as path from 'path';
+import { getAllCursorModelIds, type CursorCliConfig, type CursorModelId } from '@automaker/types';
+import { createLogger } from '@automaker/utils';
+import { getAutomakerDir } from '@automaker/platform';
+
+// Create logger for this module
+const logger = createLogger('CursorConfigManager');
+
+/**
+ * Manages Cursor CLI configuration
+ * Config location: .automaker/cursor-config.json
+ *
+ * The configuration is read once at construction and kept in memory; every
+ * setter writes the full configuration back to disk synchronously.
+ */
+export class CursorConfigManager {
+  private configPath: string;
+  private config: CursorCliConfig;
+
+  /**
+   * @param projectPath - Project root used to resolve the automaker directory
+   */
+  constructor(projectPath: string) {
+    // Use getAutomakerDir for consistent path resolution
+    this.configPath = path.join(getAutomakerDir(projectPath), 'cursor-config.json');
+    this.config = this.loadConfig();
+  }
+
+  /**
+   * Build the default configuration: 'cursor-auto' as the default model with
+   * all available models enabled.
+   */
+  private createDefaultConfig(): CursorCliConfig {
+    return {
+      defaultModel: 'cursor-auto',
+      models: getAllCursorModelIds(),
+    };
+  }
+
+  /**
+   * Load configuration from disk
+   *
+   * Falls back to the default configuration when the file is missing,
+   * unreadable, not valid JSON, or does not contain a JSON object (e.g.
+   * `null`, an array or a primitive), so accessors never operate on a
+   * non-object config.
+   */
+  private loadConfig(): CursorCliConfig {
+    try {
+      if (fs.existsSync(this.configPath)) {
+        const content = fs.readFileSync(this.configPath, 'utf8');
+        const parsed: unknown = JSON.parse(content);
+        if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
+          logger.debug(`Loaded config from ${this.configPath}`);
+          return parsed as CursorCliConfig;
+        }
+        logger.warn(`Ignoring config at ${this.configPath}: expected a JSON object`);
+      }
+    } catch (error) {
+      logger.warn('Failed to load config:', error);
+    }
+
+    // Return default config with all available models
+    return this.createDefaultConfig();
+  }
+
+  /**
+   * Save configuration to disk
+   *
+   * Creates the parent directory when needed.
+   *
+   * @throws Rethrows any filesystem error after logging it
+   */
+  private saveConfig(): void {
+    try {
+      const dir = path.dirname(this.configPath);
+      if (!fs.existsSync(dir)) {
+        fs.mkdirSync(dir, { recursive: true });
+      }
+      fs.writeFileSync(this.configPath, JSON.stringify(this.config, null, 2));
+      logger.debug('Config saved');
+    } catch (error) {
+      logger.error('Failed to save config:', error);
+      throw error;
+    }
+  }
+
+  /**
+   * Get the full configuration
+   *
+   * @returns A shallow copy; nested arrays are shared with the manager
+   */
+  getConfig(): CursorCliConfig {
+    return { ...this.config };
+  }
+
+  /**
+   * Get the default model
+   *
+   * @returns The configured default, or 'cursor-auto' when unset
+   */
+  getDefaultModel(): CursorModelId {
+    return this.config.defaultModel || 'cursor-auto';
+  }
+
+  /**
+   * Set the default model and persist the change
+   */
+  setDefaultModel(model: CursorModelId): void {
+    this.config.defaultModel = model;
+    this.saveConfig();
+    logger.info(`Default model set to: ${model}`);
+  }
+
+  /**
+   * Get enabled models
+   *
+   * @returns The configured list, or ['cursor-auto'] when unset
+   */
+  getEnabledModels(): CursorModelId[] {
+    return this.config.models || ['cursor-auto'];
+  }
+
+  /**
+   * Set enabled models and persist the change
+   */
+  setEnabledModels(models: CursorModelId[]): void {
+    this.config.models = models;
+    this.saveConfig();
+    logger.info(`Enabled models updated: ${models.join(', ')}`);
+  }
+
+  /**
+   * Add a model to enabled list
+   *
+   * No-op (nothing is written) when the model is already enabled.
+   */
+  addModel(model: CursorModelId): void {
+    if (!this.config.models) {
+      this.config.models = [];
+    }
+    if (!this.config.models.includes(model)) {
+      this.config.models.push(model);
+      this.saveConfig();
+      logger.info(`Model added: ${model}`);
+    }
+  }
+
+  /**
+   * Remove a model from enabled list
+   *
+   * Does nothing when no model list is configured.
+   */
+  removeModel(model: CursorModelId): void {
+    if (this.config.models) {
+      this.config.models = this.config.models.filter((m) => m !== model);
+      this.saveConfig();
+      logger.info(`Model removed: ${model}`);
+    }
+  }
+
+  /**
+   * Check if a model is enabled
+   *
+   * @returns `false` when no model list is configured
+   */
+  isModelEnabled(model: CursorModelId): boolean {
+    return this.config.models?.includes(model) ?? false;
+  }
+
+  /**
+   * Get MCP server configurations
+   *
+   * @returns The configured list, or an empty array when unset
+   */
+  getMcpServers(): string[] {
+    return this.config.mcpServers || [];
+  }
+
+  /**
+   * Set MCP server configurations and persist the change
+   */
+  setMcpServers(servers: string[]): void {
+    this.config.mcpServers = servers;
+    this.saveConfig();
+    logger.info(`MCP servers updated: ${servers.join(', ')}`);
+  }
+
+  /**
+   * Get Cursor rules paths
+   *
+   * @returns The configured list, or an empty array when unset
+   */
+  getRules(): string[] {
+    return this.config.rules || [];
+  }
+
+  /**
+   * Set Cursor rules paths and persist the change
+   */
+  setRules(rules: string[]): void {
+    this.config.rules = rules;
+    this.saveConfig();
+    logger.info(`Rules updated: ${rules.join(', ')}`);
+  }
+
+  /**
+   * Reset configuration to defaults and persist the change
+   */
+  reset(): void {
+    this.config = this.createDefaultConfig();
+    this.saveConfig();
+    logger.info('Config reset to defaults');
+  }
+
+  /**
+   * Check if config file exists
+   */
+  exists(): boolean {
+    return fs.existsSync(this.configPath);
+  }
+
+  /**
+   * Get the config file path
+   */
+  getConfigPath(): string {
+    return this.configPath;
+  }
+}
--- a/apps/server/src/providers/cursor-provider.ts
+++ b/apps/server/src/providers/cursor-provider.ts
--- a/apps/server/src/providers/gemini-provider.ts
+++ b/apps/server/src/providers/gemini-provider.ts
@@ -0,0 +1,810 @@
+/**
+ * Gemini Provider - Executes queries using the Gemini CLI
+ *
+ * Extends CliProvider with Gemini-specific:
+ * - Event normalization for Gemini's JSONL streaming format
+ * - Google account and API key authentication support
+ * - Thinking level configuration
+ *
+ * Based on https://github.com/google-gemini/gemini-cli
+ */
+
+import { execSync } from 'child_process';
+import * as fs from 'fs/promises';
+import * as path from 'path';
+import * as os from 'os';
+import { CliProvider, type CliSpawnConfig, type CliErrorInfo } from './cli-provider.js';
+import type {
+  ProviderConfig,
+  ExecuteOptions,
+  ProviderMessage,
+  InstallationStatus,
+  ModelDefinition,
+  ContentBlock,
+} from './types.js';
+import { validateBareModelId } from '@automaker/types';
+import { GEMINI_MODEL_MAP, type GeminiAuthStatus } from '@automaker/types';
+import { createLogger, isAbortError } from '@automaker/utils';
+import { spawnJSONLProcess } from '@automaker/platform';
+import { normalizeTodos } from './tool-normalization.js';
+
+// Create logger for this module
+const logger = createLogger('GeminiProvider');
+
+// =============================================================================
+// Gemini Stream Event Types
+// =============================================================================
+
+/**
+ * Base event structure from Gemini CLI --output-format stream-json
+ *
+ * Every streamed line is one JSON object discriminated by `type`; the
+ * specific event interfaces below extend this base.
+ *
+ * Actual CLI output format:
+ * {"type":"init","timestamp":"...","session_id":"...","model":"..."}
+ * {"type":"message","timestamp":"...","role":"user","content":"..."}
+ * {"type":"message","timestamp":"...","role":"assistant","content":"...","delta":true}
+ * {"type":"tool_use","timestamp":"...","tool_name":"...","tool_id":"...","parameters":{...}}
+ * {"type":"tool_result","timestamp":"...","tool_id":"...","status":"success","output":"..."}
+ * {"type":"result","timestamp":"...","status":"success","stats":{...}}
+ */
+interface GeminiStreamEvent {
+  // Discriminator used by normalizeEvent to pick the concrete event shape
+  type: 'init' | 'message' | 'tool_use' | 'tool_result' | 'result' | 'error';
+  timestamp?: string;
+  // Only the init sample above carries session_id; optional everywhere else
+  session_id?: string;
+}
+
+/**
+ * `init` event: carries the session ID and the model name.
+ * executeQuery captures `session_id` from it; normalizeEvent only logs it.
+ */
+interface GeminiInitEvent extends GeminiStreamEvent {
+  type: 'init';
+  session_id: string;
+  model: string;
+}
+
+/**
+ * `message` event: text content attributed to the user or the assistant.
+ * normalizeEvent drops user messages and forwards assistant ones as text blocks.
+ */
+interface GeminiMessageEvent extends GeminiStreamEvent {
+  type: 'message';
+  role: 'user' | 'assistant';
+  content: string;
+  // NOTE(review): presumably marks a streamed partial chunk; not read in this file — confirm against CLI docs
+  delta?: boolean;
+  session_id?: string;
+}
+
+/**
+ * `tool_use` event: the CLI is invoking a tool.
+ * `tool_id` is forwarded as `tool_use_id`; `tool_name` and `parameters` are
+ * normalized before being forwarded.
+ */
+interface GeminiToolUseEvent extends GeminiStreamEvent {
+  type: 'tool_use';
+  tool_id: string;
+  tool_name: string;
+  parameters: Record<string, unknown>;
+  session_id?: string;
+}
+
+/**
+ * `tool_result` event: outcome of a tool call identified by `tool_id`.
+ * When `status` is 'error', normalizeEvent prefixes `output` with "[ERROR] ".
+ */
+interface GeminiToolResultEvent extends GeminiStreamEvent {
+  type: 'tool_result';
+  tool_id: string;
+  status: 'success' | 'error';
+  output: string;
+  session_id?: string;
+}
+
+/**
+ * `result` event: final status of a run.
+ * normalizeEvent also reuses this shape (for its `error` field) when handling
+ * `error` events.
+ */
+interface GeminiResultEvent extends GeminiStreamEvent {
+  type: 'result';
+  status: 'success' | 'error';
+  // Usage statistics; only total_tokens is read in this file (debug logging)
+  // NOTE(review): meaning/units of `cached` and `input` are not shown here — confirm against CLI docs
+  stats?: {
+    total_tokens?: number;
+    input_tokens?: number;
+    output_tokens?: number;
+    cached?: number;
+    input?: number;
+    duration_ms?: number;
+    tool_calls?: number;
+  };
+  // Error text surfaced to callers; 'Unknown error' is substituted when absent
+  error?: string;
+  session_id?: string;
+}
+
+// =============================================================================
+// Error Codes
+// =============================================================================
+
+/**
+ * String error codes attached to GeminiError.
+ *
+ * mapError derives NOT_AUTHENTICATED, RATE_LIMITED, MODEL_UNAVAILABLE,
+ * NETWORK_ERROR, PROCESS_CRASHED and UNKNOWN from CLI stderr / exit code;
+ * executeQuery raises NOT_INSTALLED when no CLI path was detected.
+ * TIMEOUT is not produced by mapError in this file (timeout text is
+ * classified as NETWORK_ERROR there).
+ */
+export enum GeminiErrorCode {
+  NOT_INSTALLED = 'GEMINI_NOT_INSTALLED',
+  NOT_AUTHENTICATED = 'GEMINI_NOT_AUTHENTICATED',
+  RATE_LIMITED = 'GEMINI_RATE_LIMITED',
+  MODEL_UNAVAILABLE = 'GEMINI_MODEL_UNAVAILABLE',
+  NETWORK_ERROR = 'GEMINI_NETWORK_ERROR',
+  PROCESS_CRASHED = 'GEMINI_PROCESS_CRASHED',
+  TIMEOUT = 'GEMINI_TIMEOUT',
+  UNKNOWN = 'GEMINI_UNKNOWN_ERROR',
+}
+
+/**
+ * Error shape thrown by GeminiProvider (built by its createError helper,
+ * which also sets `name` to 'GeminiError').
+ */
+export interface GeminiError extends Error {
+  // Machine-readable classification of the failure
+  code: GeminiErrorCode;
+  // Whether the failure is flagged as recoverable by the code that raised it
+  recoverable: boolean;
+  // Optional human-readable hint on how to resolve the failure
+  suggestion?: string;
+}
+
+// =============================================================================
+// Tool Name Normalization
+// =============================================================================
+
+/**
+ * Gemini CLI tool name to standard tool name mapping
+ * This allows the UI to properly categorize and display Gemini tool calls
+ *
+ * Several Gemini tools may share one standard name (both read_file and
+ * read_many_files map to 'Read'). Names missing from this table are passed
+ * through unchanged by normalizeGeminiToolName.
+ */
+const GEMINI_TOOL_NAME_MAP: Record<string, string> = {
+  write_todos: 'TodoWrite',
+  read_file: 'Read',
+  read_many_files: 'Read',
+  replace: 'Edit',
+  write_file: 'Write',
+  run_shell_command: 'Bash',
+  search_file_content: 'Grep',
+  glob: 'Glob',
+  list_directory: 'Ls',
+  web_fetch: 'WebFetch',
+  google_web_search: 'WebSearch',
+};
+
+/**
+ * Normalize a Gemini CLI tool name to the standard tool name.
+ *
+ * Only own keys of GEMINI_TOOL_NAME_MAP are consulted. A bare property lookup
+ * on a plain object would also resolve inherited members, so a tool named
+ * e.g. "constructor" or "toString" would yield a function instead of a
+ * string; such names are returned unchanged instead.
+ *
+ * @param geminiToolName - Tool name as reported by the Gemini CLI
+ * @returns The mapped standard name, or the input name when no mapping exists
+ */
+function normalizeGeminiToolName(geminiToolName: string): string {
+  if (!Object.prototype.hasOwnProperty.call(GEMINI_TOOL_NAME_MAP, geminiToolName)) {
+    return geminiToolName;
+  }
+  return GEMINI_TOOL_NAME_MAP[geminiToolName] || geminiToolName;
+}
+
+/**
+ * Convert Gemini tool parameters into the standard input shape.
+ *
+ * Only `write_todos` is rewritten; its todo list goes through the shared
+ * normalizeTodos utility so todo handling stays consistent across providers.
+ * Every other tool's input is returned untouched.
+ *
+ * Gemini `write_todos` format:
+ * {"todos": [{"description": "Task text", "status": "pending|in_progress|completed|cancelled"}]}
+ *
+ * Claude `TodoWrite` format:
+ * {"todos": [{"content": "Task text", "status": "pending|in_progress|completed", "activeForm": "..."}]}
+ *
+ * @param toolName - Original (un-normalized) Gemini tool name
+ * @param input - Tool parameters as emitted by the CLI
+ * @returns Normalized input for `write_todos` with an array `todos`; otherwise `input` itself
+ */
+function normalizeGeminiToolInput(
+  toolName: string,
+  input: Record<string, unknown>
+): Record<string, unknown> {
+  if (toolName !== 'write_todos') {
+    return input;
+  }
+
+  const todos = input.todos;
+  if (!Array.isArray(todos)) {
+    return input;
+  }
+
+  return { todos: normalizeTodos(todos) };
+}
+
+/**
+ * GeminiProvider - Integrates Gemini CLI as an AI provider
+ *
+ * Features:
+ * - Google account OAuth login support
+ * - API key authentication (GEMINI_API_KEY)
+ * - Vertex AI support
+ * - Thinking level configuration
+ * - Streaming JSON output
+ */
+export class GeminiProvider extends CliProvider {
+  /**
+   * Create the provider and run CLI detection immediately.
+   *
+   * @param config - Provider configuration forwarded to CliProvider (defaults to an empty object)
+   */
+  constructor(config: ProviderConfig = {}) {
+    super(config);
+    // Trigger CLI detection on construction
+    this.ensureCliDetected();
+  }
+
+  // ==========================================================================
+  // CliProvider Abstract Method Implementations
+  // ==========================================================================
+
+  /**
+   * Provider name.
+   *
+   * @returns The literal 'gemini'
+   */
+  getName(): string {
+    return 'gemini';
+  }
+
+  /**
+   * Name of the CLI executable.
+   *
+   * @returns The literal 'gemini'
+   */
+  getCliName(): string {
+    return 'gemini';
+  }
+
+  /**
+   * Describe how to locate and launch the Gemini CLI.
+   *
+   * @returns Spawn config: npx strategy on Windows (official
+   *   '@google/gemini-cli' package) plus per-platform candidate install paths
+   */
+  getSpawnConfig(): CliSpawnConfig {
+    const home = os.homedir();
+    // Candidate locations shared by the linux and darwin lists
+    const userLocalBin = path.join(home, '.local/bin/gemini');
+    const npmGlobalBin = path.join(home, '.npm-global/bin/gemini');
+
+    return {
+      windowsStrategy: 'npx', // Gemini CLI can be run via npx
+      npxPackage: '@google/gemini-cli', // Official Google Gemini CLI package
+      commonPaths: {
+        linux: [userLocalBin, '/usr/local/bin/gemini', npmGlobalBin],
+        darwin: [userLocalBin, '/usr/local/bin/gemini', '/opt/homebrew/bin/gemini', npmGlobalBin],
+        win32: [
+          path.join(home, 'AppData', 'Roaming', 'npm', 'gemini.cmd'),
+          path.join(home, '.npm-global', 'gemini.cmd'),
+        ],
+      },
+    };
+  }
+
+  /**
+   * Flatten the prompt in ExecuteOptions into a single string.
+   *
+   * A string prompt is returned as-is. An array prompt contributes only its
+   * non-empty text parts, joined with newlines.
+   *
+   * @param options - Execution options holding the prompt
+   * @returns The prompt text
+   * @throws Error when the prompt is neither a string nor an array
+   */
+  private extractPromptText(options: ExecuteOptions): string {
+    const { prompt } = options;
+
+    if (typeof prompt === 'string') {
+      return prompt;
+    }
+    if (!Array.isArray(prompt)) {
+      throw new Error('Invalid prompt format');
+    }
+
+    const textParts = prompt.filter((part) => part.type === 'text' && part.text);
+    return textParts.map((part) => part.text).join('\n');
+  }
+
+  /**
+   * Build the Gemini CLI argument list, without the prompt.
+   *
+   * The prompt is the last positional argument and is appended by
+   * executeQuery after the text has been extracted.
+   *
+   * Note: no thinking-level flag is passed; the original implementation notes
+   * that Gemini CLI has none and that thinking is determined by the model.
+   *
+   * @param options - Execution options (model and cwd are read)
+   * @returns Arguments in the order: output format, optional model, sandbox,
+   *   approval mode, optional included directory
+   */
+  buildCliArgs(options: ExecuteOptions): string[] {
+    // The model arrives without its provider prefix (e.g. '2.5-flash' for
+    // 'gemini-2.5-flash'); fall back to '2.5-flash' when none is given.
+    const requestedModel = options.model || '2.5-flash';
+
+    // Gemini CLI expects the full name ("gemini-2.5-flash"): unlike Cursor's
+    // routing-only 'cursor-' prefix, 'gemini-' is part of the real model name.
+    // 'auto' means: pass no --model flag at all.
+    const modelArgs: string[] = [];
+    if (requestedModel !== 'auto') {
+      const hasPrefix = requestedModel.startsWith('gemini-');
+      modelArgs.push('--model', hasPrefix ? requestedModel : `gemini-${requestedModel}`);
+    }
+
+    // Explicitly allow the working directory so file operations are permitted
+    // even if the CLI cached a different workspace from a previous session.
+    const directoryArgs = options.cwd ? ['--include-directories', options.cwd] : [];
+
+    return [
+      // Streaming JSON output format for real-time updates
+      '--output-format',
+      'stream-json',
+      ...modelArgs,
+      // Disable sandbox mode for faster execution (sandbox adds overhead)
+      '--sandbox',
+      'false',
+      // YOLO mode: automatic approval, required for non-interactive use
+      '--approval-mode',
+      'yolo',
+      ...directoryArgs,
+    ];
+  }
+
+  /**
+   * Translate one raw Gemini stream event into a ProviderMessage.
+   *
+   * - init: logged only, no message
+   * - message: assistant text is forwarded; any other role is dropped
+   * - tool_use: forwarded with normalized tool name and input
+   * - tool_result: forwarded; failed results are prefixed with "[ERROR] "
+   * - result: error status becomes an error message, otherwise a success result
+   * - error: becomes an error message
+   * - anything else: logged and dropped
+   *
+   * @param event - Parsed JSON event from the CLI
+   * @returns The normalized message, or null when the event yields nothing
+   */
+  normalizeEvent(event: unknown): ProviderMessage | null {
+    const geminiEvent = event as GeminiStreamEvent;
+    const eventType = geminiEvent.type;
+
+    if (eventType === 'init') {
+      // Session is captured by the caller; nothing to yield here
+      const { session_id, model } = geminiEvent as GeminiInitEvent;
+      logger.debug(`Gemini init event: session=${session_id}, model=${model}`);
+      return null;
+    }
+
+    if (eventType === 'message') {
+      const { role, content, session_id } = geminiEvent as GeminiMessageEvent;
+      // User messages are already handled by the caller; unknown roles are ignored
+      if (role !== 'assistant') {
+        return null;
+      }
+      return {
+        type: 'assistant',
+        session_id,
+        message: {
+          role: 'assistant',
+          content: [{ type: 'text', text: content }],
+        },
+      };
+    }
+
+    if (eventType === 'tool_use') {
+      const toolUse = geminiEvent as GeminiToolUseEvent;
+      const name = normalizeGeminiToolName(toolUse.tool_name);
+      // Input normalization keys off the original Gemini tool name
+      const input = normalizeGeminiToolInput(
+        toolUse.tool_name,
+        toolUse.parameters as Record<string, unknown>
+      );
+      return {
+        type: 'assistant',
+        session_id: toolUse.session_id,
+        message: {
+          role: 'assistant',
+          content: [{ type: 'tool_use', name, tool_use_id: toolUse.tool_id, input }],
+        },
+      };
+    }
+
+    if (eventType === 'tool_result') {
+      const toolResult = geminiEvent as GeminiToolResultEvent;
+      const failed = toolResult.status === 'error';
+      const content = failed ? `[ERROR] ${toolResult.output}` : toolResult.output;
+      return {
+        type: 'assistant',
+        session_id: toolResult.session_id,
+        message: {
+          role: 'assistant',
+          content: [{ type: 'tool_result', tool_use_id: toolResult.tool_id, content }],
+        },
+      };
+    }
+
+    if (eventType === 'result') {
+      const result = geminiEvent as GeminiResultEvent;
+      if (result.status === 'error') {
+        return {
+          type: 'error',
+          session_id: result.session_id,
+          error: result.error || 'Unknown error',
+        };
+      }
+      // Success result - include stats for logging
+      logger.debug(`Gemini result: status=${result.status}, tokens=${result.stats?.total_tokens}`);
+      return {
+        type: 'result',
+        subtype: 'success',
+        session_id: result.session_id,
+      };
+    }
+
+    if (eventType === 'error') {
+      // Error events are read through the result shape for its `error` field
+      const failure = geminiEvent as GeminiResultEvent;
+      return {
+        type: 'error',
+        session_id: failure.session_id,
+        error: failure.error || 'Unknown error',
+      };
+    }
+
+    logger.debug(`Unknown Gemini event type: ${geminiEvent.type}`);
+    return null;
+  }
+
+  // ==========================================================================
+  // CliProvider Overrides
+  // ==========================================================================
+
+  /**
+   * Classify a CLI failure into a Gemini-specific error description.
+   *
+   * Matching is case-insensitive substring search over stderr. Categories are
+   * tried in a fixed order and the first hit wins: authentication, rate
+   * limit, model availability, network, process termination, then unknown.
+   *
+   * @param stderr - Captured stderr text of the CLI process
+   * @param exitCode - Process exit code, or null when unavailable
+   * @returns Error code, message, recoverability flag and optional suggestion
+   */
+  protected mapError(stderr: string, exitCode: number | null): CliErrorInfo {
+    const lower = stderr.toLowerCase();
+    const mentions = (needle: string): boolean => lower.includes(needle);
+    const mentionsAny = (needles: string[]): boolean => needles.some((n) => mentions(n));
+
+    const authFailure =
+      mentionsAny([
+        'not authenticated',
+        'please log in',
+        'unauthorized',
+        'login required',
+        'error authenticating',
+        'loadcodeassist',
+      ]) ||
+      // A refused connection mentioning 8888 is also treated as an auth failure
+      (mentions('econnrefused') && mentions('8888'));
+    if (authFailure) {
+      return {
+        code: GeminiErrorCode.NOT_AUTHENTICATED,
+        message: 'Gemini CLI is not authenticated',
+        recoverable: true,
+        suggestion:
+          'Run "gemini" interactively to log in, or set GEMINI_API_KEY environment variable',
+      };
+    }
+
+    if (mentionsAny(['rate limit', 'too many requests', '429', 'quota exceeded'])) {
+      return {
+        code: GeminiErrorCode.RATE_LIMITED,
+        message: 'Gemini API rate limit exceeded',
+        recoverable: true,
+        suggestion: 'Wait a few minutes and try again. Free tier: 60 req/min, 1000 req/day',
+      };
+    }
+
+    const modelProblem =
+      mentionsAny([
+        'model not available',
+        'invalid model',
+        'unknown model',
+        'modelnotfounderror',
+        'model not found',
+      ]) ||
+      (mentions('not found') && mentions('404'));
+    if (modelProblem) {
+      return {
+        code: GeminiErrorCode.MODEL_UNAVAILABLE,
+        message: 'Requested model is not available',
+        recoverable: true,
+        suggestion: 'Try using "gemini-2.5-flash" or select a different model',
+      };
+    }
+
+    if (mentionsAny(['network', 'connection', 'econnrefused', 'timeout'])) {
+      return {
+        code: GeminiErrorCode.NETWORK_ERROR,
+        message: 'Network connection error',
+        recoverable: true,
+        suggestion: 'Check your internet connection and try again',
+      };
+    }
+
+    const terminated = exitCode === 137 || mentions('killed') || mentions('sigterm');
+    if (terminated) {
+      return {
+        code: GeminiErrorCode.PROCESS_CRASHED,
+        message: 'Gemini CLI process was terminated',
+        recoverable: true,
+        suggestion: 'The process may have run out of memory. Try a simpler task.',
+      };
+    }
+
+    return {
+      code: GeminiErrorCode.UNKNOWN,
+      message: stderr || `Gemini CLI exited with code ${exitCode}`,
+      recoverable: false,
+    };
+  }
+
+  /**
+   * Override install instructions for Gemini-specific guidance
+   *
+   * Used by executeQuery as the suggestion attached to the NOT_INSTALLED error.
+   *
+   * @returns A one-line install hint (npm command plus project URL)
+   */
+  protected getInstallInstructions(): string {
+    return 'Install with: npm install -g @google/gemini-cli (or visit https://github.com/google-gemini/gemini-cli)';
+  }
+
+  /**
+   * Execute a prompt using Gemini CLI with streaming
+   *
+   * Spawns the CLI with the arguments from buildCliArgs plus the prompt text as
+   * the final positional argument, then yields each stream event that
+   * normalizeEvent turns into a message. The session ID from the `init` event
+   * is stamped onto later messages that carry none.
+   *
+   * @param options - Execution options (prompt, model, cwd, ...)
+   * @returns Async generator of normalized provider messages
+   * @throws GeminiError (NOT_INSTALLED) when no CLI path was detected
+   * @throws GeminiError built from mapError when the failure exposes `stderr`
+   * @throws The original error for any other failure; aborts end the stream silently
+   */
+  async *executeQuery(options: ExecuteOptions): AsyncGenerator<ProviderMessage> {
+    this.ensureCliDetected();
+
+    // Validate that model doesn't have a provider prefix
+    validateBareModelId(options.model, 'GeminiProvider');
+
+    if (!this.cliPath) {
+      throw this.createError(
+        GeminiErrorCode.NOT_INSTALLED,
+        'Gemini CLI is not installed',
+        true,
+        this.getInstallInstructions()
+      );
+    }
+
+    // Extract prompt text to pass as positional argument
+    const promptText = this.extractPromptText(options);
+
+    // Build CLI args and append the prompt as the last positional argument
+    const cliArgs = this.buildCliArgs(options);
+    cliArgs.push(promptText); // Gemini CLI uses positional args for the prompt
+
+    const subprocessOptions = this.buildSubprocessOptions(options, cliArgs);
+
+    // Filled in from the init event and reused for messages lacking a session_id
+    let sessionId: string | undefined;
+
+    logger.debug(`GeminiProvider.executeQuery called with model: "${options.model}"`);
+
+    try {
+      for await (const rawEvent of spawnJSONLProcess(subprocessOptions)) {
+        const event = rawEvent as GeminiStreamEvent;
+
+        // Capture session ID from init event
+        if (event.type === 'init') {
+          const initEvent = event as GeminiInitEvent;
+          sessionId = initEvent.session_id;
+          logger.debug(`Session started: ${sessionId}, model: ${initEvent.model}`);
+        }
+
+        // Normalize and yield the event
+        const normalized = this.normalizeEvent(event);
+        if (normalized) {
+          // Backfill the session ID when the event itself did not carry one
+          if (!normalized.session_id && sessionId) {
+            normalized.session_id = sessionId;
+          }
+          yield normalized;
+        }
+      }
+    } catch (error) {
+      // An abort is an expected way to stop: end the generator without throwing
+      if (isAbortError(error)) {
+        logger.debug('Query aborted');
+        return;
+      }
+
+      // Map CLI errors to GeminiError
+      if (error instanceof Error && 'stderr' in error) {
+        const errorInfo = this.mapError(
+          (error as { stderr?: string }).stderr || error.message,
+          (error as { exitCode?: number | null }).exitCode ?? null
+        );
+        throw this.createError(
+          errorInfo.code as GeminiErrorCode,
+          errorInfo.message,
+          errorInfo.recoverable,
+          errorInfo.suggestion
+        );
+      }
+      // Anything without stderr is not a CLI failure we can classify
+      throw error;
+    }
+  }
+
+  // ==========================================================================
+  // Gemini-Specific Methods
+  // ==========================================================================
+
+  /**
+   * Build a GeminiError: a plain Error decorated with code, recoverability,
+   * optional suggestion, and the name 'GeminiError'.
+   *
+   * @param code - Error classification
+   * @param message - Human-readable message
+   * @param recoverable - Whether the failure is considered recoverable (default false)
+   * @param suggestion - Optional hint on how to resolve the failure
+   * @returns The decorated error (not thrown here)
+   */
+  private createError(
+    code: GeminiErrorCode,
+    message: string,
+    recoverable: boolean = false,
+    suggestion?: string
+  ): GeminiError {
+    const details = { code, recoverable, suggestion, name: 'GeminiError' };
+    return Object.assign(new Error(message), details) as GeminiError;
+  }
+
+  /**
+   * Query the installed Gemini CLI for its version.
+   *
+   * Runs `<cliPath> --version` synchronously with a 5 second timeout.
+   *
+   * @returns The trimmed command output, or null when the CLI was not
+   *   detected or the command fails
+   */
+  async getVersion(): Promise<string | null> {
+    this.ensureCliDetected();
+
+    const cliPath = this.cliPath;
+    if (!cliPath) {
+      return null;
+    }
+
+    // Path is quoted so locations containing spaces still work
+    const command = `"${cliPath}" --version`;
+    try {
+      const output = execSync(command, {
+        encoding: 'utf8',
+        timeout: 5000,
+        stdio: 'pipe',
+      });
+      return output.trim();
+    } catch {
+      return null;
+    }
+  }
+
+  /**
+   * Check authentication status
+   *
+   * Fast, local-only check: no CLI process is spawned and no API call is
+   * made, so the result reflects configuration, not verified credentials.
+   *
+   * Signals are evaluated in this order and the first match decides:
+   * 1. GEMINI_API_KEY environment variable -> 'api_key'
+   * 2. GOOGLE_APPLICATION_CREDENTIALS or GOOGLE_CLOUD_PROJECT environment
+   *    variable -> 'vertex_ai'
+   * 3. security.auth.selectedType in ~/.gemini/settings.json:
+   *    "oauth*" -> 'google_login', "api-key" -> 'api_key',
+   *    "code-assist"/"codeassist" -> not authenticated (with an error),
+   *    any other non-empty value -> 'google_login'
+   * 4. Otherwise not authenticated, method 'none'
+   *
+   * @returns Authentication status; `authenticated: false, method: 'none'`
+   *   without further fields when the CLI itself was not detected
+   */
+  async checkAuth(): Promise<GeminiAuthStatus> {
+    this.ensureCliDetected();
+    if (!this.cliPath) {
+      logger.debug('checkAuth: CLI not found');
+      return { authenticated: false, method: 'none' };
+    }
+
+    logger.debug('checkAuth: Starting credential check');
+
+    // Determine the likely auth method based on environment
+    const hasApiKey = !!process.env.GEMINI_API_KEY;
+    // The only API key source checked here is the environment, so both flags match
+    const hasEnvApiKey = hasApiKey;
+    // Presence of either variable is taken as Vertex AI configuration; nothing is validated
+    const hasVertexAi = !!(
+      process.env.GOOGLE_APPLICATION_CREDENTIALS || process.env.GOOGLE_CLOUD_PROJECT
+    );
+
+    logger.debug(`checkAuth: hasApiKey=${hasApiKey}, hasVertexAi=${hasVertexAi}`);
+
+    // Check the Gemini settings file (~/.gemini/settings.json) for a selected auth type
+    const geminiConfigDir = path.join(os.homedir(), '.gemini');
+    const settingsPath = path.join(geminiConfigDir, 'settings.json');
+    // True only when the settings file names an auth type (not merely when the file exists)
+    let hasCredentialsFile = false;
+    let authType: string | null = null;
+
+    try {
+      await fs.access(settingsPath);
+      logger.debug(`checkAuth: Found settings file at ${settingsPath}`);
+      try {
+        const content = await fs.readFile(settingsPath, 'utf8');
+        const settings = JSON.parse(content);
+
+        // Auth config is at security.auth.selectedType (e.g., "oauth-personal", "oauth-adc", "api-key")
+        const selectedType = settings?.security?.auth?.selectedType;
+        if (selectedType) {
+          hasCredentialsFile = true;
+          authType = selectedType;
+          logger.debug(`checkAuth: Settings file has auth config, selectedType=${selectedType}`);
+        } else {
+          logger.debug(`checkAuth: Settings file found but no auth type configured`);
+        }
+      } catch (e) {
+        // Unreadable or malformed settings are treated as "no auth configured"
+        logger.debug(`checkAuth: Failed to parse settings file: ${e}`);
+      }
+    } catch {
+      logger.debug('checkAuth: No settings file found');
+    }
+
+    // If we have an API key, we're authenticated
+    if (hasApiKey) {
+      logger.debug('checkAuth: Using API key authentication');
+      return {
+        authenticated: true,
+        method: 'api_key',
+        hasApiKey,
+        hasEnvApiKey,
+        hasCredentialsFile,
+      };
+    }
+
+    // If we have Vertex AI credentials, we're authenticated
+    if (hasVertexAi) {
+      logger.debug('checkAuth: Using Vertex AI authentication');
+      return {
+        authenticated: true,
+        method: 'vertex_ai',
+        hasApiKey,
+        hasEnvApiKey,
+        hasCredentialsFile,
+      };
+    }
+
+    // Check if settings file indicates configured authentication
+    if (hasCredentialsFile && authType) {
+      // OAuth types: "oauth-personal", "oauth-adc"
+      // API key type: "api-key"
+      // Code assist: "code-assist" (requires IDE integration)
+      if (authType.startsWith('oauth')) {
+        logger.debug(`checkAuth: OAuth authentication configured (${authType})`);
+        return {
+          authenticated: true,
+          method: 'google_login',
+          hasApiKey,
+          hasEnvApiKey,
+          hasCredentialsFile,
+        };
+      }
+
+      if (authType === 'api-key') {
+        logger.debug('checkAuth: API key authentication configured in settings');
+        return {
+          authenticated: true,
+          method: 'api_key',
+          hasApiKey,
+          hasEnvApiKey,
+          hasCredentialsFile,
+        };
+      }
+
+      if (authType === 'code-assist' || authType === 'codeassist') {
+        logger.debug('checkAuth: Code Assist auth configured but requires local server');
+        return {
+          authenticated: false,
+          method: 'google_login',
+          hasApiKey,
+          hasEnvApiKey,
+          hasCredentialsFile,
+          error:
+            'Code Assist authentication requires IDE integration. Please use "gemini" CLI to log in with a different method, or set GEMINI_API_KEY.',
+        };
+      }
+
+      // Unknown auth type but something is configured
+      // It is optimistically reported as authenticated via google_login
+      logger.debug(`checkAuth: Unknown auth type configured: ${authType}`);
+      return {
+        authenticated: true,
+        method: 'google_login',
+        hasApiKey,
+        hasEnvApiKey,
+        hasCredentialsFile,
+      };
+    }
+
+    // No credentials found
+    logger.debug('checkAuth: No valid credentials found');
+    return {
+      authenticated: false,
+      method: 'none',
+      hasApiKey,
+      hasEnvApiKey,
+      hasCredentialsFile,
+      error:
+        'No authentication configured. Run "gemini" interactively to log in, or set GEMINI_API_KEY.',
+    };
+  }
+
+  /**
+   * Report installation and authentication state (required by BaseProvider).
+   *
+   * @returns Install flag, CLI version and path when known, whether
+   *   GEMINI_API_KEY is set, and the result of checkAuth
+   */
+  async detectInstallation(): Promise<InstallationStatus> {
+    const installed = await this.isInstalled();
+
+    // The version is only queried when the CLI is actually installed
+    let version: string | undefined;
+    if (installed) {
+      version = (await this.getVersion()) || undefined;
+    }
+
+    const { authenticated } = await this.checkAuth();
+
+    return {
+      installed,
+      version,
+      path: this.cliPath || undefined,
+      method: 'cli',
+      hasApiKey: Boolean(process.env.GEMINI_API_KEY),
+      authenticated,
+    };
+  }
+
+  /**
+   * Get the detected CLI path (public accessor for status endpoints)
+   *
+   * Runs CLI detection first so the value reflects the detection result.
+   *
+   * @returns The CLI path, or null when none is set
+   */
+  getCliPath(): string | null {
+    this.ensureCliDetected();
+    return this.cliPath;
+  }
+
+  /**
+   * List the Gemini models this provider offers.
+   *
+   * One definition is produced per GEMINI_MODEL_MAP entry; every model is
+   * reported as supporting tools.
+   *
+   * @returns Model definitions keyed by their full ID (with the gemini- prefix)
+   */
+  getAvailableModels(): ModelDefinition[] {
+    const entries = Object.entries(GEMINI_MODEL_MAP);
+
+    return entries.map(([modelId, modelConfig]) => {
+      return {
+        id: modelId, // Full model ID with gemini- prefix (e.g., 'gemini-2.5-flash')
+        name: modelConfig.label,
+        modelString: modelId, // Same as id - CLI uses the full model name
+        provider: 'gemini',
+        description: modelConfig.description,
+        supportsTools: true,
+        supportsVision: modelConfig.supportsVision,
+        contextWindow: modelConfig.contextWindow,
+      };
+    });
+  }
+
+  /**
+   * Tell whether this provider supports a named feature.
+   *
+   * @param feature - Feature name (exact, case-sensitive match)
+   * @returns true for 'tools', 'text', 'streaming', 'vision' and 'thinking'; false otherwise
+   */
+  supportsFeature(feature: string): boolean {
+    switch (feature) {
+      case 'tools':
+      case 'text':
+      case 'streaming':
+      case 'vision':
+      case 'thinking':
+        return true;
+      default:
+        return false;
+    }
+  }
+}
--- a/apps/server/src/providers/index.ts
+++ b/apps/server/src/providers/index.ts
@@ -0,0 +1,56 @@
+/**
+ * Provider exports
+ */
+
+// Base providers
+export { BaseProvider } from './base-provider.js';
+export {
+  CliProvider,
+  type SpawnStrategy,
+  type CliSpawnConfig,
+  type CliErrorInfo,
+} from './cli-provider.js';
+export type {
+  ProviderConfig,
+  ExecuteOptions,
+  ProviderMessage,
+  InstallationStatus,
+  ModelDefinition,
+  AgentDefinition,
+  ReasoningEffort,
+  SystemPromptPreset,
+  ConversationMessage,
+  ContentBlock,
+  ValidationResult,
+  McpServerConfig,
+  McpStdioServerConfig,
+  McpSSEServerConfig,
+  McpHttpServerConfig,
+} from './types.js';
+
+// Claude provider
+export { ClaudeProvider } from './claude-provider.js';
+
+// Cursor provider
+export { CursorProvider, CursorErrorCode, CursorError } from './cursor-provider.js';
+export { CursorConfigManager } from './cursor-config-manager.js';
+
+// OpenCode provider
+export { OpencodeProvider } from './opencode-provider.js';
+
+// Gemini provider
+export { GeminiProvider, GeminiErrorCode } from './gemini-provider.js';
+
+// Copilot provider (GitHub Copilot SDK)
+export { CopilotProvider, CopilotErrorCode } from './copilot-provider.js';
+
+// Provider factory
+export { ProviderFactory } from './provider-factory.js';
+
+// Simple query service - unified interface for basic AI queries
+export { simpleQuery, streamingQuery } from './simple-query-service.js';
+export type {
+  SimpleQueryOptions,
+  SimpleQueryResult,
+  StreamingQueryOptions,
+} from './simple-query-service.js';
--- a/apps/server/src/providers/opencode-provider.ts
+++ b/apps/server/src/providers/opencode-provider.ts
--- a/apps/server/src/providers/provider-factory.ts
+++ b/apps/server/src/providers/provider-factory.ts
@@ -1,51 +1,177 @@
 /**
 * Provider Factory - Routes model IDs to the appropriate provider
 *
- * This factory implements model-based routing to automatically select
- * the correct provider based on the model string. This makes adding
- * new providers (Cursor, OpenCode, etc.) trivial - just add one line.
+ * Uses a registry pattern for dynamic provider registration.
+ * Providers register themselves on import, making it easy to add new providers.
 */

 import { BaseProvider } from './base-provider.js';
-import { ClaudeProvider } from './claude-provider.js';
-import type { InstallationStatus } from './types.js';
+import type { InstallationStatus, ModelDefinition } from './types.js';
+import {
+  isCursorModel,
+  isCodexModel,
+  isOpencodeModel,
+  isGeminiModel,
+  isCopilotModel,
+  type ModelProvider,
+} from '@automaker/types';
+import * as fs from 'fs';
+import * as path from 'path';
+
+/**
+ * Marker file name per provider (keyed by lowercase provider name).
+ * isProviderDisconnected treats a provider as disconnected when its marker
+ * file exists under `.automaker/` in the current working directory.
+ */
+const DISCONNECTED_MARKERS: Record<string, string> = {
+  claude: '.claude-disconnected',
+  codex: '.codex-disconnected',
+  cursor: '.cursor-disconnected',
+  opencode: '.opencode-disconnected',
+  gemini: '.gemini-disconnected',
+  copilot: '.copilot-disconnected',
+};
+
+/**
+ * Tell whether a provider CLI has been disconnected from the app.
+ *
+ * A provider counts as disconnected when its marker file exists in the
+ * `.automaker` directory under the current working directory.
+ *
+ * @param providerName - Provider name (matched case-insensitively)
+ * @returns true when the marker file exists; false when it does not or the
+ *   provider has no marker defined
+ */
+export function isProviderDisconnected(providerName: string): boolean {
+  const key = providerName.toLowerCase();
+  const markerFile = DISCONNECTED_MARKERS[key];
+
+  return markerFile ? fs.existsSync(path.join(process.cwd(), '.automaker', markerFile)) : false;
+}
+
+/**
+ * Provider registration entry
+ *
+ * Stored in the provider registry by registerProvider and consulted by
+ * ProviderFactory when resolving providers by name, alias or model ID.
+ */
+interface ProviderRegistration {
+  /** Factory function to create provider instance */
+  factory: () => BaseProvider;
+  /** Aliases for this provider (e.g., 'anthropic' for 'claude') */
+  aliases?: string[];
+  /** Function to check if this provider can handle a model ID (called with the lowercased ID) */
+  canHandleModel?: (modelId: string) => boolean;
+  /** Priority for model matching (higher = checked first; treated as 0 when omitted) */
+  priority?: number;
+}
+
+/**
+ * Provider registry - stores registered providers
+ *
+ * Keys are lowercase provider names (registerProvider lowercases on insert).
+ */
+const providerRegistry = new Map<string, ProviderRegistration>();
+
+/**
+ * Add a provider to the factory's registry.
+ *
+ * The name is stored lowercased; registering the same name again replaces
+ * the earlier entry.
+ *
+ * @param name Provider name (e.g., 'claude', 'cursor')
+ * @param registration Provider registration config
+ */
+export function registerProvider(name: string, registration: ProviderRegistration): void {
+  const registryKey = name.toLowerCase();
+  providerRegistry.set(registryKey, registration);
+}

 export class ProviderFactory {
  /**
-   * Get the appropriate provider for a given model ID
+   * Determine which provider to use for a given model
   *
-   * @param modelId Model identifier (e.g., "claude-opus-4-5-20251101", "gpt-5.2", "cursor-fast")
-   * @returns Provider instance for the model
+   * @param model Model identifier
+   * @returns Provider name (ModelProvider type)
   */
-  static getProviderForModel(modelId: string): BaseProvider {
-    const lowerModel = modelId.toLowerCase();
+  static getProviderNameForModel(model: string): ModelProvider {
+    const lowerModel = model.toLowerCase();

-    // Claude models (claude-*, opus, sonnet, haiku)
-    if (lowerModel.startsWith('claude-') || ['haiku', 'sonnet', 'opus'].includes(lowerModel)) {
-      return new ClaudeProvider();
+    // Get all registered providers sorted by priority (descending)
+    const registrations = Array.from(providerRegistry.entries()).sort(
+      ([, a], [, b]) => (b.priority ?? 0) - (a.priority ?? 0)
+    );
+
+    // Check each provider's canHandleModel function
+    for (const [name, reg] of registrations) {
+      if (reg.canHandleModel?.(lowerModel)) {
+        return name as ModelProvider;
+      }
    }

-    // Future providers:
-    // if (lowerModel.startsWith("cursor-")) {
-    //   return new CursorProvider();
-    // }
-    // if (lowerModel.startsWith("opencode-")) {
-    //   return new OpenCodeProvider();
-    // }
+    // Fallback: Check for explicit prefixes
+    for (const [name] of registrations) {
+      if (lowerModel.startsWith(`${name}-`)) {
+        return name as ModelProvider;
+      }
+    }

-    // Default to Claude for unknown models
-    console.warn(`[ProviderFactory] Unknown model prefix for "${modelId}", defaulting to Claude`);
-    return new ClaudeProvider();
+    // Default to claude (first registered provider or claude)
+    return 'claude';
+  }
+
+  /**
+   * Get the appropriate provider for a given model ID
+   *
+   * @param modelId Model identifier (e.g., "claude-opus-4-5-20251101", "cursor-gpt-4o", "cursor-auto")
+   * @param options Optional settings
+   * @param options.throwOnDisconnected Throw error if provider is disconnected (default: true)
+   * @returns Provider instance for the model
+   * @throws Error if provider is disconnected and throwOnDisconnected is true
+   */
+  static getProviderForModel(
+    modelId: string,
+    options: { throwOnDisconnected?: boolean } = {}
+  ): BaseProvider {
+    const { throwOnDisconnected = true } = options;
+    const providerName = this.getProviderForModelName(modelId);
+
+    // Check if provider is disconnected
+    if (throwOnDisconnected && isProviderDisconnected(providerName)) {
+      throw new Error(
+        `${providerName.charAt(0).toUpperCase() + providerName.slice(1)} CLI is disconnected from the app. ` +
+          `Please go to Settings > Providers and click "Sign In" to reconnect.`
+      );
+    }
+
+    const provider = this.getProviderByName(providerName);
+
+    if (!provider) {
+      // Fallback to claude if provider not found
+      const claudeReg = providerRegistry.get('claude');
+      if (claudeReg) {
+        return claudeReg.factory();
+      }
+      throw new Error(`No provider found for model: ${modelId}`);
+    }
+
+    return provider;
+  }
+
+  /**
+   * Get the provider name for a given model ID (without creating provider instance)
+   */
+  static getProviderForModelName(modelId: string): string {
+    const lowerModel = modelId.toLowerCase();
+
+    // Get all registered providers sorted by priority (descending)
+    const registrations = Array.from(providerRegistry.entries()).sort(
+      ([, a], [, b]) => (b.priority ?? 0) - (a.priority ?? 0)
+    );
+
+    // Check each provider's canHandleModel function
+    for (const [name, reg] of registrations) {
+      if (reg.canHandleModel?.(lowerModel)) {
+        return name;
+      }
+    }
+
+    // Fallback: Check for explicit prefixes
+    for (const [name] of registrations) {
+      if (lowerModel.startsWith(`${name}-`)) {
+        return name;
+      }
+    }
+
+    // Default to claude (first registered provider or claude)
+    return 'claude';
  }

  /**
   * Get all available providers
   */
  static getAllProviders(): BaseProvider[] {
-    return [
-      new ClaudeProvider(),
-      // Future providers...
-    ];
+    return Array.from(providerRegistry.values()).map((reg) => reg.factory());
  }

  /**
@@ -54,11 +180,10 @@ export class ProviderFactory {
   * @returns Map of provider name to installation status
   */
  static async checkAllProviders(): Promise<Record<string, InstallationStatus>> {
-    const providers = this.getAllProviders();
    const statuses: Record<string, InstallationStatus> = {};

-    for (const provider of providers) {
-      const name = provider.getName();
+    for (const [name, reg] of providerRegistry.entries()) {
+      const provider = reg.factory();
      const status = await provider.detectInstallation();
      statuses[name] = status;
    }
@@ -69,40 +194,137 @@ export class ProviderFactory {
  /**
   * Get provider by name (for direct access if needed)
   *
-   * @param name Provider name (e.g., "claude", "cursor")
+   * @param name Provider name (e.g., "claude", "cursor") or alias (e.g., "anthropic")
   * @returns Provider instance or null if not found
   */
  static getProviderByName(name: string): BaseProvider | null {
    const lowerName = name.toLowerCase();

-    switch (lowerName) {
-      case 'claude':
-      case 'anthropic':
-        return new ClaudeProvider();
-
-      // Future providers:
-      // case "cursor":
-      //   return new CursorProvider();
-      // case "opencode":
-      //   return new OpenCodeProvider();
-
-      default:
-        return null;
+    // An exact registry key wins over any alias
+    const exact = providerRegistry.get(lowerName);
+    if (exact) {
+      return exact.factory();
    }
+
+    // Otherwise pick the first registration that lists the name as an alias
+    const aliased = Array.from(providerRegistry.values()).find((reg) =>
+      reg.aliases?.includes(lowerName)
+    );
+    if (aliased) {
+      return aliased.factory();
+    }
+    return null;
  }

  /**
   * Get all available models from all providers
   */
-  static getAllAvailableModels() {
+  static getAllAvailableModels(): ModelDefinition[] {
    const providers = this.getAllProviders();
-    const allModels = [];
+    return providers.map((provider) => provider.getAvailableModels()).flat();
+  }

-    for (const provider of providers) {
-      const models = provider.getAvailableModels();
-      allModels.push(...models);
+  /**
+   * List the names under which providers are currently registered
+   */
+  static getRegisteredProviderNames(): string[] {
+    return [...providerRegistry.keys()];
+  }
+
+  /**
+   * Check if a specific model supports vision/image input
+   *
+   * @param modelId Model identifier, with or without a provider prefix/suffix
+   * @returns The matched model's supportsVision flag; true if unset or no match
+   */
+  static modelSupportsVision(modelId: string): boolean {
+    const provider = this.getProviderForModel(modelId);
+    // Loop-invariant: the ID with a provider name stripped from either end
+    const withoutPrefix = modelId.replace(/^(claude|cursor|codex|gemini)-/, '');
+    const withoutSuffix = modelId.replace(/-(claude|cursor|codex|gemini)$/, '');
+    for (const model of provider.getAvailableModels()) {
+      if (
+        model.id === modelId ||
+        model.modelString === modelId ||
+        model.id.endsWith(`-${modelId}`) ||
+        model.modelString.endsWith(`-${modelId}`) ||
+        model.modelString === withoutPrefix ||
+        model.modelString === withoutSuffix
+      ) {
+        return model.supportsVision ?? true;
+      }
    }

-    return allModels;
+    // No model of the resolved provider matched. Exact id/modelString matches are
+    // already handled by the loop above, so a second exact-match pass is not needed.
+    //
+    // NOTE(review): the prefix/suffix patterns above name only four providers,
+    // while opencode and copilot are registered as well - confirm whether IDs
+    // carrying those names should be stripped too.
+    //
+    // Default to true (Claude SDK supports vision by default)
+    return true;
  }
 }
+
+// =============================================================================
+// Provider Registrations
+// =============================================================================
+
+// Import the provider classes that the factories registered below instantiate
+import { ClaudeProvider } from './claude-provider.js';
+import { CursorProvider } from './cursor-provider.js';
+import { CodexProvider } from './codex-provider.js';
+import { OpencodeProvider } from './opencode-provider.js';
+import { GeminiProvider } from './gemini-provider.js';
+import { CopilotProvider } from './copilot-provider.js';
+
+// Register Claude provider (also reachable via the 'anthropic' alias)
+registerProvider('claude', {
+  factory: () => new ClaudeProvider(),
+  aliases: ['anthropic'],
+  canHandleModel: (model: string) => {
+    return (
+      model.startsWith('claude-') || ['opus', 'sonnet', 'haiku'].some((n) => model.includes(n))
+    );
+  },
+  priority: 0, // Lowest priority - consulted only after every other provider below
+});
+
+// Register Cursor provider
+registerProvider('cursor', {
+  factory: () => new CursorProvider(),
+  canHandleModel: (model: string) => isCursorModel(model),
+  priority: 10, // Highest priority - Cursor models are checked first
+});
+
+// Register Codex provider (also reachable via the 'openai' alias)
+registerProvider('codex', {
+  factory: () => new CodexProvider(),
+  aliases: ['openai'],
+  canHandleModel: (model: string) => isCodexModel(model),
+  priority: 5, // Medium priority - checked after Cursor (10) and Copilot (6), before Gemini (4)
+});
+
+// Register OpenCode provider
+registerProvider('opencode', {
+  factory: () => new OpencodeProvider(),
+  canHandleModel: (model: string) => isOpencodeModel(model),
+  priority: 3, // Between gemini (4) and claude (0)
+});
+
+// Register Gemini provider (also reachable via the 'google' alias)
+registerProvider('gemini', {
+  factory: () => new GeminiProvider(),
+  aliases: ['google'],
+  canHandleModel: (model: string) => isGeminiModel(model),
+  priority: 4, // Between opencode (3) and codex (5)
+});
+
+// Register Copilot provider (GitHub Copilot SDK)
+registerProvider('copilot', {
+  factory: () => new CopilotProvider(),
+  aliases: ['github-copilot', 'github'],
+  canHandleModel: (model: string) => isCopilotModel(model),
+  priority: 6, // High priority - check before Codex since both can handle GPT models
+});
--- a/apps/server/src/providers/simple-query-service.ts
+++ b/apps/server/src/providers/simple-query-service.ts
@@ -0,0 +1,275 @@
+/**
+ * Simple Query Service - Simplified interface for basic AI queries
+ *
+ * Use this for routes that need simple text responses without
+ * complex event handling. This service abstracts away the provider
+ * selection and streaming details, providing a clean interface
+ * for common query patterns.
+ *
+ * Benefits:
+ * - No direct SDK imports needed in route files
+ * - Consistent provider routing based on model
+ * - Automatic text extraction from streaming responses
+ * - Structured output support for JSON schema responses
+ * - Eliminates duplicate extractTextFromStream() functions
+ */
+
+import { ProviderFactory } from './provider-factory.js';
+import type {
+  ProviderMessage,
+  ContentBlock,
+  ThinkingLevel,
+  ReasoningEffort,
+  ClaudeApiProfile,
+  ClaudeCompatibleProvider,
+  Credentials,
+} from '@automaker/types';
+import { stripProviderPrefix } from '@automaker/types';
+
+/**
+ * Options accepted by simpleQuery() and, through StreamingQueryOptions, by streamingQuery()
+ */
+export interface SimpleQueryOptions {
+  /** The prompt to send to the AI (can be text or multi-part content) */
+  prompt: string | Array<{ type: string; text?: string; source?: object }>;
+  /** Model to use (with or without provider prefix); DEFAULT_MODEL when omitted or empty */
+  model?: string;
+  /** Working directory for the query */
+  cwd: string;
+  /** System prompt (combined with user prompt for some providers) */
+  systemPrompt?: string;
+  /** Maximum turns for agentic operations (default: 1 in simpleQuery, 250 in streamingQuery) */
+  maxTurns?: number;
+  /** Tools to allow (default: [] in simpleQuery, ['Read', 'Glob', 'Grep'] in streamingQuery) */
+  allowedTools?: string[];
+  /** Abort controller for cancellation */
+  abortController?: AbortController;
+  /** Structured output format for JSON responses */
+  outputFormat?: {
+    type: 'json_schema';
+    schema: Record<string, unknown>;
+  };
+  /** Thinking level for Claude models */
+  thinkingLevel?: ThinkingLevel;
+  /** Reasoning effort for Codex/OpenAI models */
+  reasoningEffort?: ReasoningEffort;
+  /** If true, runs in read-only mode (no file writes) */
+  readOnly?: boolean;
+  /** Setting sources for CLAUDE.md loading */
+  settingSources?: Array<'user' | 'project' | 'local'>;
+  /**
+   * Active Claude API profile for alternative endpoint configuration
+   * @deprecated Use claudeCompatibleProvider instead
+   */
+  claudeApiProfile?: ClaudeApiProfile;
+  /**
+   * Claude-compatible provider for alternative endpoint configuration.
+   * Takes precedence over claudeApiProfile if both are set.
+   */
+  claudeCompatibleProvider?: ClaudeCompatibleProvider;
+  /** Credentials for resolving 'credentials' apiKeySource in Claude API profiles/providers */
+  credentials?: Credentials;
+}
+
+/**
+ * Value returned by simpleQuery() and streamingQuery()
+ */
+export interface SimpleQueryResult {
+  /** Accumulated assistant text, or the final result text when that one is longer */
+  text: string;
+  /** Structured output taken from a successful result message, when one was present */
+  structured_output?: Record<string, unknown>;
+}
+
+/**
+ * Options for streamingQuery(): every SimpleQueryOptions field plus progress callbacks
+ */
+export interface StreamingQueryOptions extends SimpleQueryOptions {
+  /** Called with each non-empty text block of an assistant message */
+  onText?: (text: string) => void;
+  /** Called with the tool name and input of each tool_use block */
+  onToolUse?: (tool: string, input: unknown) => void;
+  /** Called with the content of each non-empty thinking block (if available) */
+  onThinking?: (thinking: string) => void;
+}
+
+/**
+ * Model used when options.model is omitted or empty
+ */
+const DEFAULT_MODEL = 'claude-sonnet-4-20250514';
+
+/**
+ * Run a query to completion and return its collected text
+ *
+ * Intended for one-shot requests where only the final answer
+ * matters. Defaults to a single turn with no tools; reach for
+ * streamingQuery() when progress callbacks are needed.
+ *
+ * @example
+ * ```typescript
+ * const result = await simpleQuery({
+ *   prompt: 'Generate a title for: user authentication',
+ *   cwd: process.cwd(),
+ *   systemPrompt: 'You are a title generator...',
+ *   maxTurns: 1,
+ *   allowedTools: [],
+ * });
+ * console.log(result.text); // "Add user authentication"
+ * ```
+ */
+export async function simpleQuery(options: SimpleQueryOptions): Promise<SimpleQueryResult> {
+  const requestedModel = options.model || DEFAULT_MODEL;
+  const provider = ProviderFactory.getProviderForModel(requestedModel);
+
+  // Forward caller options, filling in the single-turn / no-tools defaults
+  const providerOptions = {
+    prompt: options.prompt,
+    model: stripProviderPrefix(requestedModel),
+    originalModel: requestedModel,
+    cwd: options.cwd,
+    systemPrompt: options.systemPrompt,
+    maxTurns: options.maxTurns ?? 1,
+    allowedTools: options.allowedTools ?? [],
+    abortController: options.abortController,
+    outputFormat: options.outputFormat,
+    thinkingLevel: options.thinkingLevel,
+    reasoningEffort: options.reasoningEffort,
+    readOnly: options.readOnly,
+    settingSources: options.settingSources,
+    claudeApiProfile: options.claudeApiProfile, // Legacy alternative-endpoint profile
+    claudeCompatibleProvider: options.claudeCompatibleProvider, // Takes precedence over the profile
+    credentials: options.credentials, // Resolves the 'credentials' apiKeySource
+  };
+
+  let collectedText = '';
+  let structured: Record<string, unknown> | undefined;
+
+  for await (const msg of provider.executeQuery(providerOptions)) {
+    // A provider-level error aborts the whole query
+    if (msg.type === 'error') {
+      throw new Error(msg.error || 'Provider returned an error');
+    }
+
+    // Assistant messages contribute their text blocks
+    if (msg.type === 'assistant' && msg.message?.content) {
+      for (const block of msg.message.content) {
+        if (block.type === 'text' && block.text) {
+          collectedText += block.text;
+        }
+      }
+    }
+
+    // Everything below only concerns result messages
+    if (msg.type !== 'result') {
+      continue;
+    }
+    if (msg.subtype === 'error_max_turns') {
+      // Max turns reached - return what we have
+      break;
+    }
+    if (msg.subtype === 'error_max_structured_output_retries') {
+      throw new Error('Could not produce valid structured output after retries');
+    }
+    if (msg.subtype === 'success') {
+      // Prefer the final result text when it is longer than what was collected
+      if (msg.result && msg.result.length > collectedText.length) {
+        collectedText = msg.result;
+      }
+      if (msg.structured_output) {
+        structured = msg.structured_output;
+      }
+    }
+  }
+
+  return { text: collectedText, structured_output: structured };
+}
+
+/**
+ * Run a query while reporting progress through callbacks
+ *
+ * Text, tool-use and thinking blocks are handed to the matching
+ * callback as they arrive; the collected text is still returned.
+ *
+ * @example
+ * ```typescript
+ * const result = await streamingQuery({
+ *   prompt: 'Analyze this project and suggest improvements',
+ *   cwd: '/path/to/project',
+ *   maxTurns: 250,
+ *   allowedTools: ['Read', 'Glob', 'Grep'],
+ *   onText: (text) => emitProgress(text),
+ *   onToolUse: (tool, input) => emitToolUse(tool, input),
+ * });
+ * ```
+ */
+export async function streamingQuery(options: StreamingQueryOptions): Promise<SimpleQueryResult> {
+  const requestedModel = options.model || DEFAULT_MODEL;
+  const provider = ProviderFactory.getProviderForModel(requestedModel);
+
+  // Forward caller options, filling in the multi-turn / read-only-tools defaults
+  const providerOptions = {
+    prompt: options.prompt,
+    model: stripProviderPrefix(requestedModel),
+    originalModel: requestedModel,
+    cwd: options.cwd,
+    systemPrompt: options.systemPrompt,
+    maxTurns: options.maxTurns ?? 250,
+    allowedTools: options.allowedTools ?? ['Read', 'Glob', 'Grep'],
+    abortController: options.abortController,
+    outputFormat: options.outputFormat,
+    thinkingLevel: options.thinkingLevel,
+    reasoningEffort: options.reasoningEffort,
+    readOnly: options.readOnly,
+    settingSources: options.settingSources,
+    claudeApiProfile: options.claudeApiProfile, // Legacy alternative-endpoint profile
+    claudeCompatibleProvider: options.claudeCompatibleProvider, // Takes precedence over the profile
+    credentials: options.credentials, // Resolves the 'credentials' apiKeySource
+  };
+
+  let collectedText = '';
+  let structured: Record<string, unknown> | undefined;
+
+  for await (const msg of provider.executeQuery(providerOptions)) {
+    // A provider-level error aborts the whole query
+    if (msg.type === 'error') {
+      throw new Error(msg.error || 'Provider returned an error');
+    }
+
+    // Assistant messages: collect text and notify the matching callback per block
+    if (msg.type === 'assistant' && msg.message?.content) {
+      for (const block of msg.message.content) {
+        if (block.type === 'text' && block.text) {
+          collectedText += block.text;
+          options.onText?.(block.text);
+        } else if (block.type === 'tool_use' && block.name) {
+          options.onToolUse?.(block.name, block.input);
+        } else if (block.type === 'thinking' && block.thinking) {
+          options.onThinking?.(block.thinking);
+        }
+      }
+    }
+
+    // Everything below only concerns result messages
+    if (msg.type !== 'result') {
+      continue;
+    }
+    if (msg.subtype === 'error_max_turns') {
+      // Max turns reached - return what we have
+      break;
+    }
+    if (msg.subtype === 'error_max_structured_output_retries') {
+      throw new Error('Could not produce valid structured output after retries');
+    }
+    if (msg.subtype === 'success') {
+      // Prefer the final result text when it is longer than what was collected
+      if (msg.result && msg.result.length > collectedText.length) {
+        collectedText = msg.result;
+      }
+      if (msg.structured_output) {
+        structured = msg.structured_output;
+      }
+    }
+  }
+
+  return { text: collectedText, structured_output: structured };
+}
--- a/apps/server/src/providers/tool-normalization.ts
+++ b/apps/server/src/providers/tool-normalization.ts
@@ -0,0 +1,112 @@
+/**
+ * Shared tool normalization utilities for AI providers
+ *
+ * These utilities help normalize tool inputs from various AI providers
+ * to the standard format expected by the application.
+ */
+
+/**
+ * The three status values a normalized todo may carry
+ */
+type TodoStatus = 'pending' | 'in_progress' | 'completed';
+
+/**
+ * Lookup set used by normalizeStatus to recognise already-valid status strings
+ */
+const VALID_STATUSES = new Set<TodoStatus>(['pending', 'in_progress', 'completed']);
+
+/**
+ * Todo item as received from a provider; its text may be in content or description
+ */
+interface ProviderTodo {
+  description?: string;
+  content?: string;
+  status?: string;
+}
+
+/**
+ * Standard todo format produced by normalizeTodos
+ */
+interface NormalizedTodo {
+  content: string;
+  status: TodoStatus;
+  activeForm: string;
+}
+
+/**
+ * Map a provider-reported status onto one of the standard TodoStatus values
+ */
+function normalizeStatus(status: string | undefined): TodoStatus {
+  // Both spellings of "cancelled" are folded into 'completed'
+  if (status === 'cancelled' || status === 'canceled') return 'completed';
+  // Missing or unrecognised values fall back to 'pending'
+  return VALID_STATUSES.has(status as TodoStatus) ? (status as TodoStatus) : 'pending';
+}
+
+/**
+ * Convert a provider's todo list into the standard todo shape
+ *
+ * Accepted input shapes:
+ * - Gemini: { description, status } with 'cancelled' as possible status
+ * - Copilot: { content/description, status } with 'cancelled' as possible status
+ *
+ * Output shape (Claude/Standard):
+ * - { content, status, activeForm } where status is 'pending'|'in_progress'|'completed'
+ */
+export function normalizeTodos(todos: ProviderTodo[] | null | undefined): NormalizedTodo[] {
+  if (!todos) return [];
+
+  return todos.map((todo) => {
+    // content wins over description; the same text doubles as activeForm
+    const label = todo.content || todo.description || '';
+    return { content: label, status: normalizeStatus(todo.status), activeForm: label };
+  });
+}
+
+/**
+ * Ensure a tool input carries its file path under `file_path`
+ *
+ * When `file_path` is missing or falsy, the first truthy alias among
+ * path, file, filename, filePath (in that order) is copied into it.
+ */
+export function normalizeFilePathInput(input: Record<string, unknown>): Record<string, unknown> {
+  const result = { ...input };
+  if (result.file_path) return result;
+
+  const alias = ['path', 'file', 'filename', 'filePath'].find((key) => input[key]);
+  if (alias !== undefined) {
+    result.file_path = input[alias];
+  }
+  return result;
+}
+
+/**
+ * Ensure a tool input carries its shell command under `command`
+ *
+ * When `command` is missing or falsy, `cmd` is used if truthy, otherwise
+ * `script`; the result is a copy and the input object is not modified.
+ */
+export function normalizeCommandInput(input: Record<string, unknown>): Record<string, unknown> {
+  const result = { ...input };
+  if (!result.command) {
+    const fallback = input.cmd || input.script;
+    if (fallback) result.command = fallback;
+  }
+  return result;
+}
+
+/**
+ * Ensure a tool input carries its search pattern under `pattern`
+ *
+ * When `pattern` is missing or falsy, the first truthy value among
+ * query, search, regex (in that order) is copied into it.
+ */
+export function normalizePatternInput(input: Record<string, unknown>): Record<string, unknown> {
+  const result = { ...input };
+
+  if (!result.pattern) {
+    const fallback = input.query || input.search || input.regex;
+    if (fallback) result.pattern = fallback;
+  }
+  return result;
+}
--- a/apps/server/src/providers/types.ts
+++ b/apps/server/src/providers/types.ts
@@ -1,104 +1,25 @@
 /**
 * Shared types for AI model providers
+ *
+ * Re-exports types from @automaker/types for consistency across the codebase.
+ * All provider types are defined in @automaker/types to avoid duplication.
 */

-/**
- * Configuration for a provider instance
- */
-export interface ProviderConfig {
-  apiKey?: string;
-  cliPath?: string;
-  env?: Record<string, string>;
-}
-
-/**
- * Message in conversation history
- */
-export interface ConversationMessage {
-  role: 'user' | 'assistant';
-  content: string | Array<{ type: string; text?: string; source?: object }>;
-}
-
-/**
- * Options for executing a query via a provider
- */
-export interface ExecuteOptions {
-  prompt: string | Array<{ type: string; text?: string; source?: object }>;
-  model: string;
-  cwd: string;
-  systemPrompt?: string;
-  maxTurns?: number;
-  allowedTools?: string[];
-  mcpServers?: Record<string, unknown>;
-  abortController?: AbortController;
-  conversationHistory?: ConversationMessage[]; // Previous messages for context
-  sdkSessionId?: string; // Claude SDK session ID for resuming conversations
-}
-
-/**
- * Content block in a provider message (matches Claude SDK format)
- */
-export interface ContentBlock {
-  type: 'text' | 'tool_use' | 'thinking' | 'tool_result';
-  text?: string;
-  thinking?: string;
-  name?: string;
-  input?: unknown;
-  tool_use_id?: string;
-  content?: string;
-}
-
-/**
- * Message returned by a provider (matches Claude SDK streaming format)
- */
-export interface ProviderMessage {
-  type: 'assistant' | 'user' | 'error' | 'result';
-  subtype?: 'success' | 'error';
-  session_id?: string;
-  message?: {
-    role: 'user' | 'assistant';
-    content: ContentBlock[];
-  };
-  result?: string;
-  error?: string;
-  parent_tool_use_id?: string | null;
-}
-
-/**
- * Installation status for a provider
- */
-export interface InstallationStatus {
-  installed: boolean;
-  path?: string;
-  version?: string;
-  method?: 'cli' | 'npm' | 'brew' | 'sdk';
-  hasApiKey?: boolean;
-  authenticated?: boolean;
-  error?: string;
-}
-
-/**
- * Validation result
- */
-export interface ValidationResult {
-  valid: boolean;
-  errors: string[];
-  warnings?: string[];
-}
-
-/**
- * Model definition
- */
-export interface ModelDefinition {
-  id: string;
-  name: string;
-  modelString: string;
-  provider: string;
-  description: string;
-  contextWindow?: number;
-  maxOutputTokens?: number;
-  supportsVision?: boolean;
-  supportsTools?: boolean;
-  tier?: 'basic' | 'standard' | 'premium';
-  default?: boolean;
-}
+// Re-export all provider types from @automaker/types
+export type {
+  ProviderConfig,
+  ConversationMessage,
+  ExecuteOptions,
+  McpServerConfig,
+  McpStdioServerConfig,
+  McpSSEServerConfig,
+  McpHttpServerConfig,
+  ContentBlock,
+  ProviderMessage,
+  InstallationStatus,
+  ValidationResult,
+  ModelDefinition,
+  AgentDefinition,
+  ReasoningEffort,
+  SystemPromptPreset,
+} from '@automaker/types';
--- a/apps/server/src/routes/agent/index.ts
+++ b/apps/server/src/routes/agent/index.ts
@@ -12,6 +12,10 @@ import { createHistoryHandler } from './routes/history.js';
 import { createStopHandler } from './routes/stop.js';
 import { createClearHandler } from './routes/clear.js';
 import { createModelHandler } from './routes/model.js';
+import { createQueueAddHandler } from './routes/queue-add.js';
+import { createQueueListHandler } from './routes/queue-list.js';
+import { createQueueRemoveHandler } from './routes/queue-remove.js';
+import { createQueueClearHandler } from './routes/queue-clear.js';

 export function createAgentRoutes(agentService: AgentService, _events: EventEmitter): Router {
  const router = Router();
@@ -27,5 +31,15 @@ export function createAgentRoutes(agentService: AgentService, _events: EventEmit
  router.post('/clear', createClearHandler(agentService));
  router.post('/model', createModelHandler(agentService));

+  // Queue routes
+  router.post(
+    '/queue/add',
+    validatePathParams('imagePaths[]'),
+    createQueueAddHandler(agentService)
+  );
+  router.post('/queue/list', createQueueListHandler(agentService));
+  router.post('/queue/remove', createQueueRemoveHandler(agentService));
+  router.post('/queue/clear', createQueueClearHandler(agentService));
+
  return router;
 }
--- a/apps/server/src/routes/agent/routes/queue-add.ts
+++ b/apps/server/src/routes/agent/routes/queue-add.ts
@@ -0,0 +1,41 @@
+/**
+ * POST /queue/add endpoint - Add a prompt to the queue
+ */
+
+import type { Request, Response } from 'express';
+import type { ThinkingLevel } from '@automaker/types';
+import { AgentService } from '../../../services/agent-service.js';
+import { getErrorMessage, logError } from '../common.js';
+
+export function createQueueAddHandler(agentService: AgentService) {
+  // Body shape this endpoint reads; only sessionId and message are mandatory
+  type QueueAddBody = {
+    sessionId: string;
+    message: string;
+    imagePaths?: string[];
+    model?: string;
+    thinkingLevel?: ThinkingLevel;
+  };
+
+  return async (req: Request, res: Response): Promise<void> => {
+    try {
+      const { sessionId, message, imagePaths, model, thinkingLevel } = req.body as QueueAddBody;
+
+      // Happy path first: queue the prompt and relay the service's answer
+      if (sessionId && message) {
+        const queued = { message, imagePaths, model, thinkingLevel };
+        res.json(await agentService.addToQueue(sessionId, queued));
+        return;
+      }
+
+      // Reject requests that are missing either mandatory field
+      res.status(400).json({
+        success: false,
+        error: 'sessionId and message are required',
+      });
+    } catch (error) {
+      logError(error, 'Add to queue failed');
+      res.status(500).json({ success: false, error: getErrorMessage(error) });
+    }
+  };
+}
--- a/apps/server/src/routes/agent/routes/queue-clear.ts
+++ b/apps/server/src/routes/agent/routes/queue-clear.ts
@@ -0,0 +1,29 @@
+/**
+ * POST /queue/clear endpoint - Clear all prompts from the queue
+ */
+
+import type { Request, Response } from 'express';
+import { AgentService } from '../../../services/agent-service.js';
+import { getErrorMessage, logError } from '../common.js';
+
+export function createQueueClearHandler(agentService: AgentService) {
+  return async (req: Request, res: Response): Promise<void> => {
+    try {
+      const { sessionId } = req.body as { sessionId: string };
+
+      // Happy path first: a session ID is all this endpoint needs
+      if (sessionId) {
+        res.json(await agentService.clearQueue(sessionId));
+        return;
+      }
+
+      res.status(400).json({
+        success: false,
+        error: 'sessionId is required',
+      });
+    } catch (error) {
+      logError(error, 'Clear queue failed');
+      res.status(500).json({ success: false, error: getErrorMessage(error) });
+    }
+  };
+}
--- a/apps/server/src/routes/agent/routes/queue-list.ts
+++ b/apps/server/src/routes/agent/routes/queue-list.ts
@@ -0,0 +1,29 @@
+/**
+ * POST /queue/list endpoint - List queued prompts
+ */
+
+import type { Request, Response } from 'express';
+import { AgentService } from '../../../services/agent-service.js';
+import { getErrorMessage, logError } from '../common.js';
+
+export function createQueueListHandler(agentService: AgentService) {
+  return async (req: Request, res: Response): Promise<void> => {
+    try {
+      const { sessionId } = req.body as { sessionId: string };
+
+      // Happy path first: relay whatever getQueue returns for the session
+      if (sessionId) {
+        res.json(agentService.getQueue(sessionId));
+        return;
+      }
+
+      res.status(400).json({
+        success: false,
+        error: 'sessionId is required',
+      });
+    } catch (error) {
+      logError(error, 'List queue failed');
+      res.status(500).json({ success: false, error: getErrorMessage(error) });
+    }
+  };
+}
--- a/apps/server/src/routes/agent/routes/queue-remove.ts
+++ b/apps/server/src/routes/agent/routes/queue-remove.ts
@@ -0,0 +1,32 @@
+/**
+ * POST /queue/remove endpoint - Remove a prompt from the queue
+ */
+
+import type { Request, Response } from 'express';
+import { AgentService } from '../../../services/agent-service.js';
+import { getErrorMessage, logError } from '../common.js';
+
+export function createQueueRemoveHandler(agentService: AgentService) {
+  return async (req: Request, res: Response): Promise<void> => {
+    try {
+      const { sessionId, promptId } = req.body as {
+        sessionId: string;
+        promptId: string;
+      };
+
+      // Happy path first: both identifiers are forwarded to removeFromQueue
+      if (sessionId && promptId) {
+        res.json(await agentService.removeFromQueue(sessionId, promptId));
+        return;
+      }
+
+      res.status(400).json({
+        success: false,
+        error: 'sessionId and promptId are required',
+      });
+    } catch (error) {
+      logError(error, 'Remove from queue failed');
+      res.status(500).json({ success: false, error: getErrorMessage(error) });
+    }
+  };
+}
--- a/apps/server/src/routes/agent/routes/send.ts
+++ b/apps/server/src/routes/agent/routes/send.ts
@@ -3,6 +3,7 @@
 */

 import type { Request, Response } from 'express';
+import type { ThinkingLevel } from '@automaker/types';
 import { AgentService } from '../../../services/agent-service.js';
 import { createLogger } from '@automaker/utils';
 import { getErrorMessage, logError } from '../common.js';
@@ -11,15 +12,27 @@ const logger = createLogger('Agent');
 export function createSendHandler(agentService: AgentService) {
  return async (req: Request, res: Response): Promise<void> => {
    try {
-      const { sessionId, message, workingDirectory, imagePaths, model } = req.body as {
-        sessionId: string;
-        message: string;
-        workingDirectory?: string;
-        imagePaths?: string[];
-        model?: string;
-      };
+      const { sessionId, message, workingDirectory, imagePaths, model, thinkingLevel } =
+        req.body as {
+          sessionId: string;
+          message: string;
+          workingDirectory?: string;
+          imagePaths?: string[];
+          model?: string;
+          thinkingLevel?: ThinkingLevel;
+        };
+
+      logger.debug('Received request:', {
+        sessionId,
+        messageLength: message?.length,
+        workingDirectory,
+        imageCount: imagePaths?.length || 0,
+        model,
+        thinkingLevel,
+      });

      if (!sessionId || !message) {
+        logger.warn('Validation failed - missing sessionId or message');
        res.status(400).json({
          success: false,
          error: 'sessionId and message are required',
@@ -27,6 +40,8 @@ export function createSendHandler(agentService: AgentService) {
        return;
      }

+      logger.debug('Validation passed, calling agentService.sendMessage()');
+
      // Start the message processing (don't await - it streams via WebSocket)
      agentService
        .sendMessage({
@@ -35,14 +50,19 @@ export function createSendHandler(agentService: AgentService) {
          workingDirectory,
          imagePaths,
          model,
+          thinkingLevel,
        })
        .catch((error) => {
+          logger.error('Background error in sendMessage():', error);
          logError(error, 'Send message failed (background)');
        });

+      logger.debug('Returning immediate response to client');
+
      // Return immediately - responses come via WebSocket
      res.json({ success: true, message: 'Message sent' });
    } catch (error) {
+      logger.error('Synchronous error:', error);
      logError(error, 'Send message failed');
      res.status(500).json({ success: false, error: getErrorMessage(error) });
    }
--- a/apps/server/src/routes/app-spec/common.ts
+++ b/apps/server/src/routes/app-spec/common.ts
@@ -6,26 +6,103 @@ import { createLogger } from '@automaker/utils';

 const logger = createLogger('SpecRegeneration');

-// Shared state for tracking generation status - private
-let isRunning = false;
-let currentAbortController: AbortController | null = null;
+// Types for running generation
+export type GenerationType = 'spec_regeneration' | 'feature_generation' | 'sync';
+
+interface RunningGeneration {
+  isRunning: boolean;
+  type: GenerationType;
+  startedAt: string;
+}
+
+// Shared state for tracking generation status - scoped by project path
+const runningProjects = new Map<string, RunningGeneration>();
+const abortControllers = new Map<string, AbortController>();

 /**
- * Get the current running state
+ * Get the running state for a specific project
 */
-export function getSpecRegenerationStatus(): {
+export function getSpecRegenerationStatus(projectPath?: string): {
  isRunning: boolean;
  currentAbortController: AbortController | null;
+  projectPath?: string;
+  type?: GenerationType;
+  startedAt?: string;
 } {
-  return { isRunning, currentAbortController };
+  if (projectPath) {
+    const generation = runningProjects.get(projectPath);
+    return {
+      isRunning: generation?.isRunning || false,
+      currentAbortController: abortControllers.get(projectPath) || null,
+      projectPath,
+      type: generation?.type,
+      startedAt: generation?.startedAt,
+    };
+  }
+  // Fallback: check if any project is running (for backward compatibility)
+  const isAnyRunning = Array.from(runningProjects.values()).some((g) => g.isRunning);
+  return { isRunning: isAnyRunning, currentAbortController: null };
 }

 /**
- * Set the running state and abort controller
+ * Get the project path that is currently running (if any)
 */
-export function setRunningState(running: boolean, controller: AbortController | null = null): void {
-  isRunning = running;
-  currentAbortController = controller;
+export function getRunningProjectPath(): string | null {
+  for (const [path, generation] of runningProjects.entries()) {
+    if (generation.isRunning) return path; // the entry object is always truthy; test its flag
+  }
+  return null;
+}
+
+/**
+ * Set (running=true) or clear (running=false) the tracked generation state for one project
+ */
+export function setRunningState(
+  projectPath: string,
+  running: boolean,
+  controller: AbortController | null = null,
+  type: GenerationType = 'spec_regeneration'
+): void {
+  if (running) {
+    runningProjects.set(projectPath, {
+      isRunning: true,
+      type,
+      startedAt: new Date().toISOString(), // overwritten on every running=true call
+    });
+    if (controller) {
+      abortControllers.set(projectPath, controller); // a null controller keeps any stored one
+    }
+  } else {
+    runningProjects.delete(projectPath);
+    abortControllers.delete(projectPath);
+  }
+}
+
+/**
+ * Get all running spec/feature generations for the running agents view
+ */
+export function getAllRunningGenerations(): Array<{
+  projectPath: string;
+  type: GenerationType;
+  startedAt: string;
+}> {
+  const results: Array<{
+    projectPath: string;
+    type: GenerationType;
+    startedAt: string;
+  }> = [];
+
+  for (const [projectPath, generation] of runningProjects.entries()) {
+    if (generation.isRunning) {
+      results.push({
+        projectPath,
+        type: generation.type,
+        startedAt: generation.startedAt,
+      });
+    }
+  }
+
+  return results;
 }

 /**
--- a/apps/server/src/routes/app-spec/generate-features-from-spec.ts
+++ b/apps/server/src/routes/app-spec/generate-features-from-spec.ts
@@ -1,25 +1,95 @@
 /**
 * Generate features from existing app_spec.txt
+ *
+ * Model is configurable via phaseModels.featureGenerationModel in settings
+ * (defaults to Sonnet for balanced speed and quality).
 */

-import { query } from '@anthropic-ai/claude-agent-sdk';
 import * as secureFs from '../../lib/secure-fs.js';
 import type { EventEmitter } from '../../lib/events.js';
 import { createLogger } from '@automaker/utils';
-import { createFeatureGenerationOptions } from '../../lib/sdk-options.js';
-import { logAuthStatus } from './common.js';
+import { DEFAULT_PHASE_MODELS, supportsStructuredOutput, isCodexModel } from '@automaker/types';
+import { resolvePhaseModel } from '@automaker/model-resolver';
+import { streamingQuery } from '../../providers/simple-query-service.js';
 import { parseAndCreateFeatures } from './parse-and-create-features.js';
+import { extractJsonWithArray } from '../../lib/json-extractor.js';
 import { getAppSpecPath } from '@automaker/platform';
+import type { SettingsService } from '../../services/settings-service.js';
+import {
+  getAutoLoadClaudeMdSetting,
+  getPromptCustomization,
+  getPhaseModelWithOverrides,
+} from '../../lib/settings-helpers.js';
+import { FeatureLoader } from '../../services/feature-loader.js';

 const logger = createLogger('SpecRegeneration');

 const DEFAULT_MAX_FEATURES = 50;

+/**
+ * Timeout for Codex models when generating features (5 minutes).
+ * Codex models are slower and need more time to generate 50+ features.
+ */
+const CODEX_FEATURE_GENERATION_TIMEOUT_MS = 300000; // 5 minutes
+
+/**
+ * Type for extracted features JSON response
+ */
+interface FeaturesExtractionResult {
+  features: Array<{
+    id: string;
+    category?: string;
+    title: string;
+    description: string;
+    priority?: number;
+    complexity?: 'simple' | 'moderate' | 'complex';
+    dependencies?: string[];
+  }>;
+}
+
+/**
+ * JSON schema for features output format (Claude/Codex structured output)
+ */
+const featuresOutputSchema = {
+  type: 'object',
+  properties: {
+    features: {
+      type: 'array',
+      items: {
+        type: 'object',
+        properties: {
+          id: { type: 'string', description: 'Unique feature identifier (kebab-case)' },
+          category: { type: 'string', description: 'Feature category' },
+          title: { type: 'string', description: 'Short, descriptive title' },
+          description: { type: 'string', description: 'Detailed feature description' },
+          priority: {
+            type: 'number',
+            description: 'Priority level: 1 (highest) to 5 (lowest)',
+          },
+          complexity: {
+            type: 'string',
+            enum: ['simple', 'moderate', 'complex'],
+            description: 'Implementation complexity',
+          },
+          dependencies: {
+            type: 'array',
+            items: { type: 'string' },
+            description: 'IDs of features this depends on',
+          },
+        },
+        required: ['id', 'title', 'description'],
+      },
+    },
+  },
+  required: ['features'],
+} as const;
+
 export async function generateFeaturesFromSpec(
  projectPath: string,
  events: EventEmitter,
  abortController: AbortController,
-  maxFeatures?: number
+  maxFeatures?: number,
+  settingsService?: SettingsService
 ): Promise<void> {
  const featureCount = maxFeatures ?? DEFAULT_MAX_FEATURES;
  logger.debug('========== generateFeaturesFromSpec() started ==========');
@@ -47,38 +117,48 @@ export async function generateFeaturesFromSpec(
    return;
  }

+  // Get customized prompts from settings
+  const prompts = await getPromptCustomization(settingsService, '[FeatureGeneration]');
+
+  // Load existing features to prevent duplicates
+  const featureLoader = new FeatureLoader();
+  const existingFeatures = await featureLoader.getAll(projectPath);
+
+  logger.info(`Found ${existingFeatures.length} existing features to exclude from generation`);
+
+  // Build existing features context for the prompt
+  let existingFeaturesContext = '';
+  if (existingFeatures.length > 0) {
+    const featuresList = existingFeatures
+      .map(
+        (f) =>
+          `- "${f.title}" (ID: ${f.id}): ${f.description?.substring(0, 100) || 'No description'}`
+      )
+      .join('\n');
+    existingFeaturesContext = `
+
+## EXISTING FEATURES (DO NOT REGENERATE THESE)
+
+The following ${existingFeatures.length} features already exist in the project. You MUST NOT generate features that duplicate or overlap with these:
+
+${featuresList}
+
+CRITICAL INSTRUCTIONS:
+- DO NOT generate any features with the same or similar titles as the existing features listed above
+- DO NOT generate features that cover the same functionality as existing features
+- ONLY generate NEW features that are not yet in the system
+- If a feature from the roadmap already exists, skip it entirely
+- Generate unique feature IDs that do not conflict with existing IDs: ${existingFeatures.map((f) => f.id).join(', ')}
+`;
+  }
+
  const prompt = `Based on this project specification:

 ${spec}
+${existingFeaturesContext}
+${prompts.appSpec.generateFeaturesFromSpecPrompt}

-Generate a prioritized list of implementable features. For each feature provide:
-
-1. **id**: A unique lowercase-hyphenated identifier
-2. **category**: Functional category (e.g., "Core", "UI", "API", "Authentication", "Database")
-3. **title**: Short descriptive title
-4. **description**: What this feature does (2-3 sentences)
-5. **priority**: 1 (high), 2 (medium), or 3 (low)
-6. **complexity**: "simple", "moderate", or "complex"
-7. **dependencies**: Array of feature IDs this depends on (can be empty)
-
-Format as JSON:
-{
-  "features": [
-    {
-      "id": "feature-id",
-      "category": "Feature Category",
-      "title": "Feature Title",
-      "description": "What it does",
-      "priority": 1,
-      "complexity": "moderate",
-      "dependencies": []
-    }
-  ]
-}
-
-Generate ${featureCount} features that build on each other logically.
-
-IMPORTANT: Do not ask for clarification. The specification is provided above. Generate the JSON immediately.`;
+Generate ${featureCount} NEW features that build on each other logically. Remember: ONLY generate features that DO NOT already exist.`;

  logger.info('========== PROMPT BEING SENT ==========');
  logger.info(`Prompt length: ${prompt.length} chars`);
@@ -91,72 +171,159 @@ IMPORTANT: Do not ask for clarification. The specification is provided above. Ge
    projectPath: projectPath,
  });

-  const options = createFeatureGenerationOptions({
+  // Load autoLoadClaudeMd setting
+  const autoLoadClaudeMd = await getAutoLoadClaudeMdSetting(
+    projectPath,
+    settingsService,
+    '[FeatureGeneration]'
+  );
+
+  // Get model from phase settings with provider info
+  const {
+    phaseModel: phaseModelEntry,
+    provider,
+    credentials,
+  } = settingsService
+    ? await getPhaseModelWithOverrides(
+        'featureGenerationModel',
+        settingsService,
+        projectPath,
+        '[FeatureGeneration]'
+      )
+    : {
+        phaseModel: DEFAULT_PHASE_MODELS.featureGenerationModel,
+        provider: undefined,
+        credentials: undefined,
+      };
+  const { model, thinkingLevel, reasoningEffort } = resolvePhaseModel(phaseModelEntry);
+
+  logger.info('Using model:', model, provider ? `via provider: ${provider.name}` : 'direct API');
+
+  // Codex models need extended timeout for generating many features.
+  // Use 'xhigh' reasoning effort to get 5-minute timeout (300s base * 1.0x = 300s).
+  // The Codex provider has a special 5-minute base timeout for feature generation.
+  const isCodex = isCodexModel(model);
+  const effectiveReasoningEffort = isCodex ? 'xhigh' : reasoningEffort;
+
+  if (isCodex) {
+    logger.info('Codex model detected - using extended timeout (5 minutes for feature generation)');
+  }
+  if (effectiveReasoningEffort) {
+    logger.info('Reasoning effort:', effectiveReasoningEffort);
+  }
+
+  // Determine if we should use structured output based on model type
+  const useStructuredOutput = supportsStructuredOutput(model);
+  logger.info(
+    `Structured output mode: ${useStructuredOutput ? 'enabled (Claude/Codex)' : 'disabled (using JSON instructions)'}`
+  );
+
+  // Build the final prompt - for non-Claude/Codex models, include explicit JSON instructions
+  let finalPrompt = prompt;
+  if (!useStructuredOutput) {
+    finalPrompt = `${prompt}
+
+CRITICAL INSTRUCTIONS:
+1. DO NOT write any files. Return the JSON in your response only.
+2. After analyzing the spec, respond with ONLY a JSON object - no explanations, no markdown, just raw JSON.
+3. The JSON must have this exact structure:
+{
+  "features": [
+    {
+      "id": "unique-feature-id",
+      "category": "Category Name",
+      "title": "Short Feature Title",
+      "description": "Detailed description of the feature",
+      "priority": 1,
+      "complexity": "simple|moderate|complex",
+      "dependencies": ["other-feature-id"]
+    }
+  ]
+}
+
+4. Feature IDs must be unique, lowercase, kebab-case (e.g., "user-authentication", "data-export")
+5. Priority ranges from 1 (highest) to 5 (lowest)
+6. Complexity must be one of: "simple", "moderate", "complex"
+7. Dependencies is an array of feature IDs that must be completed first (can be empty)
+
+Your entire response should be valid JSON starting with { and ending with }. No text before or after.`;
+  }
+
+  // Use streamingQuery with event callbacks
+  const result = await streamingQuery({
+    prompt: finalPrompt,
+    model,
    cwd: projectPath,
+    maxTurns: 250,
+    allowedTools: ['Read', 'Glob', 'Grep'],
    abortController,
+    thinkingLevel,
+    reasoningEffort: effectiveReasoningEffort, // Extended timeout for Codex models
+    readOnly: true, // Feature generation only reads code, doesn't write
+    settingSources: autoLoadClaudeMd ? ['user', 'project', 'local'] : undefined,
+    claudeCompatibleProvider: provider, // Pass provider for alternative endpoint configuration
+    credentials, // Pass credentials for resolving 'credentials' apiKeySource
+    outputFormat: useStructuredOutput
+      ? {
+          type: 'json_schema',
+          schema: featuresOutputSchema,
+        }
+      : undefined,
+    onText: (text) => {
+      logger.debug(`Feature text block received (${text.length} chars)`);
+      events.emit('spec-regeneration:event', {
+        type: 'spec_regeneration_progress',
+        content: text,
+        projectPath: projectPath,
+      });
+    },
  });

-  logger.debug('SDK Options:', JSON.stringify(options, null, 2));
-  logger.info('Calling Claude Agent SDK query() for features...');
+  // Get response content - prefer structured output if available
+  let contentForParsing: string;

-  logAuthStatus('Right before SDK query() for features');
+  if (result.structured_output) {
+    // Use structured output from Claude/Codex models
+    logger.info('✅ Received structured output from model');
+    contentForParsing = JSON.stringify(result.structured_output);
+    logger.debug('Structured output:', contentForParsing);
+  } else {
+    // Use text response (for non-Claude/Codex models or fallback)
+    // Pre-extract JSON to handle conversational text that may surround the JSON response
+    // This follows the same pattern used in generate-spec.ts and validate-issue.ts
+    const rawText = result.text;
+    logger.info(`Feature stream complete.`);
+    logger.info(`Feature response length: ${rawText.length} chars`);
+    logger.info('========== FULL RESPONSE TEXT ==========');
+    logger.info(rawText);
+    logger.info('========== END RESPONSE TEXT ==========');

-  let stream;
-  try {
-    stream = query({ prompt, options });
-    logger.debug('query() returned stream successfully');
-  } catch (queryError) {
-    logger.error('❌ query() threw an exception:');
-    logger.error('Error:', queryError);
-    throw queryError;
-  }
-
-  let responseText = '';
-  let messageCount = 0;
-
-  logger.debug('Starting to iterate over feature stream...');
-
-  try {
-    for await (const msg of stream) {
-      messageCount++;
-      logger.debug(
-        `Feature stream message #${messageCount}:`,
-        JSON.stringify({ type: msg.type, subtype: (msg as any).subtype }, null, 2)
+    // Pre-extract JSON from response - handles conversational text around the JSON
+    const extracted = extractJsonWithArray<FeaturesExtractionResult>(rawText, 'features', {
+      logger,
+    });
+    if (extracted) {
+      contentForParsing = JSON.stringify(extracted);
+      logger.info('✅ Pre-extracted JSON from text response');
+    } else {
+      // If pre-extraction fails, we know the next step will also fail.
+      // Throw an error here to avoid redundant parsing and make the failure point clearer.
+      logger.error(
+        '❌ Could not extract features JSON from model response. Full response text was:\n' +
+          rawText
      );
-
-      if (msg.type === 'assistant' && msg.message.content) {
-        for (const block of msg.message.content) {
-          if (block.type === 'text') {
-            responseText += block.text;
-            logger.debug(`Feature text block received (${block.text.length} chars)`);
-            events.emit('spec-regeneration:event', {
-              type: 'spec_regeneration_progress',
-              content: block.text,
-              projectPath: projectPath,
-            });
-          }
-        }
-      } else if (msg.type === 'result' && (msg as any).subtype === 'success') {
-        logger.debug('Received success result for features');
-        responseText = (msg as any).result || responseText;
-      } else if ((msg as { type: string }).type === 'error') {
-        logger.error('❌ Received error message from feature stream:');
-        logger.error('Error message:', JSON.stringify(msg, null, 2));
-      }
+      const errorMessage =
+        'Failed to parse features from model response: No valid JSON with a "features" array found.';
+      events.emit('spec-regeneration:event', {
+        type: 'spec_regeneration_error',
+        error: errorMessage,
+        projectPath: projectPath,
+      });
+      throw new Error(errorMessage);
    }
-  } catch (streamError) {
-    logger.error('❌ Error while iterating feature stream:');
-    logger.error('Stream error:', streamError);
-    throw streamError;
  }

-  logger.info(`Feature stream complete. Total messages: ${messageCount}`);
-  logger.info(`Feature response length: ${responseText.length} chars`);
-  logger.info('========== FULL RESPONSE TEXT ==========');
-  logger.info(responseText);
-  logger.info('========== END RESPONSE TEXT ==========');
-
-  await parseAndCreateFeatures(projectPath, responseText, events);
+  await parseAndCreateFeatures(projectPath, contentForParsing, events);

  logger.debug('========== generateFeaturesFromSpec() completed ==========');
 }
--- a/apps/server/src/routes/app-spec/generate-spec.ts
+++ b/apps/server/src/routes/app-spec/generate-spec.ts
@@ -1,22 +1,26 @@
 /**
 * Generate app_spec.txt from project overview
+ *
+ * Model is configurable via phaseModels.specGenerationModel in settings
+ * (defaults to Opus for high-quality specification generation).
 */

-import { query } from '@anthropic-ai/claude-agent-sdk';
-import path from 'path';
 import * as secureFs from '../../lib/secure-fs.js';
 import type { EventEmitter } from '../../lib/events.js';
-import {
-  specOutputSchema,
-  specToXml,
-  getStructuredSpecPromptInstruction,
-  type SpecOutput,
-} from '../../lib/app-spec-format.js';
+import { specOutputSchema, specToXml, type SpecOutput } from '../../lib/app-spec-format.js';
 import { createLogger } from '@automaker/utils';
-import { createSpecGenerationOptions } from '../../lib/sdk-options.js';
-import { logAuthStatus } from './common.js';
+import { DEFAULT_PHASE_MODELS, supportsStructuredOutput } from '@automaker/types';
+import { resolvePhaseModel } from '@automaker/model-resolver';
+import { extractJson } from '../../lib/json-extractor.js';
+import { streamingQuery } from '../../providers/simple-query-service.js';
 import { generateFeaturesFromSpec } from './generate-features-from-spec.js';
 import { ensureAutomakerDir, getAppSpecPath } from '@automaker/platform';
+import type { SettingsService } from '../../services/settings-service.js';
+import {
+  getAutoLoadClaudeMdSetting,
+  getPromptCustomization,
+  getPhaseModelWithOverrides,
+} from '../../lib/settings-helpers.js';

 const logger = createLogger('SpecRegeneration');

@@ -27,7 +31,8 @@ export async function generateSpec(
  abortController: AbortController,
  generateFeatures?: boolean,
  analyzeProject?: boolean,
-  maxFeatures?: number
+  maxFeatures?: number,
+  settingsService?: SettingsService
 ): Promise<void> {
  logger.info('========== generateSpec() started ==========');
  logger.info('projectPath:', projectPath);
@@ -37,6 +42,9 @@ export async function generateSpec(
  logger.info('analyzeProject:', analyzeProject);
  logger.info('maxFeatures:', maxFeatures);

+  // Get customized prompts from settings
+  const prompts = await getPromptCustomization(settingsService, '[SpecRegeneration]');
+
  // Build the prompt based on whether we should analyze the project
  let analysisInstructions = '';
  let techStackDefaults = '';
@@ -60,9 +68,7 @@ export async function generateSpec(
 Use these technologies as the foundation for the specification.`;
  }

-  const prompt = `You are helping to define a software project specification.
-
-IMPORTANT: Never ask for clarification or additional information. Use the information provided and make reasonable assumptions to create the best possible specification. If details are missing, infer them based on common patterns and best practices.
+  const prompt = `${prompts.appSpec.generateSpecSystemPrompt}

 Project Overview:
 ${projectOverview}
@@ -71,7 +77,7 @@ ${techStackDefaults}

 ${analysisInstructions}

-${getStructuredSpecPromptInstruction()}`;
+${prompts.appSpec.structuredSpecInstructions}`;

  logger.info('========== PROMPT BEING SENT ==========');
  logger.info(`Prompt length: ${prompt.length} chars`);
@@ -83,104 +89,109 @@ ${getStructuredSpecPromptInstruction()}`;
    content: 'Starting spec generation...\n',
  });

-  const options = createSpecGenerationOptions({
+  // Load autoLoadClaudeMd setting
+  const autoLoadClaudeMd = await getAutoLoadClaudeMdSetting(
+    projectPath,
+    settingsService,
+    '[SpecRegeneration]'
+  );
+
+  // Get model from phase settings with provider info
+  const {
+    phaseModel: phaseModelEntry,
+    provider,
+    credentials,
+  } = settingsService
+    ? await getPhaseModelWithOverrides(
+        'specGenerationModel',
+        settingsService,
+        projectPath,
+        '[SpecRegeneration]'
+      )
+    : {
+        phaseModel: DEFAULT_PHASE_MODELS.specGenerationModel,
+        provider: undefined,
+        credentials: undefined,
+      };
+  const { model, thinkingLevel } = resolvePhaseModel(phaseModelEntry);
+
+  logger.info('Using model:', model, provider ? `via provider: ${provider.name}` : 'direct API');
+
+  let responseText = '';
+  let structuredOutput: SpecOutput | null = null;
+
+  // Determine if we should use structured output based on model type
+  const useStructuredOutput = supportsStructuredOutput(model);
+  logger.info(
+    `Structured output mode: ${useStructuredOutput ? 'enabled (Claude/Codex)' : 'disabled (using JSON instructions)'}`
+  );
+
+  // Build the final prompt - for non-Claude/Codex models, include JSON schema instructions
+  let finalPrompt = prompt;
+  if (!useStructuredOutput) {
+    finalPrompt = `${prompt}
+
+CRITICAL INSTRUCTIONS:
+1. DO NOT write any files. DO NOT create any files like "project_specification.json".
+2. After analyzing the project, respond with ONLY a JSON object - no explanations, no markdown, just raw JSON.
+3. The JSON must match this exact schema:
+
+${JSON.stringify(specOutputSchema, null, 2)}
+
+Your entire response should be valid JSON starting with { and ending with }. No text before or after.`;
+  }
+
+  // Use streamingQuery with event callbacks
+  const result = await streamingQuery({
+    prompt: finalPrompt,
+    model,
    cwd: projectPath,
+    maxTurns: 250,
+    allowedTools: ['Read', 'Glob', 'Grep'],
    abortController,
-    outputFormat: {
-      type: 'json_schema',
-      schema: specOutputSchema,
+    thinkingLevel,
+    readOnly: true, // Spec generation only reads code, we write the spec ourselves
+    settingSources: autoLoadClaudeMd ? ['user', 'project', 'local'] : undefined,
+    claudeCompatibleProvider: provider, // Pass provider for alternative endpoint configuration
+    credentials, // Pass credentials for resolving 'credentials' apiKeySource
+    outputFormat: useStructuredOutput
+      ? {
+          type: 'json_schema',
+          schema: specOutputSchema,
+        }
+      : undefined,
+    onText: (text) => {
+      responseText += text;
+      logger.info(
+        `Text block received (${text.length} chars), total now: ${responseText.length} chars`
+      );
+      events.emit('spec-regeneration:event', {
+        type: 'spec_regeneration_progress',
+        content: text,
+        projectPath: projectPath,
+      });
+    },
+    onToolUse: (tool, input) => {
+      logger.info('Tool use:', tool);
+      events.emit('spec-regeneration:event', {
+        type: 'spec_tool',
+        tool,
+        input,
+      });
    },
  });

-  logger.debug('SDK Options:', JSON.stringify(options, null, 2));
-  logger.info('Calling Claude Agent SDK query()...');
-
-  // Log auth status right before the SDK call
-  logAuthStatus('Right before SDK query()');
-
-  let stream;
-  try {
-    stream = query({ prompt, options });
-    logger.debug('query() returned stream successfully');
-  } catch (queryError) {
-    logger.error('❌ query() threw an exception:');
-    logger.error('Error:', queryError);
-    throw queryError;
+  // Get structured output if available
+  if (result.structured_output) {
+    structuredOutput = result.structured_output as unknown as SpecOutput;
+    logger.info('✅ Received structured output');
+    logger.debug('Structured output:', JSON.stringify(structuredOutput, null, 2));
+  } else if (!useStructuredOutput && responseText) {
+    // For non-Claude providers, parse JSON from response text
+    structuredOutput = extractJson<SpecOutput>(responseText, { logger });
  }

-  let responseText = '';
-  let messageCount = 0;
-  let structuredOutput: SpecOutput | null = null;
-
-  logger.info('Starting to iterate over stream...');
-
-  try {
-    for await (const msg of stream) {
-      messageCount++;
-      logger.info(
-        `Stream message #${messageCount}: type=${msg.type}, subtype=${(msg as any).subtype}`
-      );
-
-      if (msg.type === 'assistant') {
-        const msgAny = msg as any;
-        if (msgAny.message?.content) {
-          for (const block of msgAny.message.content) {
-            if (block.type === 'text') {
-              responseText += block.text;
-              logger.info(
-                `Text block received (${block.text.length} chars), total now: ${responseText.length} chars`
-              );
-              events.emit('spec-regeneration:event', {
-                type: 'spec_regeneration_progress',
-                content: block.text,
-                projectPath: projectPath,
-              });
-            } else if (block.type === 'tool_use') {
-              logger.info('Tool use:', block.name);
-              events.emit('spec-regeneration:event', {
-                type: 'spec_tool',
-                tool: block.name,
-                input: block.input,
-              });
-            }
-          }
-        }
-      } else if (msg.type === 'result' && (msg as any).subtype === 'success') {
-        logger.info('Received success result');
-        // Check for structured output - this is the reliable way to get spec data
-        const resultMsg = msg as any;
-        if (resultMsg.structured_output) {
-          structuredOutput = resultMsg.structured_output as SpecOutput;
-          logger.info('✅ Received structured output');
-          logger.debug('Structured output:', JSON.stringify(structuredOutput, null, 2));
-        } else {
-          logger.warn('⚠️ No structured output in result, will fall back to text parsing');
-        }
-      } else if (msg.type === 'result') {
-        // Handle error result types
-        const subtype = (msg as any).subtype;
-        logger.info(`Result message: subtype=${subtype}`);
-        if (subtype === 'error_max_turns') {
-          logger.error('❌ Hit max turns limit!');
-        } else if (subtype === 'error_max_structured_output_retries') {
-          logger.error('❌ Failed to produce valid structured output after retries');
-          throw new Error('Could not produce valid spec output');
-        }
-      } else if ((msg as { type: string }).type === 'error') {
-        logger.error('❌ Received error message from stream:');
-        logger.error('Error message:', JSON.stringify(msg, null, 2));
-      } else if (msg.type === 'user') {
-        // Log user messages (tool results)
-        logger.info(`User message (tool result): ${JSON.stringify(msg).substring(0, 500)}`);
-      }
-    }
-  } catch (streamError) {
-    logger.error('❌ Error while iterating stream:');
-    logger.error('Stream error:', streamError);
-    throw streamError;
-  }
-
-  logger.info(`Stream iteration complete. Total messages: ${messageCount}`);
+  logger.info(`Stream iteration complete.`);
  logger.info(`Response text length: ${responseText.length} chars`);

  // Determine XML content to save
@@ -212,19 +223,33 @@ ${getStructuredSpecPromptInstruction()}`;
      xmlContent = responseText.substring(xmlStart, xmlEnd + '</project_specification>'.length);
      logger.info(`Extracted XML content: ${xmlContent.length} chars (from position ${xmlStart})`);
    } else {
-      // No valid XML structure found in the response text
-      // This happens when structured output was expected but not received, and the agent
-      // output conversational text instead of XML (e.g., "The project directory appears to be empty...")
-      // We should NOT save this conversational text as it's not a valid spec
-      logger.error('❌ Response does not contain valid <project_specification> XML structure');
-      logger.error(
-        'This typically happens when structured output failed and the agent produced conversational text instead of XML'
-      );
-      throw new Error(
-        'Failed to generate spec: No valid XML structure found in response. ' +
-          'The response contained conversational text but no <project_specification> tags. ' +
-          'Please try again.'
-      );
+      // No XML found, try JSON extraction
+      logger.warn('⚠️ No XML tags found, attempting JSON extraction...');
+      const extractedJson = extractJson<SpecOutput>(responseText, { logger });
+
+      if (
+        extractedJson &&
+        typeof extractedJson.project_name === 'string' &&
+        typeof extractedJson.overview === 'string' &&
+        Array.isArray(extractedJson.technology_stack) &&
+        Array.isArray(extractedJson.core_capabilities) &&
+        Array.isArray(extractedJson.implemented_features)
+      ) {
+        logger.info('✅ Successfully extracted JSON from response text');
+        xmlContent = specToXml(extractedJson);
+        logger.info(`✅ Converted extracted JSON to XML: ${xmlContent.length} chars`);
+      } else {
+        // Neither XML nor valid JSON found
+        logger.error('❌ Response does not contain valid XML or JSON structure');
+        logger.error(
+          'This typically happens when structured output failed and the agent produced conversational text instead of structured output'
+        );
+        throw new Error(
+          'Failed to generate spec: No valid XML or JSON structure found in response. ' +
+            'The response contained conversational text but no <project_specification> tags or valid JSON. ' +
+            'Please try again.'
+        );
+      }
    }
  }

@@ -269,7 +294,13 @@ ${getStructuredSpecPromptInstruction()}`;
    // Create a new abort controller for feature generation
    const featureAbortController = new AbortController();
    try {
-      await generateFeaturesFromSpec(projectPath, events, featureAbortController, maxFeatures);
+      await generateFeaturesFromSpec(
+        projectPath,
+        events,
+        featureAbortController,
+        maxFeatures,
+        settingsService
+      );
      // Final completion will be emitted by generateFeaturesFromSpec -> parseAndCreateFeatures
    } catch (featureError) {
      logger.error('Feature generation failed:', featureError);
--- a/apps/server/src/routes/app-spec/index.ts
+++ b/apps/server/src/routes/app-spec/index.ts
@@ -7,15 +7,21 @@ import type { EventEmitter } from '../../lib/events.js';
 import { createCreateHandler } from './routes/create.js';
 import { createGenerateHandler } from './routes/generate.js';
 import { createGenerateFeaturesHandler } from './routes/generate-features.js';
+import { createSyncHandler } from './routes/sync.js';
 import { createStopHandler } from './routes/stop.js';
 import { createStatusHandler } from './routes/status.js';
+import type { SettingsService } from '../../services/settings-service.js';

-export function createSpecRegenerationRoutes(events: EventEmitter): Router {
+export function createSpecRegenerationRoutes(
+  events: EventEmitter,
+  settingsService?: SettingsService
+): Router {
  const router = Router();

  router.post('/create', createCreateHandler(events));
-  router.post('/generate', createGenerateHandler(events));
-  router.post('/generate-features', createGenerateFeaturesHandler(events));
+  router.post('/generate', createGenerateHandler(events, settingsService));
+  router.post('/generate-features', createGenerateFeaturesHandler(events, settingsService));
+  router.post('/sync', createSyncHandler(events, settingsService));
  router.post('/stop', createStopHandler());
  router.get('/status', createStatusHandler());

--- a/apps/server/src/routes/app-spec/parse-and-create-features.ts
+++ b/apps/server/src/routes/app-spec/parse-and-create-features.ts
@@ -5,8 +5,10 @@
 import path from 'path';
 import * as secureFs from '../../lib/secure-fs.js';
 import type { EventEmitter } from '../../lib/events.js';
-import { createLogger } from '@automaker/utils';
+import { createLogger, atomicWriteJson, DEFAULT_BACKUP_COUNT } from '@automaker/utils';
 import { getFeaturesDir } from '@automaker/platform';
+import { extractJsonWithArray } from '../../lib/json-extractor.js';
+import { getNotificationService } from '../../services/notification-service.js';

 const logger = createLogger('SpecRegeneration');

@@ -22,23 +24,30 @@ export async function parseAndCreateFeatures(
  logger.info('========== END CONTENT ==========');

  try {
-    // Extract JSON from response
-    logger.info('Extracting JSON from response...');
-    logger.info(`Looking for pattern: /{[\\s\\S]*"features"[\\s\\S]*}/`);
-    const jsonMatch = content.match(/\{[\s\S]*"features"[\s\S]*\}/);
-    if (!jsonMatch) {
-      logger.error('❌ No valid JSON found in response');
+    // Extract JSON from response using shared utility
+    logger.info('Extracting JSON from response using extractJsonWithArray...');
+
+    interface FeaturesResponse {
+      features: Array<{
+        id: string;
+        category?: string;
+        title: string;
+        description: string;
+        priority?: number;
+        complexity?: string;
+        dependencies?: string[];
+      }>;
+    }
+
+    const parsed = extractJsonWithArray<FeaturesResponse>(content, 'features', { logger });
+
+    if (!parsed || !parsed.features) {
+      logger.error('❌ No valid JSON with "features" array found in response');
      logger.error('Full content received:');
      logger.error(content);
      throw new Error('No valid JSON found in response');
    }

-    logger.info(`JSON match found (${jsonMatch[0].length} chars)`);
-    logger.info('========== MATCHED JSON ==========');
-    logger.info(jsonMatch[0]);
-    logger.info('========== END MATCHED JSON ==========');
-
-    const parsed = JSON.parse(jsonMatch[0]);
    logger.info(`Parsed ${parsed.features?.length || 0} features`);
    logger.info('Parsed features:', JSON.stringify(parsed.features, null, 2));

@@ -65,10 +74,10 @@ export async function parseAndCreateFeatures(
        updatedAt: new Date().toISOString(),
      };

-      await secureFs.writeFile(
-        path.join(featureDir, 'feature.json'),
-        JSON.stringify(featureData, null, 2)
-      );
+      // Use atomic write with backup support for crash protection
+      await atomicWriteJson(path.join(featureDir, 'feature.json'), featureData, {
+        backupCount: DEFAULT_BACKUP_COUNT,
+      });

      createdFeatures.push({ id: feature.id, title: feature.title });
    }
@@ -80,6 +89,15 @@ export async function parseAndCreateFeatures(
      message: `Spec regeneration complete! Created ${createdFeatures.length} features.`,
      projectPath: projectPath,
    });
+
+    // Create notification for spec generation completion
+    const notificationService = getNotificationService();
+    await notificationService.createNotification({
+      type: 'spec_regeneration_complete',
+      title: 'Spec Generation Complete',
+      message: `Created ${createdFeatures.length} features from the project specification.`,
+      projectPath: projectPath,
+    });
  } catch (error) {
    logger.error('❌ parseAndCreateFeatures() failed:');
    logger.error('Error:', error);
--- a/apps/server/src/routes/app-spec/routes/create.ts
+++ b/apps/server/src/routes/app-spec/routes/create.ts
@@ -47,17 +47,17 @@ export function createCreateHandler(events: EventEmitter) {
        return;
      }

-      const { isRunning } = getSpecRegenerationStatus();
+      const { isRunning } = getSpecRegenerationStatus(projectPath);
      if (isRunning) {
-        logger.warn('Generation already running, rejecting request');
-        res.json({ success: false, error: 'Spec generation already running' });
+        logger.warn('Generation already running for project:', projectPath);
+        res.json({ success: false, error: 'Spec generation already running for this project' });
        return;
      }

      logAuthStatus('Before starting generation');

      const abortController = new AbortController();
-      setRunningState(true, abortController);
+      setRunningState(projectPath, true, abortController);
      logger.info('Starting background generation task...');

      // Start generation in background
@@ -80,7 +80,7 @@ export function createCreateHandler(events: EventEmitter) {
        })
        .finally(() => {
          logger.info('Generation task finished (success or error)');
-          setRunningState(false, null);
+          setRunningState(projectPath, false, null);
        });

      logger.info('Returning success response (generation running in background)');
--- a/apps/server/src/routes/app-spec/routes/generate-features.ts
+++ b/apps/server/src/routes/app-spec/routes/generate-features.ts
@@ -13,10 +13,14 @@ import {
  getErrorMessage,
 } from '../common.js';
 import { generateFeaturesFromSpec } from '../generate-features-from-spec.js';
+import type { SettingsService } from '../../../services/settings-service.js';

 const logger = createLogger('SpecRegeneration');

-export function createGenerateFeaturesHandler(events: EventEmitter) {
+export function createGenerateFeaturesHandler(
+  events: EventEmitter,
+  settingsService?: SettingsService
+) {
  return async (req: Request, res: Response): Promise<void> => {
    logger.info('========== /generate-features endpoint called ==========');
    logger.debug('Request body:', JSON.stringify(req.body, null, 2));
@@ -36,20 +40,20 @@ export function createGenerateFeaturesHandler(events: EventEmitter) {
        return;
      }

-      const { isRunning } = getSpecRegenerationStatus();
+      const { isRunning } = getSpecRegenerationStatus(projectPath);
      if (isRunning) {
-        logger.warn('Generation already running, rejecting request');
-        res.json({ success: false, error: 'Generation already running' });
+        logger.warn('Generation already running for project:', projectPath);
+        res.json({ success: false, error: 'Generation already running for this project' });
        return;
      }

      logAuthStatus('Before starting feature generation');

      const abortController = new AbortController();
-      setRunningState(true, abortController);
+      setRunningState(projectPath, true, abortController, 'feature_generation');
      logger.info('Starting background feature generation task...');

-      generateFeaturesFromSpec(projectPath, events, abortController, maxFeatures)
+      generateFeaturesFromSpec(projectPath, events, abortController, maxFeatures, settingsService)
        .catch((error) => {
          logError(error, 'Feature generation failed with error');
          events.emit('spec-regeneration:event', {
@@ -59,7 +63,7 @@ export function createGenerateFeaturesHandler(events: EventEmitter) {
        })
        .finally(() => {
          logger.info('Feature generation task finished (success or error)');
-          setRunningState(false, null);
+          setRunningState(projectPath, false, null);
        });

      logger.info('Returning success response (generation running in background)');
--- a/apps/server/src/routes/app-spec/routes/generate.ts
+++ b/apps/server/src/routes/app-spec/routes/generate.ts
@@ -13,10 +13,11 @@ import {
  getErrorMessage,
 } from '../common.js';
 import { generateSpec } from '../generate-spec.js';
+import type { SettingsService } from '../../../services/settings-service.js';

 const logger = createLogger('SpecRegeneration');

-export function createGenerateHandler(events: EventEmitter) {
+export function createGenerateHandler(events: EventEmitter, settingsService?: SettingsService) {
  return async (req: Request, res: Response): Promise<void> => {
    logger.info('========== /generate endpoint called ==========');
    logger.debug('Request body:', JSON.stringify(req.body, null, 2));
@@ -47,17 +48,17 @@ export function createGenerateHandler(events: EventEmitter) {
        return;
      }

-      const { isRunning } = getSpecRegenerationStatus();
+      const { isRunning } = getSpecRegenerationStatus(projectPath);
      if (isRunning) {
-        logger.warn('Generation already running, rejecting request');
-        res.json({ success: false, error: 'Spec generation already running' });
+        logger.warn('Generation already running for project:', projectPath);
+        res.json({ success: false, error: 'Spec generation already running for this project' });
        return;
      }

      logAuthStatus('Before starting generation');

      const abortController = new AbortController();
-      setRunningState(true, abortController);
+      setRunningState(projectPath, true, abortController);
      logger.info('Starting background generation task...');

      generateSpec(
@@ -67,7 +68,8 @@ export function createGenerateHandler(events: EventEmitter) {
        abortController,
        generateFeatures,
        analyzeProject,
-        maxFeatures
+        maxFeatures,
+        settingsService
      )
        .catch((error) => {
          logError(error, 'Generation failed with error');
@@ -79,7 +81,7 @@ export function createGenerateHandler(events: EventEmitter) {
        })
        .finally(() => {
          logger.info('Generation task finished (success or error)');
-          setRunningState(false, null);
+          setRunningState(projectPath, false, null);
        });

      logger.info('Returning success response (generation running in background)');
--- a/apps/server/src/routes/app-spec/routes/status.ts
+++ b/apps/server/src/routes/app-spec/routes/status.ts
@@ -6,10 +6,11 @@ import type { Request, Response } from 'express';
 import { getSpecRegenerationStatus, getErrorMessage } from '../common.js';

 export function createStatusHandler() {
-  return async (_req: Request, res: Response): Promise<void> => {
+  return async (req: Request, res: Response): Promise<void> => {
    try {
-      const { isRunning } = getSpecRegenerationStatus();
-      res.json({ success: true, isRunning });
+      const projectPath = req.query.projectPath as string | undefined;
+      const { isRunning } = getSpecRegenerationStatus(projectPath);
+      res.json({ success: true, isRunning, projectPath });
    } catch (error) {
      res.status(500).json({ success: false, error: getErrorMessage(error) });
    }
--- a/apps/server/src/routes/app-spec/routes/stop.ts
+++ b/apps/server/src/routes/app-spec/routes/stop.ts
@@ -6,13 +6,16 @@ import type { Request, Response } from 'express';
 import { getSpecRegenerationStatus, setRunningState, getErrorMessage } from '../common.js';

 export function createStopHandler() {
-  return async (_req: Request, res: Response): Promise<void> => {
+  return async (req: Request, res: Response): Promise<void> => {
    try {
-      const { currentAbortController } = getSpecRegenerationStatus();
+      const { projectPath } = req.body as { projectPath?: string };
+      const { currentAbortController } = getSpecRegenerationStatus(projectPath);
      if (currentAbortController) {
        currentAbortController.abort();
      }
-      setRunningState(false, null);
+      if (projectPath) {
+        setRunningState(projectPath, false, null);
+      }
      res.json({ success: true });
    } catch (error) {
      res.status(500).json({ success: false, error: getErrorMessage(error) });
--- a/apps/server/src/routes/app-spec/routes/sync.ts
+++ b/apps/server/src/routes/app-spec/routes/sync.ts
@@ -0,0 +1,76 @@
+/**
+ * POST /sync endpoint - Sync spec with codebase and features
+ */
+
+import type { Request, Response } from 'express';
+import type { EventEmitter } from '../../../lib/events.js';
+import { createLogger } from '@automaker/utils';
+import {
+  getSpecRegenerationStatus,
+  setRunningState,
+  logAuthStatus,
+  logError,
+  getErrorMessage,
+} from '../common.js';
+import { syncSpec } from '../sync-spec.js';
+import type { SettingsService } from '../../../services/settings-service.js';
+
+const logger = createLogger('SpecSync');
+
+/**
+ * Build the POST /sync handler: validates `projectPath`, rejects overlapping runs for the
+ * same project, then starts `syncSpec` in the background and responds without awaiting it.
+ */
+export function createSyncHandler(events: EventEmitter, settingsService?: SettingsService) {
+  // Runs one sync to completion; failures become error events and state is always cleared
+  const runInBackground = async (projectPath: string, controller: AbortController) => {
+    try {
+      const result = await syncSpec(projectPath, events, controller, settingsService);
+      logger.info('Spec sync completed successfully');
+      logger.info('Result:', JSON.stringify(result, null, 2));
+    } catch (error) {
+      logError(error, 'Spec sync failed with error');
+      events.emit('spec-regeneration:event', {
+        type: 'spec_regeneration_error',
+        error: getErrorMessage(error),
+        projectPath,
+      });
+    } finally {
+      logger.info('Spec sync task finished (success or error)');
+      setRunningState(projectPath, false, null);
+    }
+  };
+
+  return async (req: Request, res: Response): Promise<void> => {
+    logger.info('========== /sync endpoint called ==========');
+    logger.debug('Request body:', JSON.stringify(req.body, null, 2));
+
+    try {
+      const { projectPath } = req.body as { projectPath: string };
+      logger.debug('projectPath:', projectPath);
+      if (!projectPath) {
+        logger.error('Missing projectPath parameter');
+        res.status(400).json({ success: false, error: 'projectPath required' });
+        return;
+      }
+
+      if (getSpecRegenerationStatus(projectPath).isRunning) {
+        logger.warn('Generation/sync already running for project:', projectPath);
+        res.json({ success: false, error: 'Operation already running for this project' });
+        return;
+      }
+
+      logAuthStatus('Before starting spec sync');
+      const abortController = new AbortController();
+      setRunningState(projectPath, true, abortController, 'sync');
+      logger.info('Starting background spec sync task...');
+      void runInBackground(projectPath, abortController);
+
+      logger.info('Returning success response (sync running in background)');
+      res.json({ success: true });
+    } catch (error) {
+      logError(error, 'Sync route handler failed');
+      res.status(500).json({ success: false, error: getErrorMessage(error) });
+    }
+  };
+}
--- a/apps/server/src/routes/app-spec/sync-spec.ts
+++ b/apps/server/src/routes/app-spec/sync-spec.ts
@@ -0,0 +1,390 @@
+/**
+ * Sync spec with current codebase and feature state
+ *
+ * Updates the spec file based on:
+ * - Completed Automaker features
+ * - Code analysis for tech stack and implementations
+ * - Roadmap phase status updates
+ */
+
+import * as secureFs from '../../lib/secure-fs.js';
+import type { EventEmitter } from '../../lib/events.js';
+import { createLogger } from '@automaker/utils';
+import { DEFAULT_PHASE_MODELS, supportsStructuredOutput } from '@automaker/types';
+import { resolvePhaseModel } from '@automaker/model-resolver';
+import { streamingQuery } from '../../providers/simple-query-service.js';
+import { extractJson } from '../../lib/json-extractor.js';
+import { getAppSpecPath } from '@automaker/platform';
+import type { SettingsService } from '../../services/settings-service.js';
+import {
+  getAutoLoadClaudeMdSetting,
+  getPhaseModelWithOverrides,
+} from '../../lib/settings-helpers.js';
+import { FeatureLoader } from '../../services/feature-loader.js';
+import {
+  extractImplementedFeatures,
+  extractTechnologyStack,
+  extractRoadmapPhases,
+  updateImplementedFeaturesSection,
+  updateTechnologyStack,
+  updateRoadmapPhaseStatus,
+  type ImplementedFeature,
+  type RoadmapPhase,
+} from '../../lib/xml-extractor.js';
+import { getNotificationService } from '../../services/notification-service.js';
+
+const logger = createLogger('SpecSync');
+
+/**
+ * Shape of the tech stack payload read from the AI response (structured output or parsed text)
+ */
+interface TechStackExtractionResult {
+  technologies: string[]; // technology names exactly as returned by the analysis
+}
+
+/**
+ * JSON schema for tech stack analysis output (Claude/Codex structured output)
+ */
+const techStackOutputSchema = {
+  type: 'object',
+  properties: {
+    technologies: {
+      type: 'array',
+      items: { type: 'string' }, // each entry is a plain technology name
+      description: 'List of technologies detected in the project',
+    },
+  },
+  required: ['technologies'], // mirrors the single field of TechStackExtractionResult
+} as const;
+
+/**
+ * Result of a sync operation, as returned by syncSpec()
+ */
+export interface SyncResult {
+  techStackUpdates: {
+    added: string[]; // technologies reported by the analysis but missing from the spec
+    removed: string[]; // technologies in the spec that the analysis did not report
+  };
+  implementedFeaturesUpdates: {
+    addedFromFeatures: string[]; // names of completed/verified features appended to the spec
+    removed: string[]; // NOTE(review): initialised empty and not populated by syncSpec()
+  };
+  roadmapUpdates: Array<{ phaseName: string; newStatus: string }>; // one entry per changed phase
+  summary: string; // comma-separated description of the changes, or an "up to date" message
+}
+
+/**
+ * Sync the spec with current codebase and feature state.
+ *
+ * Appends completed/verified features, refreshes the tech stack through a best-effort AI
+ * analysis (failures are logged and skipped), promotes roadmap phases mentioned by completed
+ * features to 'in_progress', then saves the spec and notifies. Throws if the spec is unreadable.
+ */
+export async function syncSpec(
+  projectPath: string,
+  events: EventEmitter,
+  abortController: AbortController,
+  settingsService?: SettingsService
+): Promise<SyncResult> {
+  logger.info('========== syncSpec() started ==========');
+  logger.info('projectPath:', projectPath);
+
+  const result: SyncResult = {
+    techStackUpdates: { added: [], removed: [] },
+    implementedFeaturesUpdates: { addedFromFeatures: [], removed: [] },
+    roadmapUpdates: [],
+    summary: '',
+  };
+
+  // Read existing spec
+  const specPath = getAppSpecPath(projectPath);
+  let specContent: string;
+
+  try {
+    specContent = (await secureFs.readFile(specPath, 'utf-8')) as string;
+    logger.info(`Spec loaded successfully (${specContent.length} chars)`);
+  } catch (readError) {
+    logger.error('Failed to read spec file:', readError);
+    events.emit('spec-regeneration:event', {
+      type: 'spec_regeneration_error',
+      error: 'No project spec found. Create or regenerate spec first.',
+      projectPath,
+    });
+    throw new Error('No project spec found');
+  }
+
+  events.emit('spec-regeneration:event', {
+    type: 'spec_regeneration_progress',
+    content: '[Phase: sync] Starting spec sync...\n',
+    projectPath,
+  });
+
+  // Extract current state from spec
+  const currentImplementedFeatures = extractImplementedFeatures(specContent);
+  const currentTechStack = extractTechnologyStack(specContent);
+  const currentRoadmapPhases = extractRoadmapPhases(specContent);
+
+  logger.info(`Current spec has ${currentImplementedFeatures.length} implemented features`);
+  logger.info(`Current spec has ${currentTechStack.length} technologies`);
+  logger.info(`Current spec has ${currentRoadmapPhases.length} roadmap phases`);
+
+  // Load completed Automaker features
+  const featureLoader = new FeatureLoader();
+  const allFeatures = await featureLoader.getAll(projectPath);
+  const completedFeatures = allFeatures.filter(
+    (f) => f.status === 'completed' || f.status === 'verified'
+  );
+
+  logger.info(`Found ${completedFeatures.length} completed/verified features in Automaker`);
+
+  events.emit('spec-regeneration:event', {
+    type: 'spec_regeneration_progress',
+    content: `Found ${completedFeatures.length} completed features to sync...\n`,
+    projectPath,
+  });
+
+  // Build new implemented features list from completed Automaker features
+  const newImplementedFeatures: ImplementedFeature[] = [];
+  const existingNames = new Set(currentImplementedFeatures.map((f) => f.name.toLowerCase()));
+
+  for (const feature of completedFeatures) {
+    const name = feature.title || `Feature: ${feature.id}`;
+    const key = name.toLowerCase();
+    if (existingNames.has(key)) continue;
+    existingNames.add(key); // also de-duplicates completed features that share a title
+    newImplementedFeatures.push({ name, description: feature.description || '' });
+    result.implementedFeaturesUpdates.addedFromFeatures.push(name);
+  }
+
+  // Merge: keep existing + add new from completed features
+  const mergedFeatures = [...currentImplementedFeatures, ...newImplementedFeatures];
+
+  // Update spec with merged features
+  if (result.implementedFeaturesUpdates.addedFromFeatures.length > 0) {
+    specContent = updateImplementedFeaturesSection(specContent, mergedFeatures);
+    logger.info(
+      `Added ${result.implementedFeaturesUpdates.addedFromFeatures.length} features to spec`
+    );
+  }
+
+  // Analyze codebase for tech stack updates using AI
+  events.emit('spec-regeneration:event', {
+    type: 'spec_regeneration_progress',
+    content: 'Analyzing codebase for technology updates...\n',
+    projectPath,
+  });
+
+  const autoLoadClaudeMd = await getAutoLoadClaudeMdSetting(
+    projectPath,
+    settingsService,
+    '[SpecSync]'
+  );
+
+  // Get model from phase settings with provider info
+  const {
+    phaseModel: phaseModelEntry,
+    provider,
+    credentials,
+  } = settingsService
+    ? await getPhaseModelWithOverrides(
+        'specGenerationModel',
+        settingsService,
+        projectPath,
+        '[SpecSync]'
+      )
+    : {
+        phaseModel: DEFAULT_PHASE_MODELS.specGenerationModel,
+        provider: undefined,
+        credentials: undefined,
+      };
+  const { model, thinkingLevel } = resolvePhaseModel(phaseModelEntry);
+
+  logger.info('Using model:', model, provider ? `via provider: ${provider.name}` : 'direct API');
+
+  // Determine if we should use structured output based on model type
+  const useStructuredOutput = supportsStructuredOutput(model);
+  logger.info(
+    `Structured output mode: ${useStructuredOutput ? 'enabled (Claude/Codex)' : 'disabled (using JSON instructions)'}`
+  );
+
+  // Use AI to analyze tech stack
+  let techAnalysisPrompt = `Analyze this project and return ONLY a JSON object with the current technology stack.
+
+Current known technologies: ${currentTechStack.join(', ')}
+
+Look at package.json, config files, and source code to identify:
+- Frameworks (React, Vue, Express, etc.)
+- Languages (TypeScript, JavaScript, Python, etc.)
+- Build tools (Vite, Webpack, etc.)
+- Databases (PostgreSQL, MongoDB, etc.)
+- Key libraries and tools
+
+Return ONLY this JSON format, no other text:
+{
+  "technologies": ["Technology 1", "Technology 2", ...]
+}`;
+
+  // Add explicit JSON instructions for non-Claude/Codex models
+  if (!useStructuredOutput) {
+    techAnalysisPrompt = `${techAnalysisPrompt}
+
+CRITICAL INSTRUCTIONS:
+1. DO NOT write any files. Return the JSON in your response only.
+2. Your entire response should be valid JSON starting with { and ending with }.
+3. No explanations, no markdown, no text before or after the JSON.`;
+  }
+
+  try {
+    const techResult = await streamingQuery({
+      prompt: techAnalysisPrompt,
+      model,
+      cwd: projectPath,
+      maxTurns: 10,
+      allowedTools: ['Read', 'Glob', 'Grep'],
+      abortController,
+      thinkingLevel,
+      readOnly: true,
+      settingSources: autoLoadClaudeMd ? ['user', 'project', 'local'] : undefined,
+      claudeCompatibleProvider: provider, // Pass provider for alternative endpoint configuration
+      credentials, // Pass credentials for resolving 'credentials' apiKeySource
+      outputFormat: useStructuredOutput
+        ? {
+            type: 'json_schema',
+            schema: techStackOutputSchema,
+          }
+        : undefined,
+      onText: (text) => {
+        logger.debug(`Tech analysis text: ${text.substring(0, 100)}`);
+      },
+    });
+
+    // Parse tech stack from response - prefer structured output if available
+    let parsedTechnologies: string[] | null = null;
+
+    if (techResult.structured_output) {
+      // Use structured output from Claude/Codex models
+      const structured = techResult.structured_output as unknown as TechStackExtractionResult;
+      if (Array.isArray(structured.technologies)) {
+        parsedTechnologies = structured.technologies;
+        logger.info('✅ Received structured output for tech analysis');
+      }
+    } else {
+      // Fall back to text parsing for non-Claude/Codex models
+      const extracted = extractJson<TechStackExtractionResult>(techResult.text, {
+        logger,
+        requiredKey: 'technologies',
+        requireArray: true,
+      });
+      if (extracted && Array.isArray(extracted.technologies)) {
+        parsedTechnologies = extracted.technologies;
+        logger.info('✅ Extracted tech stack from text response');
+      } else {
+        logger.warn('⚠️ Failed to extract tech stack JSON from response');
+      }
+    }
+
+    // An empty list is treated as a failed analysis rather than "remove every technology"
+    if (parsedTechnologies && parsedTechnologies.length > 0) {
+      const newTechStack = parsedTechnologies;
+
+      // Calculate differences
+      const currentSet = new Set(currentTechStack.map((t) => t.toLowerCase()));
+      const newSet = new Set(newTechStack.map((t) => t.toLowerCase()));
+
+      for (const tech of newTechStack) {
+        if (!currentSet.has(tech.toLowerCase())) {
+          result.techStackUpdates.added.push(tech);
+        }
+      }
+
+      for (const tech of currentTechStack) {
+        if (!newSet.has(tech.toLowerCase())) {
+          result.techStackUpdates.removed.push(tech);
+        }
+      }
+
+      // Update spec with new tech stack if there are changes
+      if (result.techStackUpdates.added.length > 0 || result.techStackUpdates.removed.length > 0) {
+        specContent = updateTechnologyStack(specContent, newTechStack);
+        logger.info(
+          `Updated tech stack: +${result.techStackUpdates.added.length}, -${result.techStackUpdates.removed.length}`
+        );
+      }
+    }
+  } catch (error) {
+    logger.warn('Failed to analyze tech stack:', error);
+    // Continue with other sync operations
+  }
+
+  // Update roadmap phase statuses based on completed features
+  events.emit('spec-regeneration:event', {
+    type: 'spec_regeneration_progress',
+    content: 'Checking roadmap phase statuses...\n',
+    projectPath,
+  });
+
+  // Heuristic: a phase counts as started once any completed feature mentions its name in
+  // its title, description, or category. Phases are only ever promoted to 'in_progress' here.
+  const newStatus = 'in_progress';
+  for (const phase of currentRoadmapPhases) {
+    // Skip finished phases and ones already in progress so re-syncs report no phantom updates
+    if (phase.status === 'completed' || phase.status === newStatus) continue;
+
+    const phaseNameLower = phase.name.toLowerCase();
+    const hasRelatedCompletedFeature = completedFeatures.some(
+      (f) =>
+        f.title?.toLowerCase().includes(phaseNameLower) ||
+        f.description?.toLowerCase().includes(phaseNameLower) ||
+        f.category?.toLowerCase().includes(phaseNameLower)
+    );
+
+    if (hasRelatedCompletedFeature) {
+      specContent = updateRoadmapPhaseStatus(specContent, phase.name, newStatus);
+      result.roadmapUpdates.push({ phaseName: phase.name, newStatus });
+      logger.info(`Updated phase "${phase.name}" to ${newStatus}`);
+    }
+  }
+
+  // Save updated spec
+  await secureFs.writeFile(specPath, specContent, 'utf-8');
+  logger.info('Spec saved successfully');
+
+  // Build summary
+  const summaryParts: string[] = [];
+  if (result.implementedFeaturesUpdates.addedFromFeatures.length > 0) {
+    summaryParts.push(
+      `Added ${result.implementedFeaturesUpdates.addedFromFeatures.length} implemented features`
+    );
+  }
+  if (result.techStackUpdates.added.length > 0) {
+    summaryParts.push(`Added ${result.techStackUpdates.added.length} technologies`);
+  }
+  if (result.techStackUpdates.removed.length > 0) {
+    summaryParts.push(`Removed ${result.techStackUpdates.removed.length} technologies`);
+  }
+  if (result.roadmapUpdates.length > 0) {
+    summaryParts.push(`Updated ${result.roadmapUpdates.length} roadmap phases`);
+  }
+
+  result.summary = summaryParts.length > 0 ? summaryParts.join(', ') : 'Spec is already up to date';
+
+  // Create notification
+  const notificationService = getNotificationService();
+  await notificationService.createNotification({
+    type: 'spec_regeneration_complete',
+    title: 'Spec Sync Complete',
+    message: result.summary,
+    projectPath,
+  });
+
+  events.emit('spec-regeneration:event', {
+    type: 'spec_regeneration_complete',
+    message: `Spec sync complete! ${result.summary}`,
+    projectPath,
+  });
+
+  logger.info('========== syncSpec() completed ==========');
+  logger.info('Summary:', result.summary);
+
+  return result;
+}
--- a/apps/server/src/routes/auth/index.ts
+++ b/apps/server/src/routes/auth/index.ts
@@ -0,0 +1,266 @@
+/**
+ * Auth routes - Login, logout, and status endpoints
+ *
+ * Security model:
+ * - Web mode: User enters API key (shown on server console) to get HTTP-only session cookie
+ * - Electron mode: Uses X-API-Key header (handled automatically via IPC)
+ *
+ * The session cookie is:
+ * - HTTP-only: JavaScript cannot read it (protects against XSS)
+ * - SameSite=Strict: Only sent for same-site requests (protects against CSRF)
+ *
+ * Mounted at /api/auth in the main server (BEFORE auth middleware).
+ */
+
+import { Router } from 'express';
+import type { Request } from 'express';
+import {
+  validateApiKey,
+  createSession,
+  invalidateSession,
+  getSessionCookieOptions,
+  getSessionCookieName,
+  isRequestAuthenticated,
+  createWsConnectionToken,
+} from '../../lib/auth.js';
+
+// Rate limiting configuration
+const RATE_LIMIT_WINDOW_MS = 60 * 1000; // 1 minute window
+const RATE_LIMIT_MAX_ATTEMPTS = 5; // Max 5 attempts per window
+
+// Check if we're in test mode - disable rate limiting for E2E tests
+const isTestMode = process.env.AUTOMAKER_MOCK_AGENT === 'true';
+
+// In-memory rate limit tracking (resets on server restart)
+const loginAttempts = new Map<string, { count: number; windowStart: number }>();
+
+// Clean up old rate limit entries periodically (every 5 minutes).
+// Entries are dropped once their window started more than two windows ago.
+// The timer is unref()'d so that importing this module (e.g. from a test or a
+// one-off script) does not by itself keep the Node.js process alive.
+setInterval(
+  () => {
+    const now = Date.now();
+    loginAttempts.forEach((data, ip) => {
+      if (now - data.windowStart > RATE_LIMIT_WINDOW_MS * 2) {
+        loginAttempts.delete(ip);
+      }
+    });
+  },
+  5 * 60 * 1000
+).unref();
+
+/**
+ * Get client IP address from request.
+ *
+ * Prefers the first non-empty entry of the X-Forwarded-For header (the original
+ * client when running behind a reverse proxy) and otherwise falls back to
+ * `req.ip`, then the socket's remote address, then the literal 'unknown'.
+ *
+ * NOTE(review): X-Forwarded-For is supplied by the client unless a trusted proxy
+ * overwrites it, so a direct client can vary it to obtain a fresh rate-limit
+ * bucket per request. Consider configuring Express 'trust proxy' and relying on
+ * `req.ip` alone - confirm against the deployment topology.
+ *
+ * @param req - Incoming Express request
+ * @returns Best-effort client address used as the rate-limit key
+ */
+function getClientIp(req: Request): string {
+  const forwarded = req.headers['x-forwarded-for'];
+  if (forwarded) {
+    // The header can be a comma-separated string or an array of such strings.
+    // Flatten both shapes and skip blank entries so an empty/whitespace header
+    // never becomes the shared '' rate-limit key.
+    const combined = Array.isArray(forwarded) ? forwarded.join(',') : forwarded;
+    const forwardedIp = combined
+      .split(',')
+      .map((entry) => entry.trim())
+      .find((entry) => entry.length > 0);
+    if (forwardedIp) {
+      return forwardedIp;
+    }
+  }
+  return req.ip || req.socket.remoteAddress || 'unknown';
+}
+
+/**
+ * Decide whether login attempts from `ip` are currently being throttled.
+ *
+ * An expired window is removed from the tracking map as a side effect.
+ *
+ * @param ip - Rate-limit key for the client
+ * @returns `{ limited: false }` when the client may proceed, otherwise
+ *   `{ limited: true, retryAfter }` with the seconds left in the window (rounded up)
+ */
+function checkRateLimit(ip: string): { limited: boolean; retryAfter?: number } {
+  const entry = loginAttempts.get(ip);
+  if (entry === undefined) {
+    return { limited: false };
+  }
+
+  const elapsedMs = Date.now() - entry.windowStart;
+
+  // A window that has run out no longer counts; forget it entirely.
+  if (elapsedMs > RATE_LIMIT_WINDOW_MS) {
+    loginAttempts.delete(ip);
+    return { limited: false };
+  }
+
+  // Still inside the window and under the cap: let the request through.
+  if (entry.count < RATE_LIMIT_MAX_ATTEMPTS) {
+    return { limited: false };
+  }
+
+  const secondsRemaining = Math.ceil((RATE_LIMIT_WINDOW_MS - elapsedMs) / 1000);
+  return { limited: true, retryAfter: secondsRemaining };
+}
+
+/**
+ * Count one login attempt against `ip`.
+ *
+ * Bumps the counter of the live window, or opens a fresh window (count 1)
+ * when none exists or the previous one has expired.
+ *
+ * @param ip - Rate-limit key for the client
+ */
+function recordLoginAttempt(ip: string): void {
+  const timestamp = Date.now();
+  const existing = loginAttempts.get(ip);
+
+  if (existing !== undefined && timestamp - existing.windowStart <= RATE_LIMIT_WINDOW_MS) {
+    // Window still live: just bump its counter
+    existing.count += 1;
+    return;
+  }
+
+  // No entry yet, or the old window ran out: open a new one
+  loginAttempts.set(ip, { count: 1, windowStart: timestamp });
+}
+
+/**
+ * Create auth routes
+ *
+ * Registers GET /status, POST /login, GET /token and POST /logout on a new router.
+ *
+ * @returns Express Router with auth endpoints
+ */
+export function createAuthRoutes(): Router {
+  const router = Router();
+
+  /**
+   * GET /api/auth/status
+   *
+   * Returns whether the current request is authenticated.
+   * Used by the UI to determine if login is needed.
+   *
+   * If AUTOMAKER_AUTO_LOGIN=true is set, automatically creates a session
+   * for unauthenticated requests (useful for development).
+   */
+  router.get('/status', async (req, res) => {
+    let authenticated = isRequestAuthenticated(req);
+
+    // Auto-login for development: create session automatically if enabled
+    // Only works in non-production environments as a safeguard
+    if (
+      !authenticated &&
+      process.env.AUTOMAKER_AUTO_LOGIN === 'true' &&
+      process.env.NODE_ENV !== 'production'
+    ) {
+      const sessionToken = await createSession();
+      const cookieOptions = getSessionCookieOptions();
+      const cookieName = getSessionCookieName();
+      res.cookie(cookieName, sessionToken, cookieOptions);
+      authenticated = true;
+    }
+
+    res.json({
+      success: true,
+      authenticated,
+      required: true,
+    });
+  });
+
+  /**
+   * POST /api/auth/login
+   *
+   * Validates the API key and sets a session cookie.
+   * Body: { apiKey: string }
+   *
+   * Rate limited to 5 attempts per minute per IP to prevent brute force attacks.
+   */
+  router.post('/login', async (req, res) => {
+    const clientIp = getClientIp(req);
+
+    // Skip rate limiting in test mode to allow parallel E2E tests
+    if (!isTestMode) {
+      // Check rate limit before processing
+      const rateLimit = checkRateLimit(clientIp);
+      if (rateLimit.limited) {
+        res.status(429).json({
+          success: false,
+          error: 'Too many login attempts. Please try again later.',
+          retryAfter: rateLimit.retryAfter,
+        });
+        return;
+      }
+    }
+
+    // The body is untrusted input: it may be missing entirely (no/invalid JSON)
+    // or carry a non-string apiKey. Treat both the same as "no key supplied"
+    // instead of throwing or handing an arbitrary value to validateApiKey().
+    const body: unknown = req.body;
+    const apiKey =
+      typeof body === 'object' && body !== null ? (body as { apiKey?: unknown }).apiKey : undefined;
+
+    if (typeof apiKey !== 'string' || apiKey.length === 0) {
+      res.status(400).json({
+        success: false,
+        error: 'API key is required.',
+      });
+      return;
+    }
+
+    // Record this attempt (only for actual API key validation attempts, skip in test mode)
+    if (!isTestMode) {
+      recordLoginAttempt(clientIp);
+    }
+
+    if (!validateApiKey(apiKey)) {
+      res.status(401).json({
+        success: false,
+        error: 'Invalid API key.',
+      });
+      return;
+    }
+
+    // Create session and set cookie
+    const sessionToken = await createSession();
+    const cookieOptions = getSessionCookieOptions();
+    const cookieName = getSessionCookieName();
+
+    res.cookie(cookieName, sessionToken, cookieOptions);
+    res.json({
+      success: true,
+      message: 'Logged in successfully.',
+      // Return token for explicit header-based auth (works around cross-origin cookie issues)
+      token: sessionToken,
+    });
+  });
+
+  /**
+   * GET /api/auth/token
+   *
+   * Generates a short-lived WebSocket connection token if the user has a valid session.
+   * This token is used for initial WebSocket handshake authentication and expires in 5 minutes.
+   * The token is NOT the session cookie value - it's a separate, short-lived token.
+   */
+  router.get('/token', (req, res) => {
+    // Validate the session is still valid (via cookie, API key, or session token header)
+    if (!isRequestAuthenticated(req)) {
+      res.status(401).json({
+        success: false,
+        error: 'Authentication required.',
+      });
+      return;
+    }
+
+    // Generate a new short-lived WebSocket connection token
+    const wsToken = createWsConnectionToken();
+
+    res.json({
+      success: true,
+      token: wsToken,
+      expiresIn: 300, // 5 minutes in seconds
+    });
+  });
+
+  /**
+   * POST /api/auth/logout
+   *
+   * Clears the session cookie and invalidates the session.
+   */
+  router.post('/logout', async (req, res) => {
+    const cookieName = getSessionCookieName();
+    const sessionToken = req.cookies?.[cookieName] as string | undefined;
+
+    if (sessionToken) {
+      await invalidateSession(sessionToken);
+    }
+
+    // Clear the cookie by setting it to empty with immediate expiration
+    // Using res.cookie() with maxAge: 0 is more reliable than clearCookie()
+    // in cross-origin development environments
+    res.cookie(cookieName, '', {
+      ...getSessionCookieOptions(),
+      maxAge: 0,
+      expires: new Date(0),
+    });
+
+    res.json({
+      success: true,
+      message: 'Logged out successfully.',
+    });
+  });
+
+  return router;
+}
--- a/apps/server/src/routes/auto-mode/index.ts
+++ b/apps/server/src/routes/auto-mode/index.ts
@@ -10,6 +10,8 @@ import { validatePathParams } from '../../middleware/validate-paths.js';
 import { createStopFeatureHandler } from './routes/stop-feature.js';
 import { createStatusHandler } from './routes/status.js';
 import { createRunFeatureHandler } from './routes/run-feature.js';
+import { createStartHandler } from './routes/start.js';
+import { createStopHandler } from './routes/stop.js';
 import { createVerifyFeatureHandler } from './routes/verify-feature.js';
 import { createResumeFeatureHandler } from './routes/resume-feature.js';
 import { createContextExistsHandler } from './routes/context-exists.js';
@@ -17,10 +19,15 @@ import { createAnalyzeProjectHandler } from './routes/analyze-project.js';
 import { createFollowUpFeatureHandler } from './routes/follow-up-feature.js';
 import { createCommitFeatureHandler } from './routes/commit-feature.js';
 import { createApprovePlanHandler } from './routes/approve-plan.js';
+import { createResumeInterruptedHandler } from './routes/resume-interrupted.js';

 export function createAutoModeRoutes(autoModeService: AutoModeService): Router {
  const router = Router();

+  // Auto loop control routes
+  router.post('/start', validatePathParams('projectPath'), createStartHandler(autoModeService));
+  router.post('/stop', validatePathParams('projectPath'), createStopHandler(autoModeService));
+
  router.post('/stop-feature', createStopFeatureHandler(autoModeService));
  router.post('/status', validatePathParams('projectPath?'), createStatusHandler(autoModeService));
  router.post(
@@ -63,6 +70,11 @@ export function createAutoModeRoutes(autoModeService: AutoModeService): Router {
    validatePathParams('projectPath'),
    createApprovePlanHandler(autoModeService)
  );
+  router.post(
+    '/resume-interrupted',
+    validatePathParams('projectPath'),
+    createResumeInterruptedHandler(autoModeService)
+  );

  return router;
 }
--- a/apps/server/src/routes/auto-mode/routes/follow-up-feature.ts
+++ b/apps/server/src/routes/auto-mode/routes/follow-up-feature.ts
@@ -31,7 +31,9 @@ export function createFollowUpFeatureHandler(autoModeService: AutoModeService) {
      // Start follow-up in background
      // followUpFeature derives workDir from feature.branchName
      autoModeService
-        .followUpFeature(projectPath, featureId, prompt, imagePaths, useWorktrees ?? true)
+        // Default to false to match run-feature/resume-feature behavior.
+        // Worktrees should only be used when explicitly enabled by the user.
+        .followUpFeature(projectPath, featureId, prompt, imagePaths, useWorktrees ?? false)
        .catch((error) => {
          logger.error(`[AutoMode] Follow up feature ${featureId} error:`, error);
        })
--- a/apps/server/src/routes/auto-mode/routes/resume-feature.ts
+++ b/apps/server/src/routes/auto-mode/routes/resume-feature.ts
@@ -31,7 +31,7 @@ export function createResumeFeatureHandler(autoModeService: AutoModeService) {
      autoModeService
        .resumeFeature(projectPath, featureId, useWorktrees ?? false)
        .catch((error) => {
-          logger.error(`[AutoMode] Resume feature ${featureId} error:`, error);
+          logger.error(`Resume feature ${featureId} error:`, error);
        });

      res.json({ success: true });
--- a/apps/server/src/routes/auto-mode/routes/resume-interrupted.ts
+++ b/apps/server/src/routes/auto-mode/routes/resume-interrupted.ts
@@ -0,0 +1,42 @@
+/**
+ * Resume Interrupted Features Handler
+ *
+ * Checks for features that were interrupted (in pipeline steps or in_progress)
+ * when the server was restarted and resumes them.
+ */
+
+import type { Request, Response } from 'express';
+import { createLogger } from '@automaker/utils';
+import type { AutoModeService } from '../../../services/auto-mode-service.js';
+
+const logger = createLogger('ResumeInterrupted');
+
+interface ResumeInterruptedRequest {
+  projectPath: string;
+}
+
+/**
+ * Build the POST /resume-interrupted handler.
+ *
+ * Responds 400 when `projectPath` is missing, 200 with `success: true` once
+ * `resumeInterruptedFeatures` resolves, and 500 with the error message when it
+ * rejects. Error responses carry `success: false` so clients can branch on the
+ * same flag the other auto-mode endpoints return.
+ *
+ * @param autoModeService - Service that performs the resume check
+ * @returns Express request handler
+ */
+export function createResumeInterruptedHandler(autoModeService: AutoModeService) {
+  return async (req: Request, res: Response): Promise<void> => {
+    const { projectPath } = req.body as ResumeInterruptedRequest;
+
+    if (!projectPath) {
+      res.status(400).json({ success: false, error: 'Project path is required' });
+      return;
+    }
+
+    logger.info(`Checking for interrupted features in ${projectPath}`);
+
+    try {
+      await autoModeService.resumeInterruptedFeatures(projectPath);
+      res.json({
+        success: true,
+        message: 'Resume check completed',
+      });
+    } catch (error) {
+      logger.error('Error resuming interrupted features:', error);
+      res.status(500).json({
+        success: false,
+        error: error instanceof Error ? error.message : 'Unknown error',
+      });
+    }
+  };
+}
--- a/apps/server/src/routes/auto-mode/routes/run-feature.ts
+++ b/apps/server/src/routes/auto-mode/routes/run-feature.ts
@@ -26,12 +26,30 @@ export function createRunFeatureHandler(autoModeService: AutoModeService) {
        return;
      }

+      // Check per-worktree capacity before starting
+      const capacity = await autoModeService.checkWorktreeCapacity(projectPath, featureId);
+      if (!capacity.hasCapacity) {
+        const worktreeDesc = capacity.branchName
+          ? `worktree "${capacity.branchName}"`
+          : 'main worktree';
+        res.status(429).json({
+          success: false,
+          error: `Agent limit reached for ${worktreeDesc} (${capacity.currentAgents}/${capacity.maxAgents}). Wait for running tasks to complete or increase the limit.`,
+          details: {
+            currentAgents: capacity.currentAgents,
+            maxAgents: capacity.maxAgents,
+            branchName: capacity.branchName,
+          },
+        });
+        return;
+      }
+
      // Start execution in background
      // executeFeature derives workDir from feature.branchName
      autoModeService
        .executeFeature(projectPath, featureId, useWorktrees ?? false, false)
        .catch((error) => {
-          logger.error(`[AutoMode] Feature ${featureId} error:`, error);
+          logger.error(`Feature ${featureId} error:`, error);
        })
        .finally(() => {
          // Release the starting slot when execution completes (success or error)
--- a/apps/server/src/routes/auto-mode/routes/start.ts
+++ b/apps/server/src/routes/auto-mode/routes/start.ts
@@ -0,0 +1,67 @@
+/**
+ * POST /start endpoint - Start auto mode loop for a project
+ */
+
+import type { Request, Response } from 'express';
+import type { AutoModeService } from '../../../services/auto-mode-service.js';
+import { createLogger } from '@automaker/utils';
+import { getErrorMessage, logError } from '../common.js';
+
+const logger = createLogger('AutoMode');
+
+/**
+ * Build the POST /start handler that launches the auto mode loop for a
+ * project/worktree.
+ *
+ * Replies 400 without a projectPath, reports `alreadyRunning: true` when the
+ * loop is already active for the target, and otherwise starts the loop and
+ * echoes the resolved concurrency. Any thrown error becomes a 500 response.
+ *
+ * @param autoModeService - Service that owns the auto loops
+ * @returns Express request handler
+ */
+export function createStartHandler(autoModeService: AutoModeService) {
+  return async (req: Request, res: Response): Promise<void> => {
+    try {
+      const { projectPath, branchName, maxConcurrency } = req.body as {
+        projectPath: string;
+        branchName?: string | null;
+        maxConcurrency?: number;
+      };
+
+      if (!projectPath) {
+        res.status(400).json({ success: false, error: 'projectPath is required' });
+        return;
+      }
+
+      // A missing branch name targets the main worktree, represented as null
+      const targetBranch = branchName ?? null;
+      let worktreeDesc = 'main worktree';
+      if (targetBranch) {
+        worktreeDesc = `worktree ${targetBranch}`;
+      }
+
+      // Nothing to do when the loop is already active for this target
+      const loopActive = autoModeService.isAutoLoopRunningForProject(projectPath, targetBranch);
+      if (loopActive) {
+        res.json({
+          success: true,
+          message: `Auto mode is already running for ${worktreeDesc}`,
+          alreadyRunning: true,
+          branchName: targetBranch,
+        });
+        return;
+      }
+
+      // Kick off the loop; the service reports the concurrency it settled on
+      const resolvedMaxConcurrency = await autoModeService.startAutoLoopForProject(
+        projectPath,
+        targetBranch,
+        maxConcurrency
+      );
+
+      logger.info(
+        `Started auto loop for ${worktreeDesc} in project: ${projectPath} with maxConcurrency: ${resolvedMaxConcurrency}`
+      );
+
+      res.json({
+        success: true,
+        message: `Auto mode started with max ${resolvedMaxConcurrency} concurrent features`,
+        branchName: targetBranch,
+      });
+    } catch (error) {
+      logError(error, 'Start auto mode failed');
+      res.status(500).json({ success: false, error: getErrorMessage(error) });
+    }
+  };
+}
--- a/apps/server/src/routes/auto-mode/routes/status.ts
+++ b/apps/server/src/routes/auto-mode/routes/status.ts
@@ -1,5 +1,8 @@
 /**
 * POST /status endpoint - Get auto mode status
+ *
+ * If projectPath is provided, returns per-project status including autoloop state.
+ * If no projectPath, returns global status for backward compatibility.
 */

 import type { Request, Response } from 'express';
@@ -9,10 +12,41 @@ import { getErrorMessage, logError } from '../common.js';
 export function createStatusHandler(autoModeService: AutoModeService) {
  return async (req: Request, res: Response): Promise<void> => {
    try {
+      const { projectPath, branchName } = req.body as {
+        projectPath?: string;
+        branchName?: string | null;
+      };
+
+      // If projectPath is provided, return per-project/worktree status
+      if (projectPath) {
+        // Normalize branchName: undefined becomes null
+        const normalizedBranchName = branchName ?? null;
+        const projectStatus = autoModeService.getStatusForProject(
+          projectPath,
+          normalizedBranchName
+        );
+        res.json({
+          success: true,
+          isRunning: projectStatus.runningCount > 0,
+          isAutoLoopRunning: projectStatus.isAutoLoopRunning,
+          runningFeatures: projectStatus.runningFeatures,
+          runningCount: projectStatus.runningCount,
+          maxConcurrency: projectStatus.maxConcurrency,
+          projectPath,
+          branchName: normalizedBranchName,
+        });
+        return;
+      }
+
+      // Fall back to global status for backward compatibility
      const status = autoModeService.getStatus();
+      const activeProjects = autoModeService.getActiveAutoLoopProjects();
+      const activeWorktrees = autoModeService.getActiveAutoLoopWorktrees();
      res.json({
        success: true,
        ...status,
+        activeAutoLoopProjects: activeProjects,
+        activeAutoLoopWorktrees: activeWorktrees,
      });
    } catch (error) {
      logError(error, 'Get status failed');
--- a/apps/server/src/routes/auto-mode/routes/stop.ts
+++ b/apps/server/src/routes/auto-mode/routes/stop.ts
@@ -0,0 +1,66 @@
+/**
+ * POST /stop endpoint - Stop auto mode loop for a project
+ */
+
+import type { Request, Response } from 'express';
+import type { AutoModeService } from '../../../services/auto-mode-service.js';
+import { createLogger } from '@automaker/utils';
+import { getErrorMessage, logError } from '../common.js';
+
+const logger = createLogger('AutoMode');
+
+/**
+ * Build the POST /stop handler that halts the auto mode loop for a
+ * project/worktree.
+ *
+ * Replies 400 without a projectPath, reports `wasRunning: false` when no loop
+ * is active for the target, and otherwise stops the loop and returns the count
+ * the service reports. Any thrown error becomes a 500 response.
+ *
+ * @param autoModeService - Service that owns the auto loops
+ * @returns Express request handler
+ */
+export function createStopHandler(autoModeService: AutoModeService) {
+  return async (req: Request, res: Response): Promise<void> => {
+    try {
+      const { projectPath, branchName } = req.body as {
+        projectPath: string;
+        branchName?: string | null;
+      };
+
+      if (!projectPath) {
+        res.status(400).json({ success: false, error: 'projectPath is required' });
+        return;
+      }
+
+      // A missing branch name targets the main worktree, represented as null
+      const targetBranch = branchName ?? null;
+      let worktreeDesc = 'main worktree';
+      if (targetBranch) {
+        worktreeDesc = `worktree ${targetBranch}`;
+      }
+
+      // Stopping a loop that is not active is reported as a successful no-op
+      const loopActive = autoModeService.isAutoLoopRunningForProject(projectPath, targetBranch);
+      if (!loopActive) {
+        res.json({
+          success: true,
+          message: `Auto mode is not running for ${worktreeDesc}`,
+          wasRunning: false,
+          branchName: targetBranch,
+        });
+        return;
+      }
+
+      const runningCount = await autoModeService.stopAutoLoopForProject(projectPath, targetBranch);
+
+      logger.info(
+        `Stopped auto loop for ${worktreeDesc} in project: ${projectPath}, ${runningCount} features still running`
+      );
+
+      res.json({
+        success: true,
+        message: 'Auto mode stopped',
+        runningFeaturesCount: runningCount,
+        branchName: targetBranch,
+      });
+    } catch (error) {
+      logError(error, 'Stop auto mode failed');
+      res.status(500).json({ success: false, error: getErrorMessage(error) });
+    }
+  };
+}
--- a/apps/server/src/routes/backlog-plan/common.ts
+++ b/apps/server/src/routes/backlog-plan/common.ts
@@ -0,0 +1,163 @@
+/**
+ * Common utilities for backlog plan routes
+ */
+
+import { createLogger } from '@automaker/utils';
+import { ensureAutomakerDir, getAutomakerDir } from '@automaker/platform';
+import * as secureFs from '../../lib/secure-fs.js';
+import path from 'path';
+import type { BacklogPlanResult } from '@automaker/types';
+
+const logger = createLogger('BacklogPlan');
+
+// State for tracking running generation
+// Module-level, so a single set of values is shared by every importer of this file.
+let isRunning = false;
+// Abort handle stored via setRunningState(); null when none has been stored.
+let currentAbortController: AbortController | null = null;
+// Details stored via setRunningDetails(); reset to null whenever
+// setRunningState(false) is called.
+let runningDetails: {
+  projectPath: string;
+  prompt: string;
+  model?: string;
+  startedAt: string;
+} | null = null;
+
+// File name of the persisted plan inside the project's automaker directory
+const BACKLOG_PLAN_FILENAME = 'backlog-plan.json';
+
+/**
+ * On-disk shape of a saved backlog plan (serialized as JSON by saveBacklogPlan).
+ */
+export interface StoredBacklogPlan {
+  savedAt: string;
+  prompt: string;
+  model?: string;
+  result: BacklogPlanResult;
+}
+
+/**
+ * Report whether a backlog plan generation is currently flagged as running.
+ *
+ * @returns Snapshot object holding the current `isRunning` flag
+ */
+export function getBacklogPlanStatus(): { isRunning: boolean } {
+  const status = { isRunning };
+  return status;
+}
+
+/**
+ * Update the running flag and, optionally, the stored abort controller.
+ *
+ * Marking the state as not running also discards the running details.
+ *
+ * @param running - New value of the running flag
+ * @param abortController - When provided (including `null`), replaces the
+ *   stored controller; when omitted, the stored controller is left untouched
+ */
+export function setRunningState(running: boolean, abortController?: AbortController | null): void {
+  isRunning = running;
+
+  if (running === false) {
+    runningDetails = null;
+  }
+
+  // Omitted argument means "keep whatever controller is stored"
+  if (abortController === undefined) {
+    return;
+  }
+  currentAbortController = abortController;
+}
+
+/**
+ * Store (or clear, with `null`) the details describing the running generation.
+ *
+ * @param details - Details to remember, or `null` to clear them
+ */
+export function setRunningDetails(
+  details: {
+    projectPath: string;
+    prompt: string;
+    model?: string;
+    startedAt: string;
+  } | null
+): void {
+  const nextDetails = details;
+  runningDetails = nextDetails;
+}
+
+/**
+ * Read the details stored for the running generation.
+ *
+ * @returns The stored details, or `null` when none are stored
+ */
+export function getRunningDetails(): {
+  projectPath: string;
+  prompt: string;
+  model?: string;
+  startedAt: string;
+} | null {
+  const details = runningDetails;
+  return details;
+}
+
+/**
+ * Resolve the location of the saved backlog plan file for a project.
+ *
+ * @param projectPath - Project root passed to getAutomakerDir
+ * @returns Path of the plan file inside the project's automaker directory
+ */
+function getBacklogPlanPath(projectPath: string): string {
+  const automakerDir = getAutomakerDir(projectPath);
+  return path.join(automakerDir, BACKLOG_PLAN_FILENAME);
+}
+
+/**
+ * Persist a backlog plan as pretty-printed JSON in the project's automaker
+ * directory, creating that directory first if needed.
+ *
+ * @param projectPath - Project root
+ * @param plan - Plan to write
+ */
+export async function saveBacklogPlan(projectPath: string, plan: StoredBacklogPlan): Promise<void> {
+  await ensureAutomakerDir(projectPath);
+  const serialized = JSON.stringify(plan, null, 2);
+  await secureFs.writeFile(getBacklogPlanPath(projectPath), serialized, 'utf-8');
+}
+
+/**
+ * Load the saved backlog plan for a project.
+ *
+ * @param projectPath - Project root
+ * @returns The stored plan, or `null` when the file cannot be read or parsed,
+ *   or when its `result.changes` is not an array
+ */
+export async function loadBacklogPlan(projectPath: string): Promise<StoredBacklogPlan | null> {
+  let stored: StoredBacklogPlan;
+  try {
+    const contents = await secureFs.readFile(getBacklogPlanPath(projectPath), 'utf-8');
+    stored = JSON.parse(contents as string) as StoredBacklogPlan;
+  } catch {
+    // Unreadable or malformed file: behave as if no plan was saved
+    return null;
+  }
+
+  // Minimal shape check before handing the plan to callers
+  return Array.isArray(stored?.result?.changes) ? stored : null;
+}
+
+/**
+ * Delete the saved backlog plan for a project, if any.
+ *
+ * Best-effort: never throws. A missing file is silently ignored; any other
+ * failure is logged as a warning so it does not go unnoticed.
+ *
+ * @param projectPath - Project root
+ */
+export async function clearBacklogPlan(projectPath: string): Promise<void> {
+  try {
+    const filePath = getBacklogPlanPath(projectPath);
+    await secureFs.unlink(filePath);
+  } catch (error) {
+    // ignore missing file
+    // NOTE(review): assumes secureFs.unlink surfaces Node's `code` property on
+    // errors - confirm; if it does not, missing files will also be logged here.
+    const code = (error as { code?: unknown } | null | undefined)?.code;
+    if (code === 'ENOENT') {
+      return;
+    }
+    logger.warn('Failed to clear saved backlog plan:', error);
+  }
+}
+
+/**
+ * Read the abort controller stored via setRunningState.
+ *
+ * @returns The stored controller, or `null` when none is stored
+ */
+export function getAbortController(): AbortController | null {
+  const controller = currentAbortController;
+  return controller;
+}
+
+/**
+ * Translate a raw SDK/CLI error message into a user-friendly one.
+ *
+ * Categories are checked in a fixed order (spawn failure, process crash, rate
+ * limit, network, authentication); the first match wins. Messages that match
+ * no category are returned unchanged.
+ *
+ * @param rawMessage - Error text as produced by the SDK/CLI
+ * @returns Friendly message for a known category, otherwise `rawMessage`
+ */
+export function mapBacklogPlanError(rawMessage: string): string {
+  const lowered = rawMessage.toLowerCase();
+  // Case-sensitive match against the message exactly as received
+  const rawHasAny = (...needles: string[]): boolean =>
+    needles.some((needle) => rawMessage.includes(needle));
+  // Case-insensitive match (needles must already be lowercase)
+  const loweredHasAny = (...needles: string[]): boolean =>
+    needles.some((needle) => lowered.includes(needle));
+
+  // Claude Code spawn failures
+  const spawnFailed = rawHasAny(
+    'Failed to spawn Claude Code process',
+    'spawn node ENOENT',
+    'Claude Code executable not found',
+    'Claude Code native binary not found'
+  );
+  if (spawnFailed) {
+    return 'Claude CLI could not be launched. Make sure the Claude CLI is installed and available in PATH, or check that Node.js is correctly installed. Try running "which claude" or "claude --version" in your terminal to verify.';
+  }
+
+  // Claude Code process crash
+  if (rawHasAny('Claude Code process exited')) {
+    return 'Claude exited unexpectedly. Try again. If it keeps happening, re-run `claude login` or update your API key in Setup.';
+  }
+
+  // Rate limiting
+  if (loweredHasAny('rate limit') || rawHasAny('429')) {
+    return 'Rate limited. Please wait a moment and try again.';
+  }
+
+  // Network errors
+  if (loweredHasAny('network', 'econnrefused', 'timeout')) {
+    return 'Network error. Check your internet connection and try again.';
+  }
+
+  // Authentication errors
+  if (loweredHasAny('not authenticated', 'unauthorized') || rawHasAny('401')) {
+    return 'Authentication failed. Please check your API key or run `claude login` to authenticate.';
+  }
+
+  // Unknown errors pass through untouched
+  return rawMessage;
+}
+
+/**
+ * Produce a user-friendly message for any thrown value.
+ *
+ * Uses `error.message` for Error instances and `String(error)` otherwise,
+ * then runs the text through mapBacklogPlanError.
+ *
+ * @param error - Value caught from a `catch` clause
+ * @returns Mapped, user-facing message
+ */
+export function getErrorMessage(error: unknown): string {
+  const rawMessage = error instanceof Error ? error.message : String(error);
+  return mapBacklogPlanError(rawMessage);
+}
+
+/**
+ * Log an error through the module logger, prefixed with a context label.
+ *
+ * Only the mapped, user-friendly message is logged (see getErrorMessage).
+ *
+ * @param error - Value caught from a `catch` clause
+ * @param context - Short description of the operation that failed
+ */
+export function logError(error: unknown, context: string): void {
+  const friendlyMessage = getErrorMessage(error);
+  logger.error(`[BacklogPlan] ${context}:`, friendlyMessage);
+}
+
+export { logger };
--- a/apps/server/src/routes/backlog-plan/generate-plan.ts
+++ b/apps/server/src/routes/backlog-plan/generate-plan.ts
@@ -0,0 +1,266 @@
+/**
+ * Generate backlog plan using Claude AI
+ *
+ * Model is configurable via phaseModels.backlogPlanningModel in settings
+ * (defaults to Sonnet). Can be overridden per-call via model parameter.
+ */
+
+import type { EventEmitter } from '../../lib/events.js';
+import type { Feature, BacklogPlanResult, BacklogChange, DependencyUpdate } from '@automaker/types';
+import {
+  DEFAULT_PHASE_MODELS,
+  isCursorModel,
+  stripProviderPrefix,
+  type ThinkingLevel,
+} from '@automaker/types';
+import { resolvePhaseModel } from '@automaker/model-resolver';
+import { FeatureLoader } from '../../services/feature-loader.js';
+import { ProviderFactory } from '../../providers/provider-factory.js';
+import { extractJsonWithArray } from '../../lib/json-extractor.js';
+import {
+  logger,
+  setRunningState,
+  setRunningDetails,
+  getErrorMessage,
+  saveBacklogPlan,
+} from './common.js';
+import type { SettingsService } from '../../services/settings-service.js';
+import {
+  getAutoLoadClaudeMdSetting,
+  getPromptCustomization,
+  getPhaseModelWithOverrides,
+} from '../../lib/settings-helpers.js';
+
+const featureLoader = new FeatureLoader();
+
+/**
+ * Render the backlog as a plain-text list for inclusion in the AI prompt.
+ *
+ * Each feature becomes a "- ID: ..." entry followed by indented Title,
+ * Description, Category, Status and, when present, Priority and Dependencies
+ * lines. Entries are separated by a blank line.
+ *
+ * @param features - Features to describe
+ * @returns The formatted list, or a fixed notice when there are no features
+ */
+function formatFeaturesForPrompt(features: Feature[]): string {
+  if (features.length === 0) {
+    return 'No features in backlog yet.';
+  }
+
+  const entries: string[] = [];
+  for (const feature of features) {
+    const depsLine = feature.dependencies?.length
+      ? `Dependencies: [${feature.dependencies.join(', ')}]`
+      : '';
+    const priorityLine = feature.priority !== undefined ? `Priority: ${feature.priority}` : '';
+
+    // Optional lines stay in place (indented, possibly empty); the final trim
+    // only strips whitespace at the very start and end of the entry.
+    const lines = [
+      `- ID: ${feature.id}`,
+      `  Title: ${feature.title || 'Untitled'}`,
+      `  Description: ${feature.description}`,
+      `  Category: ${feature.category}`,
+      `  Status: ${feature.status || 'backlog'}`,
+      `  ${priorityLine}`,
+      `  ${depsLine}`,
+    ];
+    entries.push(lines.join('\n').trim());
+  }
+
+  return entries.join('\n\n');
+}
+
+/**
+ * Turn the raw AI response text into a BacklogPlanResult.
+ *
+ * Delegates extraction to extractJsonWithArray (keyed on 'changes'). When
+ * nothing usable is extracted, diagnostics are logged and an empty plan with a
+ * failure summary is returned instead of throwing.
+ *
+ * @param response - Full text collected from the model
+ * @returns The extracted plan, or an empty fallback plan
+ */
+function parsePlanResponse(response: string): BacklogPlanResult {
+  const extracted = extractJsonWithArray<BacklogPlanResult>(response, 'changes', {
+    logger,
+  });
+
+  if (!extracted) {
+    // Nothing parseable: leave a trail for debugging, then fall back
+    logger.warn('[BacklogPlan] Failed to parse AI response as JSON');
+    logger.warn('[BacklogPlan] Response text length:', response.length);
+    logger.warn('[BacklogPlan] Response preview:', response.slice(0, 500));
+    if (response.length === 0) {
+      logger.error('[BacklogPlan] Response text is EMPTY! No content was extracted from stream.');
+    }
+
+    const emptyPlan: BacklogPlanResult = {
+      changes: [],
+      summary: 'Failed to parse AI response',
+      dependencyUpdates: [],
+    };
+    return emptyPlan;
+  }
+
+  return extracted;
+}
+
+/**
+ * Generate a backlog modification plan based on user prompt
+ *
+ * Loads the current features, builds the prompts from the configured
+ * templates, streams a single-turn query through the provider chosen for the
+ * effective model, parses the response, saves the plan and emits progress,
+ * completion or error events on `backlog-plan:event`.
+ *
+ * The running state and running details are always reset when this function
+ * settles, whether it succeeds or throws.
+ *
+ * @param projectPath - Project whose backlog is being planned
+ * @param prompt - The user's request
+ * @param events - Emitter used for progress/complete/error events
+ * @param abortController - Aborts the generation; checked on every streamed message
+ * @param settingsService - Optional settings source for prompts, model and credentials
+ * @param model - Optional explicit model override
+ * @returns The parsed plan (an empty plan if the response could not be parsed)
+ * @throws Re-throws any failure after emitting a `backlog_plan_error` event
+ */
+export async function generateBacklogPlan(
+  projectPath: string,
+  prompt: string,
+  events: EventEmitter,
+  abortController: AbortController,
+  settingsService?: SettingsService,
+  model?: string
+): Promise<BacklogPlanResult> {
+  try {
+    // Load current features
+    const features = await featureLoader.getAll(projectPath);
+
+    events.emit('backlog-plan:event', {
+      type: 'backlog_plan_progress',
+      content: `Loaded ${features.length} features from backlog`,
+    });
+
+    // Load prompts from settings
+    const prompts = await getPromptCustomization(settingsService, '[BacklogPlan]');
+
+    // Build the system prompt
+    const systemPrompt = prompts.backlogPlan.systemPrompt;
+
+    // Build the user prompt from template.
+    // Replacer functions are used on purpose: with a plain string replacement,
+    // sequences such as `$&`, `$'` or `$$` inside the feature text or the user
+    // request would be interpreted as replacement patterns and corrupt the prompt.
+    const currentFeatures = formatFeaturesForPrompt(features);
+    const userPrompt = prompts.backlogPlan.userPromptTemplate
+      .replace('{{currentFeatures}}', () => currentFeatures)
+      .replace('{{userRequest}}', () => prompt);
+
+    events.emit('backlog-plan:event', {
+      type: 'backlog_plan_progress',
+      content: 'Generating plan with AI...',
+    });
+
+    // Get the model to use from settings or provided override with provider info
+    let effectiveModel = model;
+    let thinkingLevel: ThinkingLevel | undefined;
+    let claudeCompatibleProvider: import('@automaker/types').ClaudeCompatibleProvider | undefined;
+    let credentials: import('@automaker/types').Credentials | undefined;
+
+    if (effectiveModel) {
+      // Use explicit override - resolve model alias and get credentials
+      const resolved = resolvePhaseModel({ model: effectiveModel });
+      effectiveModel = resolved.model;
+      thinkingLevel = resolved.thinkingLevel;
+      credentials = await settingsService?.getCredentials();
+    } else if (settingsService) {
+      // Use settings-based model with provider info
+      const phaseResult = await getPhaseModelWithOverrides(
+        'backlogPlanningModel',
+        settingsService,
+        projectPath,
+        '[BacklogPlan]'
+      );
+      const resolved = resolvePhaseModel(phaseResult.phaseModel);
+      effectiveModel = resolved.model;
+      thinkingLevel = resolved.thinkingLevel;
+      claudeCompatibleProvider = phaseResult.provider;
+      credentials = phaseResult.credentials;
+    } else {
+      // Fallback to defaults
+      const resolved = resolvePhaseModel(DEFAULT_PHASE_MODELS.backlogPlanningModel);
+      effectiveModel = resolved.model;
+      thinkingLevel = resolved.thinkingLevel;
+    }
+    logger.info(
+      '[BacklogPlan] Using model:',
+      effectiveModel,
+      claudeCompatibleProvider ? `via provider: ${claudeCompatibleProvider.name}` : 'direct API'
+    );
+
+    const provider = ProviderFactory.getProviderForModel(effectiveModel);
+    // Strip provider prefix - providers expect bare model IDs
+    const bareModel = stripProviderPrefix(effectiveModel);
+
+    // Get autoLoadClaudeMd setting
+    const autoLoadClaudeMd = await getAutoLoadClaudeMdSetting(
+      projectPath,
+      settingsService,
+      '[BacklogPlan]'
+    );
+
+    // For Cursor models, we need to combine prompts with explicit instructions
+    // because Cursor doesn't support systemPrompt separation like Claude SDK
+    let finalPrompt = userPrompt;
+    let finalSystemPrompt: string | undefined = systemPrompt;
+
+    if (isCursorModel(effectiveModel)) {
+      logger.info('[BacklogPlan] Using Cursor model - adding explicit no-file-write instructions');
+      finalPrompt = `${systemPrompt}
+
+CRITICAL INSTRUCTIONS:
+1. DO NOT write any files. Return the JSON in your response only.
+2. DO NOT use Write, Edit, or any file modification tools.
+3. Respond with ONLY a JSON object - no explanations, no markdown, just raw JSON.
+4. Your entire response should be valid JSON starting with { and ending with }.
+5. No text before or after the JSON object.
+
+${userPrompt}`;
+      finalSystemPrompt = undefined; // System prompt is now embedded in the user prompt
+    }
+
+    // Execute the query
+    const stream = provider.executeQuery({
+      prompt: finalPrompt,
+      model: bareModel,
+      cwd: projectPath,
+      systemPrompt: finalSystemPrompt,
+      maxTurns: 1,
+      allowedTools: [], // No tools needed for this
+      abortController,
+      settingSources: autoLoadClaudeMd ? ['user', 'project'] : undefined,
+      readOnly: true, // Plan generation only generates text, doesn't write files
+      thinkingLevel, // Pass thinking level for extended thinking
+      claudeCompatibleProvider, // Pass provider for alternative endpoint configuration
+      credentials, // Pass credentials for resolving 'credentials' apiKeySource
+    });
+
+    let responseText = '';
+
+    for await (const msg of stream) {
+      if (abortController.signal.aborted) {
+        throw new Error('Generation aborted');
+      }
+
+      if (msg.type === 'assistant') {
+        if (msg.message?.content) {
+          for (const block of msg.message.content) {
+            if (block.type === 'text') {
+              responseText += block.text;
+            }
+          }
+        }
+      } else if (msg.type === 'result' && msg.subtype === 'success' && msg.result) {
+        // Use result if it's a final accumulated message (from Cursor provider)
+        logger.info('[BacklogPlan] Received result from Cursor, length:', msg.result.length);
+        logger.info('[BacklogPlan] Previous responseText length:', responseText.length);
+        if (msg.result.length > responseText.length) {
+          logger.info('[BacklogPlan] Using Cursor result (longer than accumulated text)');
+          responseText = msg.result;
+        } else {
+          logger.info('[BacklogPlan] Keeping accumulated text (longer than Cursor result)');
+        }
+      }
+    }
+
+    // Parse the response
+    const result = parsePlanResponse(responseText);
+
+    await saveBacklogPlan(projectPath, {
+      savedAt: new Date().toISOString(),
+      prompt,
+      model: effectiveModel,
+      result,
+    });
+
+    events.emit('backlog-plan:event', {
+      type: 'backlog_plan_complete',
+      result,
+    });
+
+    return result;
+  } catch (error) {
+    const errorMessage = getErrorMessage(error);
+    logger.error('[BacklogPlan] Generation failed:', errorMessage);
+
+    events.emit('backlog-plan:event', {
+      type: 'backlog_plan_error',
+      error: errorMessage,
+    });
+
+    throw error;
+  } finally {
+    // Always release the running state, on success and on failure
+    setRunningState(false, null);
+    setRunningDetails(null);
+  }
+}
--- a/apps/server/src/routes/backlog-plan/index.ts
+++ b/apps/server/src/routes/backlog-plan/index.ts
@@ -0,0 +1,32 @@
+/**
+ * Backlog Plan routes - HTTP API for AI-assisted backlog modification
+ */
+
+import { Router } from 'express';
+import type { EventEmitter } from '../../lib/events.js';
+import { validatePathParams } from '../../middleware/validate-paths.js';
+import { createGenerateHandler } from './routes/generate.js';
+import { createStopHandler } from './routes/stop.js';
+import { createStatusHandler } from './routes/status.js';
+import { createApplyHandler } from './routes/apply.js';
+import { createClearHandler } from './routes/clear.js';
+import type { SettingsService } from '../../services/settings-service.js';
+
+/**
+ * Assemble the backlog plan router.
+ *
+ * Registers POST /generate, POST /stop, GET /status, POST /apply and
+ * POST /clear. Every route except /stop validates `projectPath` first.
+ *
+ * @param events - Emitter handed to the generate handler
+ * @param settingsService - Optional settings service handed to the generate handler
+ * @returns Express Router with the backlog plan endpoints
+ */
+export function createBacklogPlanRoutes(
+  events: EventEmitter,
+  settingsService?: SettingsService
+): Router {
+  const router = Router();
+
+  const generateHandler = createGenerateHandler(events, settingsService);
+  router.post('/generate', validatePathParams('projectPath'), generateHandler);
+
+  router.post('/stop', createStopHandler());
+
+  const statusHandler = createStatusHandler();
+  router.get('/status', validatePathParams('projectPath'), statusHandler);
+
+  const applyHandler = createApplyHandler();
+  router.post('/apply', validatePathParams('projectPath'), applyHandler);
+
+  const clearHandler = createClearHandler();
+  router.post('/clear', validatePathParams('projectPath'), clearHandler);
+
+  return router;
+}
--- a/Show More
+++ b/Show More