chore: update .gitignore and remove obsolete automaker files

- Added .automaker/ to .gitignore to prevent tracking of the entire directory.
- Deleted outdated files including app_spec.txt, categories.json, memory.md, clean-code.md, and gemini.md from the .automaker context.
- Enhanced mcp-server-factory.js and spec-regeneration-service.js to enforce status management for new features, ensuring new features default to "backlog" and clarifying status handling in prompts and comments.
- Introduced a new file browsing endpoint in fs.ts to improve directory navigation while maintaining security constraints.
Alec Koifman
2025-12-12 17:34:16 -05:00
parent a4c5567768
commit 28bbc3e0e1
10 changed files with 161 additions and 947 deletions

View File

@@ -1,202 +0,0 @@
<project_specification>
<project_name>Automaker - Autonomous AI Development Studio</project_name>
<overview>
Automaker is a sophisticated desktop application that empowers developers to build software autonomously through AI-powered agents. Built with Electron and Next.js, it provides an intelligent GUI for project management, feature tracking via Kanban boards, and autonomous code generation. The application leverages multiple AI models (Claude, GPT) and supports complex workflows including git worktree isolation, testing automation, and multi-model agent execution. It acts as a complete development orchestrator, managing the entire lifecycle from specification to verified implementation.
</overview>
<technology_stack>
<frontend>
<framework>Next.js 16.0.7 (App Router)</framework>
<ui_library>shadcn/ui with Radix UI primitives</ui_library>
<styling>Tailwind CSS 4.0</styling>
<state_management>Zustand with persistence</state_management>
<drag_drop>@dnd-kit for Kanban board</drag_drop>
<icons>Lucide React</icons>
<query_client>TanStack Query for server state</query_client>
</frontend>
<desktop_shell>
<framework>Electron 39.2.6</framework>
<language>TypeScript 5.x</language>
<inter_process_communication>Electron IPC with security sandboxing</inter_process_communication>
<file_system>Node.js fs/promises with path validation</file_system>
</desktop_shell>
<ai_engine>
<primary_model>Claude 3.5 (Opus, Sonnet, Haiku) via Anthropic Claude Agent SDK</primary_model>
<secondary_model>GPT-5.1 Codex family via OpenAI CLI</secondary_model>
<orchestration>Custom Agent Service with streaming responses</orchestration>
<model_registry>Dynamic model provider system with CLI detection</model_registry>
</ai_engine>
<testing>
<framework>Playwright for E2E testing</framework>
<unit>Jest/Vitest compatible</unit>
<integration>Agent-driven test execution and verification</integration>
</testing>
<version_control>
<system>Git with worktree isolation support</system>
<branching>Feature branch management</branching>
<workflow>Automated commit and merge capabilities</workflow>
</version_control>
</technology_stack>
<core_capabilities>
<project_management>
- Open and manage multiple local projects
- Project-specific themes and configurations
- Session management with project context
- Recently used project cycling (Q/E shortcuts)
- Project search and type-ahead selection
- Trash and restore functionality for projects
</project_management>
<intelligent_analysis>
- Auto-generation and updating of app_spec.txt
- Feature extraction from existing codebases
- Technology stack detection and documentation
- Project structure analysis with file tree visualization
- "Project Ingestion": Analyzes existing codebases to understand structure
- Auto-generation of `.automaker/app_spec.txt` based on codebase analysis
- Auto-generation of features in `.automaker/features/{id}/feature.json`:
- Scans code for implemented features
- Creates test cases for existing features
- Marks existing features as "passes": true automatically
</intelligent_analysis>
<kanban_workflow>
- Visual representation of features from `.automaker/features/` folder
- Drag-and-drop interface to reprioritize tasks
- Direct editing of feature details (steps, description) from the card
- Visual Kanban board with drag-and-drop functionality
- Multiple status columns: Backlog, In Progress, Waiting Approval, Verified
- Feature cards with detailed information display (3 detail levels)
- Real-time status updates during agent execution
- Search and filtering capabilities
- Category management and autocomplete
- Image attachment support for feature descriptions
</kanban_workflow>
<autonomous_agent_engine>
- Multi-model agent system with profile-based execution
- Streaming agent output with real-time logs
- Git worktree isolation for safe feature development
- Automatic testing and verification workflows
- Context-aware prompt generation
- Agent memory and learning capabilities
- Concurrent feature processing with configurable limits
- Follow-up and resume capabilities
</autonomous_agent_engine>
<advanced_workflows>
- Git worktree management for isolated development
- Feature-specific branching and merging
- Automated commit generation with file tracking
- Test-driven development support
- Code review and approval workflows
- Revert and rollback capabilities
</advanced_workflows>
<user_interface>
- Dark/Light theme support with 12 custom themes
- Per-project theme configurations
- Comprehensive keyboard shortcut system
- Sidebar navigation with project switching
- Multi-view architecture (Board, Spec, Agent, Context, Settings)
- Setup wizard for first-time configuration
- CLI integration status monitoring
</user_interface>
<extensibility>
- AI Profile system for model/thinking level presets
- Keyboard shortcut customization
- Model provider plugin architecture
- Context file management for agent guidance
- Feature suggestion generation
- Spec regeneration workflows
</extensibility>
</core_capabilities>
<ui_layout>
<window_structure>
- Sidebar: Project List, Settings, Logs, Plugins
- Main Content:
- **Spec View**: Split editor for `.automaker/app_spec.txt`
- **Board View**: Kanban board for `.automaker/features/` folder
- **Code View**: Read-only Monaco editor to see what the agent is writing
- **Agent View**: Chat-like interface showing agent thought process and tool usage. Also used for the "New Project Interview".
</window_structure>
<theme>
- Dark/Light mode support (system sync)
- "Hacker" aesthetic option (terminal-like)
- Professional/Clean default
</theme>
</ui_layout>
<development_workflow>
<local_testing>
- "Browser Mode": Run the Next.js frontend in a standard browser with mocked Electron IPC for rapid UI iteration.
- "Electron Mode": Full desktop app testing.
- Hot Reloading for both Main and Renderer processes.
</local_testing>
</development_workflow>
<implemented_features>
- Complete Kanban board with drag-and-drop functionality
- Multi-model AI agent execution (Claude + GPT/Codex)
- Git worktree isolation for features
- Real-time agent output streaming and logging
- Project management with session persistence
- Theme system with 12 themes + per-project themes
- Comprehensive settings panel with all configurations
- Feature image attachment and context system
- Agent profiles with model/thinking level presets
- Keyboard shortcut system with customization
- CLI integration detection (Claude Code + Codex CLI)
- Auto mode for autonomous feature processing
- Feature suggestions generation
- Spec regeneration and project analysis
- Context file management
- Chat history and session management
- File diff viewing and git integration
- Search and filtering across all features
- Category management and autocomplete
- Test automation and verification workflows
</implemented_features>
<implementation_roadmap>
<phase_1_foundation>
- Enhanced error handling and recovery mechanisms
- Performance optimization for large projects
- Improved memory management for long-running sessions
- Advanced logging and debugging capabilities
</phase_1_foundation>
<phase_2_core_logic>
- Plugin system for custom model providers
- Advanced workflow customization engine
- Team collaboration features
- Cloud synchronization capabilities
- Advanced project templates and scaffolding
</phase_2_core_logic>
<phase_3_kanban_and_interaction>
- Build Kanban board with drag-and-drop
- Connect Kanban state to `.automaker/features/` filesystem
- Implement "Run Feature" capability
- Integrate standard prompts library
</phase_3_kanban_and_interaction>
<phase_3_polish>
- Enhanced accessibility features
- Advanced theme customization
- Performance monitoring and analytics
- Documentation generation automation
- Integration with external development tools
- Advanced security auditing and sandboxing
</phase_3_polish>
<phase_4_polish>
- Advanced terminal integration
- Settings & Extensibility
- UI refinement
</phase_4_polish>
</implementation_roadmap>
</project_specification>

View File

@@ -1,9 +0,0 @@
[
"Agent Runner",
"Core",
"Kanban",
"Other",
"Settings",
"Uncategorized",
"ka"
]

View File

@@ -1,474 +0,0 @@
# Clean Code Guidelines
## Overview
This document serves as a comprehensive guide for writing clean, maintainable, and extensible code. It outlines principles and practices that ensure code quality, reusability, and long-term maintainability. When writing or reviewing code, follow these guidelines to create software that is easy to understand, modify, and extend. This file is used by LLMs to understand and enforce coding standards throughout the codebase.
---
## Core Principles
### 1. DRY (Don't Repeat Yourself)
**Principle**: Every piece of knowledge should have a single, unambiguous representation within a system.
**Practices**:
- Extract repeated logic into reusable functions, classes, or modules
- Use constants for repeated values
- Create shared utilities for common operations
- Avoid copy-pasting code blocks
- When you find yourself writing similar code more than twice, refactor it
**Example - Bad**:
```typescript
// Repeated validation logic
if (email.includes("@") && email.length > 5) {
// ...
}
if (email.includes("@") && email.length > 5) {
// ...
}
```
**Example - Good**:
```typescript
function isValidEmail(email: string): boolean {
return email.includes("@") && email.length > 5;
}
if (isValidEmail(email)) {
// ...
}
```
---
### 2. Code Reusability
**Principle**: Write code that can be used in multiple contexts without modification or with minimal adaptation.
**Practices**:
- Create generic, parameterized functions instead of specific ones
- Use composition over inheritance where appropriate
- Design functions to be pure (no side effects) when possible
- Create utility libraries for common operations
- Use dependency injection to make components reusable
- Design APIs that are flexible and configurable
**Example - Bad**:
```typescript
function calculateUserTotal(userId: string) {
const user = getUser(userId);
return user.items.reduce((sum, item) => sum + item.price, 0);
}
```
**Example - Good**:
```typescript
function calculateTotal<T extends { price: number }>(items: T[]): number {
return items.reduce((sum, item) => sum + item.price, 0);
}
function calculateUserTotal(userId: string) {
const user = getUser(userId);
return calculateTotal(user.items);
}
```
---
### 3. Abstract Functions and Abstractions
**Principle**: Create abstractions that hide implementation details and provide clear, simple interfaces.
**Practices**:
- Use interfaces and abstract classes to define contracts
- Create abstraction layers between different concerns
- Hide complex implementation behind simple function signatures
- Use dependency inversion - depend on abstractions, not concretions
- Create factory functions/classes for object creation
- Use strategy pattern for interchangeable algorithms
**Example - Bad**:
```typescript
function processPayment(amount: number, cardNumber: string, cvv: string) {
// Direct implementation tied to specific payment processor
fetch("https://stripe.com/api/charge", {
method: "POST",
body: JSON.stringify({ amount, cardNumber, cvv }),
});
}
```
**Example - Good**:
```typescript
interface PaymentProcessor {
processPayment(
amount: number,
details: PaymentDetails
): Promise<PaymentResult>;
}
class StripeProcessor implements PaymentProcessor {
async processPayment(
amount: number,
details: PaymentDetails
): Promise<PaymentResult> {
// Implementation
}
}
function processPayment(
processor: PaymentProcessor,
amount: number,
details: PaymentDetails
) {
return processor.processPayment(amount, details);
}
```
---
### 4. Extensibility
**Principle**: Design code that can be easily extended with new features without modifying existing code.
**Practices**:
- Follow the Open/Closed Principle: open for extension, closed for modification
- Use plugin architectures and hooks for extensibility
- Design with future requirements in mind (but don't over-engineer)
- Use configuration over hardcoding
- Create extension points through interfaces and callbacks
- Use composition and dependency injection
- Design APIs that can accommodate new parameters/options
**Example - Bad**:
```typescript
function sendNotification(user: User, type: string) {
if (type === "email") {
sendEmail(user.email);
} else if (type === "sms") {
sendSMS(user.phone);
}
// Adding new notification types requires modifying this function
}
```
**Example - Good**:
```typescript
interface NotificationChannel {
send(user: User): Promise<void>;
}
class EmailChannel implements NotificationChannel {
async send(user: User): Promise<void> {
// Implementation
}
}
class SMSChannel implements NotificationChannel {
async send(user: User): Promise<void> {
// Implementation
}
}
class NotificationService {
constructor(private channels: NotificationChannel[]) {}
async send(user: User): Promise<void> {
await Promise.all(this.channels.map((channel) => channel.send(user)));
}
}
// New notification types can be added without modifying existing code
```
---
### 5. Avoid Magic Numbers and Strings
**Principle**: Use named constants instead of hardcoded values to improve readability and maintainability.
**Practices**:
- Extract all magic numbers into named constants
- Use enums for related constants
- Create configuration objects for settings
- Use constants for API endpoints, timeouts, limits, etc.
- Document why specific values are used
**Example - Bad**:
```typescript
if (user.age >= 18) {
// What does 18 mean?
}
setTimeout(() => {
// What does 3000 mean?
}, 3000);
if (status === "active") {
// What are the valid statuses?
}
```
**Example - Good**:
```typescript
const MINIMUM_AGE_FOR_ADULTS = 18;
const SESSION_TIMEOUT_MS = 3000;
enum UserStatus {
ACTIVE = "active",
INACTIVE = "inactive",
SUSPENDED = "suspended",
}
if (user.age >= MINIMUM_AGE_FOR_ADULTS) {
// Clear intent
}
setTimeout(() => {
// Clear intent
}, SESSION_TIMEOUT_MS);
if (status === UserStatus.ACTIVE) {
// Type-safe and clear
}
```
---
## Additional Best Practices
### 6. Single Responsibility Principle
Each function, class, or module should have one reason to change.
**Example**:
```typescript
// Bad: Multiple responsibilities
class User {
save() {
/* database logic */
}
sendEmail() {
/* email logic */
}
validate() {
/* validation logic */
}
}
// Good: Single responsibility
class User {
validate() {
/* validation only */
}
}
class UserRepository {
save(user: User) {
/* database logic */
}
}
class EmailService {
sendToUser(user: User) {
/* email logic */
}
}
```
### 7. Meaningful Names
- Use descriptive names that reveal intent
- Avoid abbreviations unless they're widely understood
- Use verbs for functions, nouns for classes
- Be consistent with naming conventions
**Example**:
```typescript
// Bad
const d = new Date();
const u = getUser();
function calc(x, y) {}
// Good
const currentDate = new Date();
const currentUser = getUser();
function calculateTotal(price: number, quantity: number): number {}
```
### 8. Small Functions
- Functions should do one thing and do it well
- Keep functions short (ideally under 20 lines)
- Extract complex logic into separate functions
- Use descriptive function names instead of comments
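**Example** (an illustrative sketch; the CSV import scenario is made up for this guide):
```typescript
type User = { name: string; email: string };

// Bad: a single importUsers() that parses, filters, and saves in one long block
// makes each step impossible to reuse or test on its own.

// Good: each step is a small function whose name replaces a comment
function parseUsers(csv: string): User[] {
  return csv
    .split("\n")
    .map((line) => line.split(","))
    .map(([name, email]) => ({ name, email }));
}

function keepValidUsers(users: User[]): User[] {
  return users.filter((user) => user.email.includes("@"));
}

async function importUsers(
  csv: string,
  saveAll: (users: User[]) => Promise<void>
): Promise<void> {
  await saveAll(keepValidUsers(parseUsers(csv)));
}
```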
### 9. Error Handling
- Handle errors explicitly
- Use appropriate error types
- Provide meaningful error messages
- Don't swallow errors silently
- Use try-catch appropriately
**Example**:
```typescript
// Bad
function divide(a: number, b: number) {
return a / b; // Can throw division by zero
}
// Good
function divide(a: number, b: number): number {
if (b === 0) {
throw new Error("Division by zero is not allowed");
}
return a / b;
}
```
### 10. Comments and Documentation
- Write self-documenting code (code should explain itself)
- Use comments to explain "why", not "what"
- Document complex algorithms or business logic
- Keep comments up-to-date with code changes
- Use JSDoc/TSDoc for public APIs
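**Example** (illustrative; the retry scenario is invented to show a "why" comment):
```typescript
let retries = 0;

// Bad: restates what the code already says
// add one to retries
retries += 1;

// Good: explains why the code exists
// The gateway intermittently returns 502 on the first call; a single retry
// resolves nearly all of these, so retry once before surfacing an error.
retries += 1;

/**
 * Returns the order total in cents.
 *
 * @param items - Line items whose prices are already expressed in cents
 */
function calculateTotalCents(items: { priceCents: number }[]): number {
  return items.reduce((sum, item) => sum + item.priceCents, 0);
}
```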
### 11. Type Safety
- Use TypeScript types/interfaces effectively
- Avoid `any` type unless absolutely necessary
- Use union types and discriminated unions
- Leverage type inference where appropriate
- Create custom types for domain concepts
**Example**:
```typescript
// Bad
function processUser(data: any) {
return data.name;
}
// Good
interface User {
id: string;
name: string;
email: string;
}
function processUser(user: User): string {
return user.name;
}
```
### 12. Testing Considerations
- Write testable code (pure functions, dependency injection)
- Keep functions small and focused
- Avoid hidden dependencies
- Use mocks and stubs appropriately
- Design for testability from the start
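**Example** (a sketch of injecting a dependency to make code testable; the session shape is illustrative):
```typescript
const SESSION_TTL_MS = 30 * 60 * 1000;

// Hard to test: calling Date.now() inside the function is a hidden dependency,
// so a test cannot control "now" without mocking the global clock.

// Testable: inject the clock with a sensible default
function isSessionExpired(
  session: { createdAt: number },
  now: () => number = Date.now
): boolean {
  return now() - session.createdAt > SESSION_TTL_MS;
}

// In a test, pass a fixed clock:
// isSessionExpired({ createdAt: 0 }, () => SESSION_TTL_MS + 1) === true
```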
### 13. Performance vs. Readability
- Prefer readability over premature optimization
- Profile before optimizing
- Use clear algorithms first, optimize if needed
- Document performance-critical sections
- Balance between clean code and performance requirements
### 14. Code Organization
- Group related functionality together
- Use modules/packages to organize code
- Follow consistent file and folder structures
- Separate concerns (UI, business logic, data access)
- Use barrel exports (index files) appropriately
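**Example** (an illustrative barrel export; the module name and paths are placeholders):
```typescript
// features/auth/index.ts - the barrel defines the module's public surface
export { login, logout } from "./session";
export type { AuthUser } from "./types";

// elsewhere.ts - callers import from the module, not from deep file paths
import { login } from "./features/auth";
```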
### 15. Configuration Management
- Externalize configuration values
- Use environment variables for environment-specific settings
- Create configuration objects/interfaces
- Validate configuration at startup
- Provide sensible defaults
**Example**:
```typescript
// Bad
const apiUrl = "https://api.example.com";
const timeout = 5000;
// Good
interface Config {
apiUrl: string;
timeout: number;
maxRetries: number;
}
const config: Config = {
apiUrl: process.env.API_URL || "https://api.example.com",
timeout: parseInt(process.env.TIMEOUT || "5000"),
maxRetries: parseInt(process.env.MAX_RETRIES || "3"),
};
```
---
## Code Review Checklist
When reviewing code, check for:
- [ ] No code duplication (DRY principle)
- [ ] Meaningful variable and function names
- [ ] No magic numbers or strings
- [ ] Functions are small and focused
- [ ] Proper error handling
- [ ] Type safety maintained
- [ ] Code is testable
- [ ] Documentation where needed
- [ ] Consistent code style
- [ ] Proper abstraction levels
- [ ] Extensibility considered
- [ ] Single responsibility principle followed
---
## Summary
Clean code is:
- **Readable**: Easy to understand at a glance
- **Maintainable**: Easy to modify and update
- **Testable**: Easy to write tests for
- **Extensible**: Easy to add new features
- **Reusable**: Can be used in multiple contexts
- **Well-documented**: Clear intent and purpose
- **Type-safe**: Leverages type system effectively
- **DRY**: No unnecessary repetition
- **Abstracted**: Proper separation of concerns
- **Configurable**: Uses constants and configuration over hardcoding
Remember: Code is read far more often than it is written. Write code for your future self and your teammates.

View File

@@ -1,70 +0,0 @@
You are a very strong reasoner and planner. Use these critical instructions to structure your plans, thoughts, and responses.
Before taking any action (either tool calls or responses to the user), you must proactively, methodically, and independently plan and reason about:
1. Logical dependencies and constraints:
Analyze the intended action against the following factors. Resolve conflicts in order of importance:
1.1) Policy-based rules, mandatory prerequisites, and constraints.
1.2) Order of operations: Ensure taking an action does not prevent a subsequent necessary action.
1.2.1) The user may request actions in a random order, but you may need to reorder operations to maximize successful completion of the task.
1.3) Other prerequisites (information and/or actions needed).
1.4) Explicit user constraints or preferences.
2. Risk assessment:
What are the consequences of taking the action? Will the new state cause any future issues?
2.1) For exploratory tasks (like searches), missing optional parameters is a LOW risk.
Prefer calling the tool with the available information over asking the user, unless your Rule 1 (Logical Dependencies) reasoning determines that optional information is required for a later step in your plan.
3. Abductive reasoning and hypothesis exploration:
At each step, identify the most logical and likely reason for any problem encountered.
3.1) Look beyond immediate or obvious causes. The most likely reason may not be the simplest and may require deeper inference.
3.2) Hypotheses may require additional research. Each hypothesis may take multiple steps to test.
3.3) Prioritize hypotheses based on likelihood, but do not discard less likely ones prematurely. A low-probability event may still be the root cause.
4. Outcome evaluation and adaptability:
Does the previous observation require any changes to your plan?
4.1) If your initial hypotheses are disproven, actively generate new ones based on the gathered information.
5. Information availability:
Incorporate all applicable and alternative sources of information, including:
5.1) Using available tools and their capabilities
5.2) All policies, rules, checklists, and constraints
5.3) Previous observations and conversation history
5.4) Information only available by asking the user
6. Precision and Grounding:
Ensure your reasoning is extremely precise and relevant to each exact ongoing situation.
6.1) Verify your claims by quoting the exact applicable information (including policies) when referring to them.
7. Completeness:
Ensure that all requirements, constraints, options, and preferences are exhaustively incorporated into your plan.
7.1) Resolve conflicts using the order of importance in #1.
7.2) Avoid premature conclusions: There may be multiple relevant options for a given situation.
7.2.1) To check for whether an option is relevant, reason about all information sources from #5.
7.2.2) You may need to consult the user to even know whether something is applicable. Do not assume it is not applicable without checking.
7.3) Review applicable sources of information from #5 to confirm which are relevant to the current state.
8. Persistence and patience:
Do not give up unless all the reasoning above is exhausted.
8.1) Don't be dissuaded by time taken or user frustration.
8.2) This persistence must be intelligent: On transient errors (e.g. please try again), you must retry unless an explicit retry limit (e.g., max x tries) has been reached. If such a limit is hit, you must stop. On other errors, you must change your strategy or arguments, not repeat the same failed call.
9. Inhibit your response:
Only take an action after all the above reasoning is completed. Once you've taken an action, you cannot take it back.

View File

@@ -1,172 +0,0 @@
# Agent Memory - Lessons Learned
This file documents issues encountered by previous agents and their solutions. Read this before starting work to avoid repeating mistakes.
## Testing Issues
### Issue: Mock project setup not navigating to board view
**Problem:** Setting `currentProject` in localStorage didn't automatically show the board view - app stayed on welcome view.
**Fix:** The `currentView` state is not persisted in localStorage. Instead of trying to set it, have tests click on the recent project from the welcome view to trigger `setCurrentProject()` which handles the view transition properly.
```typescript
// Don't do this:
await setupMockProject(page); // Sets localStorage
await page.goto("/");
await waitForElement(page, "board-view"); // ❌ Fails - still on welcome view
// Do this instead:
await setupMockProject(page);
await page.goto("/");
await waitForElement(page, "welcome-view");
const recentProject = page.locator(
'[data-testid="recent-project-test-project-1"]'
);
await recentProject.click(); // ✅ Triggers proper view transition
await waitForElement(page, "board-view");
```
### Issue: View output button test IDs are conditional
**Problem:** Tests failed looking for `view-output-inprogress-${featureId}` when the actual button had `view-output-${featureId}`.
**Fix:** The button test ID depends on whether the feature is actively running:
- `view-output-${featureId}` - shown when feature is in `runningAutoTasks` (actively running)
- `view-output-inprogress-${featureId}` - shown when status is "in_progress" but NOT actively running
After dragging a feature to in_progress, wait for the `auto_mode_feature_start` event to fire before looking for the button:
```typescript
// Wait for feature to start running
const viewOutputButton = page
.locator(
`[data-testid="view-output-${featureId}"], [data-testid="view-output-inprogress-${featureId}"]`
)
.first();
await expect(viewOutputButton).toBeVisible({ timeout: 8000 });
```
### Issue: Elements not appearing due to async event timing
**Problem:** Tests checked for UI elements before async events (like `auto_mode_feature_start`) had fired and updated the UI.
**Fix:** Add appropriate timeouts when waiting for elements that depend on async events. The mock auto mode takes ~2.4 seconds to complete, so allow sufficient time:
```typescript
// Mock auto mode timing: ~2.4s + 1.5s delay = ~4s total
await waitForAgentOutputModalHidden(page, { timeout: 10000 });
```
### Issue: Slider interaction testing
**Problem:** Clicking on slider track didn't reliably set specific values.
**Fix:** Use the slider's keyboard interaction or calculate the exact click position on the track. For max value, click on the rightmost edge of the track.
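A sketch of the keyboard-based approach (the test id and expected max value are illustrative):
```typescript
// Focus the slider thumb, then drive it with the keyboard instead of clicking the track
const sliderThumb = page.locator('[data-testid="max-concurrency-slider"] [role="slider"]');
await sliderThumb.focus();
await sliderThumb.press("End"); // "End" jumps to max; ArrowRight/ArrowLeft step by one
await expect(sliderThumb).toHaveAttribute("aria-valuenow", "5"); // expected max is illustrative
```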
### Issue: Port binding blocked in sandbox mode
**Problem:** Playwright tests couldn't bind to port in sandbox mode.
**Fix:** Tests don't need sandbox disabled - the issue was TEST_REUSE_SERVER environment variable. Make sure to start the dev server separately or let Playwright's webServer config handle it.
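For reference, a minimal `webServer` block in `playwright.config.ts` that covers both cases (the command, URL, and the exact meaning of TEST_REUSE_SERVER here are assumptions):
```typescript
import { defineConfig } from "@playwright/test";

export default defineConfig({
  webServer: {
    command: "npm run dev", // assumed dev command
    url: "http://localhost:3000", // assumed dev server URL
    // Reuse an already-running dev server instead of binding a new port
    reuseExistingServer: !!process.env.TEST_REUSE_SERVER,
  },
});
```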
## Code Architecture
### Issue: Understanding store state persistence
**Problem:** Not all store state is persisted to localStorage.
**Fix:** Check the `partialize` function in `app-store.ts` to see which state is persisted:
```typescript
partialize: (state) => ({
projects: state.projects,
currentProject: state.currentProject,
theme: state.theme,
sidebarOpen: state.sidebarOpen,
apiKeys: state.apiKeys,
chatSessions: state.chatSessions,
chatHistoryOpen: state.chatHistoryOpen,
maxConcurrency: state.maxConcurrency, // Added for concurrency feature
});
```
Note: `currentView` is NOT persisted - it's managed through actions.
### Issue: Auto mode task lifecycle
**Problem:** Confusion about when features are considered "running" vs "in_progress".
**Fix:** Understand the task lifecycle:
1. Feature dragged to "in_progress" column → status becomes "in_progress"
2. `auto_mode_feature_start` event fires → feature added to `runningAutoTasks`
3. Agent works on feature → periodic events sent
4. `auto_mode_feature_complete` event fires → feature removed from `runningAutoTasks`
5. If `passes: true` → status becomes "verified", if `passes: false` → stays "in_progress"
### Issue: waiting_approval features not draggable when skipTests=true
**Problem:** Features in `waiting_approval` status couldn't be dragged to `verified` column, even though the code appeared to handle it.
**Fix:** The order of condition checks in `handleDragEnd` matters. The `skipTests` check was catching `waiting_approval` features before the `waiting_approval` status check could handle them. Move the `waiting_approval` status check **before** the `skipTests` check in `board-view.tsx`:
```typescript
// Correct order in handleDragEnd:
if (draggedFeature.status === "backlog") {
// ...
} else if (draggedFeature.status === "waiting_approval") {
// Handle waiting_approval BEFORE skipTests check
// because waiting_approval features often have skipTests=true
} else if (draggedFeature.skipTests) {
// Handle other skipTests features
}
```
## Best Practices Discovered
### Testing utilities are critical
Create comprehensive testing utilities in `tests/utils.ts` to avoid repeating selector logic:
- `waitForElement` - waits for elements to appear
- `waitForElementHidden` - waits for elements to disappear
- `setupMockProject` - sets up mock localStorage state
- `navigateToBoard` - handles navigation from welcome to board view
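A minimal sketch of what one of these helpers can look like (the signature and default timeout are assumptions):
```typescript
// tests/utils.ts (sketch)
import type { Page } from "@playwright/test";

export async function waitForElement(page: Page, testId: string, timeout = 15000) {
  await page.locator(`[data-testid="${testId}"]`).waitFor({ state: "visible", timeout });
}
```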
### Always add data-testid attributes
When implementing features, immediately add `data-testid` attributes to key UI elements. This makes tests more reliable and easier to write.
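For example (the component and handler names are illustrative):
```tsx
<Button data-testid={`view-output-${feature.id}`} onClick={handleViewOutput}>
  View Output
</Button>
```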
### Test timeouts should be generous but not excessive
- Default timeout: 30s (set in playwright.config.ts)
- Element waits: 5-15s for critical elements
- Auto mode completion: 10s (accounts for ~4s mock duration)
- Don't increase timeouts past 10s for individual operations
### Mock auto mode timing
The mock auto mode in `electron.ts` has predictable timing:
- Total duration: ~2.4 seconds (300+500+300+300+500+500ms)
- Plus 1.5s delay before auto-closing modals
- Total: ~4 seconds from start to completion
### Issue: HotkeyButton conflicting with useKeyboardShortcuts
**Problem:** Adding `HotkeyButton` with a simple key (like "N") to buttons that already had keyboard shortcuts registered via `useKeyboardShortcuts` caused the hotkey to stop working. Both registered duplicate listeners, and the HotkeyButton's `stopPropagation()` call could interfere.
**Fix:** When a simple single-key hotkey is already handled by `useKeyboardShortcuts`, set `hotkeyActive={false}` on the `HotkeyButton` so it only displays the indicator badge without registering a duplicate listener:
```tsx
// In views that already use useKeyboardShortcuts for the "N" key:
<HotkeyButton
onClick={() => setShowAddDialog(true)}
hotkey={shortcuts.addFeature}
hotkeyActive={false} // <-- Important! Prevents duplicate listener
>
Add Feature
</HotkeyButton>
// HotkeyButton should only actively listen when it's the sole handler (e.g., Cmd+Enter in dialogs)
<HotkeyButton
onClick={handleSubmit}
hotkey={{ key: "Enter", cmdCtrl: true }}
hotkeyActive={isDialogOpen} // Active when dialog is open
>
Submit
</HotkeyButton>
```

.gitignore vendored (3 lines changed)
View File

@@ -9,3 +9,6 @@ dist/
.next/
node_modules
.automaker/images/
.automaker/
/.automaker/*
/.automaker/

View File

@@ -22,7 +22,7 @@ class McpServerFactory {
"Create or update a feature. Use this tool to create new features with detailed information or update existing feature status. When creating features, provide comprehensive description, category, and implementation steps.",
{
featureId: z.string().describe("The ID of the feature (lowercase, hyphens for spaces). Example: 'user-authentication', 'budget-tracking'"),
status: z.enum(["backlog", "todo", "in_progress", "verified"]).describe("The status for the feature. Use 'backlog' or 'todo' for new features."),
status: z.enum(["backlog", "todo", "in_progress", "verified"]).describe("The status for the feature. For NEW features, ONLY use 'backlog' or 'verified'. NEVER use 'in_progress' for new features - the user will manually start them."),
summary: z.string().optional().describe("A brief summary of what was implemented/changed or what the feature does."),
description: z.string().optional().describe("A detailed description of the feature. Be comprehensive - explain what the feature does, its purpose, and key functionality."),
category: z.string().optional().describe("The category/phase for this feature. Example: 'Phase 1: Foundation', 'Phase 2: Core Logic', 'Phase 3: Polish', 'Authentication', 'UI/UX'"),
@@ -38,14 +38,16 @@ class McpServerFactory {
const feature = features.find((f) => f.id === args.featureId);
if (!feature) {
console.log(`[Feature Creation] Feature ${args.featureId} not found - this might be a new feature being created`);
// This might be a new feature - try to proceed anyway
console.log(`[Feature Creation] Feature ${args.featureId} not found - this is a new feature being created`);
// This is a new feature - enforce backlog status for any non-verified features
}
// If agent tries to mark as verified but feature has skipTests=true, convert to waiting_approval
let finalStatus = args.status;
// Convert 'todo' to 'backlog' for consistency, but only for new features
if (!feature && finalStatus === "todo") {
// For NEW features: Convert 'todo' or 'in_progress' to 'backlog' for consistency
// New features should ALWAYS go to backlog first, user must manually start them
if (!feature && (finalStatus === "todo" || finalStatus === "in_progress")) {
console.log(`[Feature Creation] New feature ${args.featureId} - converting "${finalStatus}" to "backlog" (user must manually start features)`);
finalStatus = "backlog";
}
if (feature && args.status === "verified" && feature.skipTests === true) {

View File

@@ -390,15 +390,13 @@ class SpecRegenerationService {
3. For EACH feature in the implementation_roadmap:
- Determine if it's ALREADY IMPLEMENTED (fully or partially)
- If fully implemented: Create with status "verified" and note what's done
- If partially implemented: Create with status "in_progress" and note remaining work
- If not started: Create with status "backlog"
- If partially implemented OR not started: Create with status "backlog" and note what still needs to be done
**IMPORTANT - For each feature you MUST provide:**
- **featureId**: A descriptive ID (lowercase, hyphens for spaces). Example: "user-authentication", "budget-tracking"
- **status**:
- "verified" if feature is fully implemented in the codebase
- "in_progress" if partially implemented
- "backlog" if not yet started
- "verified" ONLY if feature is 100% fully implemented in the codebase
- "backlog" for ALL features that need ANY work (partial or not started) - the user will manually start these
- **description**: A DETAILED description (2-4 sentences) explaining what the feature does, its purpose, and key functionality
- **category**: The phase from the roadmap (e.g., "Phase 1: Foundation", "Phase 2: Core Logic", "Phase 3: Polish")
- **steps**: An array of 4-8 clear, actionable implementation steps. For verified features, these are what WAS done. For backlog, these are what NEEDS to be done.
@@ -407,10 +405,12 @@ class SpecRegenerationService {
**Example of analyzing existing code:**
If you find NextAuth.js configured in the codebase with working login pages, the user-authentication feature should be "verified" not "backlog".
**Example of a well-defined feature:**
**IMPORTANT: NEVER use "in_progress" status when creating features. Only use "verified" or "backlog".**
**Example of a well-defined feature (verified - fully complete):**
{
"featureId": "user-authentication",
"status": "verified", // Because we found it's already implemented
"status": "verified", // Because we found it's 100% already implemented
"description": "Secure user authentication system with email/password login and session management. Already implemented using NextAuth.js with email provider.",
"category": "Phase 1: Foundation",
"steps": [
@@ -422,6 +422,21 @@ If you find NextAuth.js configured in the codebase with working login pages, the
"summary": "Authentication implemented with NextAuth.js email provider"
}
**Example of a feature that needs work (backlog):**
{
"featureId": "user-profile",
"status": "backlog", // Needs work - user will manually start this
"description": "User profile page where users can view and edit their account settings, change password, and manage preferences.",
"category": "Phase 2: Core Features",
"steps": [
"Create profile page component",
"Add form for editing user details",
"Implement password change functionality",
"Add avatar upload feature"
],
"summary": "User profile management - needs implementation"
}
**Feature Storage:**
Features are stored in .automaker/features/{id}/feature.json - each feature has its own folder.
Use the UpdateFeatureStatus tool to create features with ALL the fields above.`,
@@ -453,13 +468,15 @@ Use the UpdateFeatureStatus tool to create features with ALL the fields above.`,
2. **Then, read .automaker/app_spec.txt** to see the implementation roadmap
3. **For EACH feature in the roadmap, determine its status:**
- Is it ALREADY IMPLEMENTED in the codebase? → status: "verified"
- Is it PARTIALLY IMPLEMENTED? → status: "in_progress"
- Is it NOT STARTED? → status: "backlog"
- Is it 100% FULLY IMPLEMENTED in the codebase? → status: "verified"
- Is it PARTIALLY IMPLEMENTED or NOT STARTED? → status: "backlog"
**CRITICAL: NEVER use "in_progress" status. Only use "verified" or "backlog".**
The user will manually move features from backlog to in_progress when they want to start working on them.
4. **Create each feature with UpdateFeatureStatus including ALL fields:**
- featureId: Descriptive ID (lowercase, hyphens)
- status: "verified", "in_progress", or "backlog" based on your analysis
- status: "verified" or "backlog" ONLY (never in_progress)
- description: 2-4 sentences explaining the feature
- category: The phase name from the roadmap
- steps: Array of 4-8 implementation steps

View File

@@ -5,6 +5,7 @@
import { Router, type Request, type Response } from "express";
import fs from "fs/promises";
import os from "os";
import path from "path";
import { validatePath, addAllowedPath, isPathAllowed } from "../lib/security.js";
import type { EventEmitter } from "../lib/events.js";
@@ -422,5 +423,123 @@ export function createFsRoutes(_events: EventEmitter): Router {
}
});
// Browse directories for file picker
// SECURITY: Restricted to home directory, allowed paths, and drive roots on Windows
router.post("/browse", async (req: Request, res: Response) => {
try {
const { dirPath } = req.body as { dirPath?: string };
const homeDir = os.homedir();
// Detect available drives on Windows
const detectDrives = async (): Promise<string[]> => {
if (os.platform() !== "win32") {
return [];
}
const drives: string[] = [];
const letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
for (const letter of letters) {
const drivePath = `${letter}:\\`;
try {
await fs.access(drivePath);
drives.push(drivePath);
} catch {
// Drive doesn't exist, skip it
}
}
return drives;
};
// Check if a path is safe to browse
const isSafePath = (targetPath: string): boolean => {
const resolved = path.resolve(targetPath);
const normalizedHome = path.resolve(homeDir);
// Allow browsing within home directory
if (resolved === normalizedHome || resolved.startsWith(normalizedHome + path.sep)) {
return true;
}
// Allow browsing already-allowed paths
if (isPathAllowed(resolved)) {
return true;
}
// On Windows, allow drive roots for initial navigation
if (os.platform() === "win32") {
const driveRootMatch = /^[A-Z]:\\$/i.test(resolved);
if (driveRootMatch) {
return true;
}
}
// On Unix, allow root for initial navigation (but only list, not read files)
if (os.platform() !== "win32" && resolved === "/") {
return true;
}
return false;
};
// Default to home directory if no path provided
const targetPath = dirPath ? path.resolve(dirPath) : homeDir;
// Security check: validate the path is safe to browse
if (!isSafePath(targetPath)) {
res.status(403).json({
success: false,
error: "Access denied: browsing is restricted to your home directory and allowed project paths",
});
return;
}
try {
const stats = await fs.stat(targetPath);
if (!stats.isDirectory()) {
res.status(400).json({ success: false, error: "Path is not a directory" });
return;
}
// Read directory contents
const entries = await fs.readdir(targetPath, { withFileTypes: true });
// Filter for directories only and exclude hidden directories
const directories = entries
.filter((entry) => entry.isDirectory() && !entry.name.startsWith("."))
.map((entry) => ({
name: entry.name,
path: path.join(targetPath, entry.name),
}))
.sort((a, b) => a.name.localeCompare(b.name));
// Get parent directory (only if parent is also safe to browse)
const parentPath = path.dirname(targetPath);
const hasParent = parentPath !== targetPath && isSafePath(parentPath);
// Get available drives on Windows
const drives = await detectDrives();
res.json({
success: true,
currentPath: targetPath,
parentPath: hasParent ? parentPath : null,
directories,
drives,
});
} catch (error) {
res.status(400).json({
success: false,
error: error instanceof Error ? error.message : "Failed to read directory",
});
}
} catch (error) {
const message = error instanceof Error ? error.message : "Unknown error";
res.status(500).json({ success: false, error: message });
}
});
return router;
}

View File

@@ -255,7 +255,7 @@ Format your response as markdown. Be specific and actionable.`;
// Save spec
const specDir = path.join(projectPath, ".automaker");
const specPath = path.join(specDir, "project-spec.md");
const specPath = path.join(specDir, "app_spec.txt");
await fs.mkdir(specDir, { recursive: true });
await fs.writeFile(specPath, responseText);
@@ -278,7 +278,7 @@ async function generateFeaturesFromSpec(
abortController: AbortController
) {
// Read existing spec
const specPath = path.join(projectPath, ".automaker", "project-spec.md");
const specPath = path.join(projectPath, ".automaker", "app_spec.txt");
let spec: string;
try {
@@ -382,7 +382,7 @@ async function parseAndCreateFeatures(
id: feature.id,
title: feature.title,
description: feature.description,
status: "pending",
status: "backlog", // Features go to backlog - user must manually start them
priority: feature.priority || 2,
complexity: feature.complexity || "moderate",
dependencies: feature.dependencies || [],