chore: update .gitignore and remove obsolete automaker files

- Added .automaker/ to .gitignore to prevent tracking of the entire directory.
- Deleted outdated files including app_spec.txt, categories.json, memory.md, clean-code.md, and gemini.md from the .automaker context.
- Enhanced mcp-server-factory.js and spec-regeneration-service.js to enforce status management for new features, ensuring new features default to "backlog" and clarifying status handling in prompts and comments.
- Introduced a new file browsing endpoint in fs.ts to improve directory navigation while maintaining security constraints.
Alec Koifman
2025-12-12 17:34:16 -05:00
parent a4c5567768
commit 28bbc3e0e1
10 changed files with 161 additions and 947 deletions

View File

@@ -1,202 +0,0 @@
<project_specification>
<project_name>Automaker - Autonomous AI Development Studio</project_name>
<overview>
Automaker is a sophisticated desktop application that empowers developers to build software autonomously through AI-powered agents. Built with Electron and Next.js, it provides an intelligent GUI for project management, feature tracking via Kanban boards, and autonomous code generation. The application leverages multiple AI models (Claude, GPT) and supports complex workflows including git worktree isolation, testing automation, and multi-model agent execution. It acts as a complete development orchestrator, managing the entire lifecycle from specification to verified implementation.
</overview>
<technology_stack>
<frontend>
<framework>Next.js 16.0.7 (App Router)</framework>
<ui_library>shadcn/ui with Radix UI primitives</ui_library>
<styling>Tailwind CSS 4.0</styling>
<state_management>Zustand with persistence</state_management>
<drag_drop>@dnd-kit for Kanban board</drag_drop>
<icons>Lucide React</icons>
<query_client>TanStack Query for server state</query_client>
</frontend>
<desktop_shell>
<framework>Electron 39.2.6</framework>
<language>TypeScript 5.x</language>
<inter_process_communication>Electron IPC with security sandboxing</inter_process_communication>
<file_system>Node.js fs/promises with path validation</file_system>
</desktop_shell>
<ai_engine>
<primary_model>Claude 3.5 (Opus, Sonnet, Haiku) via Anthropic Claude Agent SDK</primary_model>
<secondary_model>GPT-5.1 Codex family via OpenAI CLI</secondary_model>
<orchestration>Custom Agent Service with streaming responses</orchestration>
<model_registry>Dynamic model provider system with CLI detection</model_registry>
</ai_engine>
<testing>
<framework>Playwright for E2E testing</framework>
<unit>Jest/Vitest compatible</unit>
<integration>Agent-driven test execution and verification</integration>
</testing>
<version_control>
<system>Git with worktree isolation support</system>
<branching>Feature branch management</branching>
<workflow>Automated commit and merge capabilities</workflow>
</version_control>
</technology_stack>
<core_capabilities>
<project_management>
- Open and manage multiple local projects
- Project-specific themes and configurations
- Session management with project context
- Recently used project cycling (Q/E shortcuts)
- Project search and type-ahead selection
- Trash and restore functionality for projects
</project_management>
<intelligent_analysis>
- Auto-generation and updating of app_spec.txt
- Feature extraction from existing codebases
- Technology stack detection and documentation
- Project structure analysis with file tree visualization
- "Project Ingestion": Analyzes existing codebases to understand structure
- Auto-generation of `.automaker/app_spec.txt` based on codebase analysis
- Auto-generation of features in `.automaker/features/{id}/feature.json`:
- Scans code for implemented features
- Creates test cases for existing features
- Marks existing features as "passes": true automatically
</intelligent_analysis>
<kanban_workflow>
- Visual representation of features from `.automaker/features/` folder
- Drag-and-drop interface to reprioritize tasks
- Direct editing of feature details (steps, description) from the card
- Visual Kanban board with drag-and-drop functionality
- Multiple status columns: Backlog, In Progress, Waiting Approval, Verified
- Feature cards with detailed information display (3 detail levels)
- Real-time status updates during agent execution
- Search and filtering capabilities
- Category management and autocomplete
- Image attachment support for feature descriptions
</kanban_workflow>
<autonomous_agent_engine>
- Multi-model agent system with profile-based execution
- Streaming agent output with real-time logs
- Git worktree isolation for safe feature development
- Automatic testing and verification workflows
- Context-aware prompt generation
- Agent memory and learning capabilities
- Concurrent feature processing with configurable limits
- Follow-up and resume capabilities
</autonomous_agent_engine>
<advanced_workflows>
- Git worktree management for isolated development
- Feature-specific branching and merging
- Automated commit generation with file tracking
- Test-driven development support
- Code review and approval workflows
- Revert and rollback capabilities
</advanced_workflows>
<user_interface>
- Dark/Light theme support with 12 custom themes
- Per-project theme configurations
- Comprehensive keyboard shortcut system
- Sidebar navigation with project switching
- Multi-view architecture (Board, Spec, Agent, Context, Settings)
- Setup wizard for first-time configuration
- CLI integration status monitoring
</user_interface>
<extensibility>
- AI Profile system for model/thinking level presets
- Keyboard shortcut customization
- Model provider plugin architecture
- Context file management for agent guidance
- Feature suggestion generation
- Spec regeneration workflows
</extensibility>
</core_capabilities>
<ui_layout>
<window_structure>
- Sidebar: Project List, Settings, Logs, Plugins
- Main Content:
- **Spec View**: Split editor for `.automaker/app_spec.txt`
- **Board View**: Kanban board for `.automaker/features/` folder
- **Code View**: Read-only Monaco editor to see what the agent is writing
- **Agent View**: Chat-like interface showing agent thought process and tool usage. Also used for the "New Project Interview".
</window_structure>
<theme>
- Dark/Light mode support (system sync)
- "Hacker" aesthetic option (terminal-like)
- Professional/Clean default
</theme>
</ui_layout>
<development_workflow>
<local_testing>
- "Browser Mode": Run the Next.js frontend in a standard browser with mocked Electron IPC for rapid UI iteration.
- "Electron Mode": Full desktop app testing.
- Hot Reloading for both Main and Renderer processes.
</local_testing>
</development_workflow>
<implemented_features>
- Complete Kanban board with drag-and-drop functionality
- Multi-model AI agent execution (Claude + GPT/Codex)
- Git worktree isolation for features
- Real-time agent output streaming and logging
- Project management with session persistence
- Theme system with 12 themes + per-project themes
- Comprehensive settings panel with all configurations
- Feature image attachment and context system
- Agent profiles with model/thinking level presets
- Keyboard shortcut system with customization
- CLI integration detection (Claude Code + Codex CLI)
- Auto mode for autonomous feature processing
- Feature suggestions generation
- Spec regeneration and project analysis
- Context file management
- Chat history and session management
- File diff viewing and git integration
- Search and filtering across all features
- Category management and autocomplete
- Test automation and verification workflows
</implemented_features>
<implementation_roadmap>
<phase_1_foundation>
- Enhanced error handling and recovery mechanisms
- Performance optimization for large projects
- Improved memory management for long-running sessions
- Advanced logging and debugging capabilities
</phase_1_foundation>
<phase_2_core_logic>
- Plugin system for custom model providers
- Advanced workflow customization engine
- Team collaboration features
- Cloud synchronization capabilities
- Advanced project templates and scaffolding
</phase_2_core_logic>
<phase_3_kanban_and_interaction>
- Build Kanban board with drag-and-drop
- Connect Kanban state to `.automaker/features/` filesystem
- Implement "Run Feature" capability
- Integrate standard prompts library
</phase_3_kanban_and_interaction>
<phase_3_polish>
- Enhanced accessibility features
- Advanced theme customization
- Performance monitoring and analytics
- Documentation generation automation
- Integration with external development tools
- Advanced security auditing and sandboxing
</phase_3_polish>
<phase_4_polish>
- Advanced terminal integration
- Settings & Extensibility
- UI refinement
</phase_4_polish>
</implementation_roadmap>
</project_specification>

View File

@@ -1,9 +0,0 @@
[
"Agent Runner",
"Core",
"Kanban",
"Other",
"Settings",
"Uncategorized",
"ka"
]

View File

@@ -1,474 +0,0 @@
# Clean Code Guidelines
## Overview
This document serves as a comprehensive guide for writing clean, maintainable, and extensible code. It outlines principles and practices that ensure code quality, reusability, and long-term maintainability. When writing or reviewing code, follow these guidelines to create software that is easy to understand, modify, and extend. This file is used by LLMs to understand and enforce coding standards throughout the codebase.
---
## Core Principles
### 1. DRY (Don't Repeat Yourself)
**Principle**: Every piece of knowledge should have a single, unambiguous representation within a system.
**Practices**:
- Extract repeated logic into reusable functions, classes, or modules
- Use constants for repeated values
- Create shared utilities for common operations
- Avoid copy-pasting code blocks
- When you find yourself writing similar code more than twice, refactor it
**Example - Bad**:
```typescript
// Repeated validation logic
if (email.includes("@") && email.length > 5) {
// ...
}
if (email.includes("@") && email.length > 5) {
// ...
}
```
**Example - Good**:
```typescript
function isValidEmail(email: string): boolean {
return email.includes("@") && email.length > 5;
}
if (isValidEmail(email)) {
// ...
}
```
---
### 2. Code Reusability
**Principle**: Write code that can be used in multiple contexts without modification or with minimal adaptation.
**Practices**:
- Create generic, parameterized functions instead of specific ones
- Use composition over inheritance where appropriate
- Design functions to be pure (no side effects) when possible
- Create utility libraries for common operations
- Use dependency injection to make components reusable
- Design APIs that are flexible and configurable
**Example - Bad**:
```typescript
function calculateUserTotal(userId: string) {
const user = getUser(userId);
return user.items.reduce((sum, item) => sum + item.price, 0);
}
```
**Example - Good**:
```typescript
function calculateTotal<T extends { price: number }>(items: T[]): number {
return items.reduce((sum, item) => sum + item.price, 0);
}
function calculateUserTotal(userId: string) {
const user = getUser(userId);
return calculateTotal(user.items);
}
```
---
### 3. Abstract Functions and Abstractions
**Principle**: Create abstractions that hide implementation details and provide clear, simple interfaces.
**Practices**:
- Use interfaces and abstract classes to define contracts
- Create abstraction layers between different concerns
- Hide complex implementation behind simple function signatures
- Use dependency inversion - depend on abstractions, not concretions
- Create factory functions/classes for object creation
- Use strategy pattern for interchangeable algorithms
**Example - Bad**:
```typescript
function processPayment(amount: number, cardNumber: string, cvv: string) {
// Direct implementation tied to specific payment processor
fetch("https://stripe.com/api/charge", {
method: "POST",
body: JSON.stringify({ amount, cardNumber, cvv }),
});
}
```
**Example - Good**:
```typescript
interface PaymentProcessor {
processPayment(
amount: number,
details: PaymentDetails
): Promise<PaymentResult>;
}
class StripeProcessor implements PaymentProcessor {
async processPayment(
amount: number,
details: PaymentDetails
): Promise<PaymentResult> {
// Implementation
}
}
function processPayment(
processor: PaymentProcessor,
amount: number,
details: PaymentDetails
) {
return processor.processPayment(amount, details);
}
```
---
### 4. Extensibility
**Principle**: Design code that can be easily extended with new features without modifying existing code.
**Practices**:
- Follow the Open/Closed Principle: open for extension, closed for modification
- Use plugin architectures and hooks for extensibility
- Design with future requirements in mind (but don't over-engineer)
- Use configuration over hardcoding
- Create extension points through interfaces and callbacks
- Use composition and dependency injection
- Design APIs that can accommodate new parameters/options
**Example - Bad**:
```typescript
function sendNotification(user: User, type: string) {
if (type === "email") {
sendEmail(user.email);
} else if (type === "sms") {
sendSMS(user.phone);
}
// Adding new notification types requires modifying this function
}
```
**Example - Good**:
```typescript
interface NotificationChannel {
send(user: User): Promise<void>;
}
class EmailChannel implements NotificationChannel {
async send(user: User): Promise<void> {
// Implementation
}
}
class SMSChannel implements NotificationChannel {
async send(user: User): Promise<void> {
// Implementation
}
}
class NotificationService {
constructor(private channels: NotificationChannel[]) {}
async send(user: User): Promise<void> {
await Promise.all(this.channels.map((channel) => channel.send(user)));
}
}
// New notification types can be added without modifying existing code
```
---
### 5. Avoid Magic Numbers and Strings
**Principle**: Use named constants instead of hardcoded values to improve readability and maintainability.
**Practices**:
- Extract all magic numbers into named constants
- Use enums for related constants
- Create configuration objects for settings
- Use constants for API endpoints, timeouts, limits, etc.
- Document why specific values are used
**Example - Bad**:
```typescript
if (user.age >= 18) {
// What does 18 mean?
}
setTimeout(() => {
// What does 3000 mean?
}, 3000);
if (status === "active") {
// What are the valid statuses?
}
```
**Example - Good**:
```typescript
const MINIMUM_AGE_FOR_ADULTS = 18;
const SESSION_TIMEOUT_MS = 3000;
enum UserStatus {
ACTIVE = "active",
INACTIVE = "inactive",
SUSPENDED = "suspended",
}
if (user.age >= MINIMUM_AGE_FOR_ADULTS) {
// Clear intent
}
setTimeout(() => {
// Clear intent
}, SESSION_TIMEOUT_MS);
if (status === UserStatus.ACTIVE) {
// Type-safe and clear
}
```
---
## Additional Best Practices
### 6. Single Responsibility Principle
Each function, class, or module should have one reason to change.
**Example**:
```typescript
// Bad: Multiple responsibilities
class User {
save() {
/* database logic */
}
sendEmail() {
/* email logic */
}
validate() {
/* validation logic */
}
}
// Good: Single responsibility
class User {
validate() {
/* validation only */
}
}
class UserRepository {
save(user: User) {
/* database logic */
}
}
class EmailService {
sendToUser(user: User) {
/* email logic */
}
}
```
### 7. Meaningful Names
- Use descriptive names that reveal intent
- Avoid abbreviations unless they're widely understood
- Use verbs for functions, nouns for classes
- Be consistent with naming conventions
**Example**:
```typescript
// Bad
const d = new Date();
const u = getUser();
function calc(x, y) {}
// Good
const currentDate = new Date();
const currentUser = getUser();
function calculateTotal(price: number, quantity: number): number {}
```
### 8. Small Functions
- Functions should do one thing and do it well
- Keep functions short (ideally under 20 lines)
- Extract complex logic into separate functions
- Use descriptive function names instead of comments
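**Example** (an illustrative sketch; the CSV import scenario is made up for this guide):
```typescript
type User = { name: string; email: string };

// Bad: a single importUsers() that parses, filters, and saves in one long block
// makes each step impossible to reuse or test on its own.

// Good: each step is a small function whose name replaces a comment
function parseUsers(csv: string): User[] {
  return csv
    .split("\n")
    .map((line) => line.split(","))
    .map(([name, email]) => ({ name, email }));
}

function keepValidUsers(users: User[]): User[] {
  return users.filter((user) => user.email.includes("@"));
}

async function importUsers(
  csv: string,
  saveAll: (users: User[]) => Promise<void>
): Promise<void> {
  await saveAll(keepValidUsers(parseUsers(csv)));
}
```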
### 9. Error Handling
- Handle errors explicitly
- Use appropriate error types
- Provide meaningful error messages
- Don't swallow errors silently
- Use try-catch appropriately
**Example**:
```typescript
// Bad
function divide(a: number, b: number) {
return a / b; // Can throw division by zero
}
// Good
function divide(a: number, b: number): number {
if (b === 0) {
throw new Error("Division by zero is not allowed");
}
return a / b;
}
```
### 10. Comments and Documentation
- Write self-documenting code (code should explain itself)
- Use comments to explain "why", not "what"
- Document complex algorithms or business logic
- Keep comments up-to-date with code changes
- Use JSDoc/TSDoc for public APIs
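**Example** (illustrative; the retry scenario is invented to show a "why" comment):
```typescript
let retries = 0;

// Bad: restates what the code already says
// add one to retries
retries += 1;

// Good: explains why the code exists
// The gateway intermittently returns 502 on the first call; a single retry
// resolves nearly all of these, so retry once before surfacing an error.
retries += 1;

/**
 * Returns the order total in cents.
 *
 * @param items - Line items whose prices are already expressed in cents
 */
function calculateTotalCents(items: { priceCents: number }[]): number {
  return items.reduce((sum, item) => sum + item.priceCents, 0);
}
```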
### 11. Type Safety
- Use TypeScript types/interfaces effectively
- Avoid `any` type unless absolutely necessary
- Use union types and discriminated unions
- Leverage type inference where appropriate
- Create custom types for domain concepts
**Example**:
```typescript
// Bad
function processUser(data: any) {
return data.name;
}
// Good
interface User {
id: string;
name: string;
email: string;
}
function processUser(user: User): string {
return user.name;
}
```
### 12. Testing Considerations
- Write testable code (pure functions, dependency injection)
- Keep functions small and focused
- Avoid hidden dependencies
- Use mocks and stubs appropriately
- Design for testability from the start
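**Example** (a sketch of injecting a dependency to make code testable; the session shape is illustrative):
```typescript
const SESSION_TTL_MS = 30 * 60 * 1000;

// Hard to test: calling Date.now() inside the function is a hidden dependency,
// so a test cannot control "now" without mocking the global clock.

// Testable: inject the clock with a sensible default
function isSessionExpired(
  session: { createdAt: number },
  now: () => number = Date.now
): boolean {
  return now() - session.createdAt > SESSION_TTL_MS;
}

// In a test, pass a fixed clock:
// isSessionExpired({ createdAt: 0 }, () => SESSION_TTL_MS + 1) === true
```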
### 13. Performance vs. Readability
- Prefer readability over premature optimization
- Profile before optimizing
- Use clear algorithms first, optimize if needed
- Document performance-critical sections
- Balance between clean code and performance requirements
### 14. Code Organization
- Group related functionality together
- Use modules/packages to organize code
- Follow consistent file and folder structures
- Separate concerns (UI, business logic, data access)
- Use barrel exports (index files) appropriately
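**Example** (an illustrative barrel export; the module name and paths are placeholders):
```typescript
// features/auth/index.ts - the barrel defines the module's public surface
export { login, logout } from "./session";
export type { AuthUser } from "./types";

// elsewhere.ts - callers import from the module, not from deep file paths
import { login } from "./features/auth";
```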
### 15. Configuration Management
- Externalize configuration values
- Use environment variables for environment-specific settings
- Create configuration objects/interfaces
- Validate configuration at startup
- Provide sensible defaults
**Example**:
```typescript
// Bad
const apiUrl = "https://api.example.com";
const timeout = 5000;
// Good
interface Config {
apiUrl: string;
timeout: number;
maxRetries: number;
}
const config: Config = {
apiUrl: process.env.API_URL || "https://api.example.com",
timeout: parseInt(process.env.TIMEOUT || "5000"),
maxRetries: parseInt(process.env.MAX_RETRIES || "3"),
};
```
---
## Code Review Checklist
When reviewing code, check for:
- [ ] No code duplication (DRY principle)
- [ ] Meaningful variable and function names
- [ ] No magic numbers or strings
- [ ] Functions are small and focused
- [ ] Proper error handling
- [ ] Type safety maintained
- [ ] Code is testable
- [ ] Documentation where needed
- [ ] Consistent code style
- [ ] Proper abstraction levels
- [ ] Extensibility considered
- [ ] Single responsibility principle followed
---
## Summary
Clean code is:
- **Readable**: Easy to understand at a glance
- **Maintainable**: Easy to modify and update
- **Testable**: Easy to write tests for
- **Extensible**: Easy to add new features
- **Reusable**: Can be used in multiple contexts
- **Well-documented**: Clear intent and purpose
- **Type-safe**: Leverages type system effectively
- **DRY**: No unnecessary repetition
- **Abstracted**: Proper separation of concerns
- **Configurable**: Uses constants and configuration over hardcoding
Remember: Code is read far more often than it is written. Write code for your future self and your teammates.

View File

@@ -1,70 +0,0 @@
You are a very strong reasoner and planner. Use these critical instructions to structure your plans, thoughts, and responses.
Before taking any action (either tool calls or responses to the user), you must proactively, methodically, and independently plan and reason about:
1. Logical dependencies and constraints:
Analyze the intended action against the following factors. Resolve conflicts in order of importance:
1.1) Policy-based rules, mandatory prerequisites, and constraints.
1.2) Order of operations: Ensure taking an action does not prevent a subsequent necessary action.
1.2.1) The user may request actions in a random order, but you may need to reorder operations to maximize successful completion of the task.
1.3) Other prerequisites (information and/or actions needed).
1.4) Explicit user constraints or preferences.
2. Risk assessment:
What are the consequences of taking the action? Will the new state cause any future issues?
2.1) For exploratory tasks (like searches), missing optional parameters is a LOW risk.
Prefer calling the tool with the available information over asking the user, unless your Rule 1 (Logical Dependencies) reasoning determines that optional information is required for a later step in your plan.
3. Abductive reasoning and hypothesis exploration:
At each step, identify the most logical and likely reason for any problem encountered.
3.1) Look beyond immediate or obvious causes. The most likely reason may not be the simplest and may require deeper inference.
3.2) Hypotheses may require additional research. Each hypothesis may take multiple steps to test.
3.3) Prioritize hypotheses based on likelihood, but do not discard less likely ones prematurely. A low-probability event may still be the root cause.
4. Outcome evaluation and adaptability:
Does the previous observation require any changes to your plan?
4.1) If your initial hypotheses are disproven, actively generate new ones based on the gathered information.
5. Information availability:
Incorporate all applicable and alternative sources of information, including:
5.1) Using available tools and their capabilities
5.2) All policies, rules, checklists, and constraints
5.3) Previous observations and conversation history
5.4) Information only available by asking the user
6. Precision and Grounding:
Ensure your reasoning is extremely precise and relevant to each exact ongoing situation.
6.1) Verify your claims by quoting the exact applicable information (including policies) when referring to them.
7. Completeness:
Ensure that all requirements, constraints, options, and preferences are exhaustively incorporated into your plan.
7.1) Resolve conflicts using the order of importance in #1.
7.2) Avoid premature conclusions: There may be multiple relevant options for a given situation.
7.2.1) To check for whether an option is relevant, reason about all information sources from #5.
7.2.2) You may need to consult the user to even know whether something is applicable. Do not assume it is not applicable without checking.
7.3) Review applicable sources of information from #5 to confirm which are relevant to the current state.
8. Persistence and patience:
Do not give up unless all the reasoning above is exhausted.
8.1) Don't be dissuaded by time taken or user frustration.
8.2) This persistence must be intelligent: On transient errors (e.g. please try again), you must retry unless an explicit retry limit (e.g., max x tries) has been reached. If such a limit is hit, you must stop. On other errors, you must change your strategy or arguments, not repeat the same failed call.
9. Inhibit your response:
Only take an action after all the above reasoning is completed. Once you've taken an action, you cannot take it back.

View File

@@ -1,172 +0,0 @@
# Agent Memory - Lessons Learned
This file documents issues encountered by previous agents and their solutions. Read this before starting work to avoid repeating mistakes.
## Testing Issues
### Issue: Mock project setup not navigating to board view
**Problem:** Setting `currentProject` in localStorage didn't automatically show the board view - app stayed on welcome view.
**Fix:** The `currentView` state is not persisted in localStorage. Instead of trying to set it, have tests click on the recent project from the welcome view to trigger `setCurrentProject()` which handles the view transition properly.
```typescript
// Don't do this:
await setupMockProject(page); // Sets localStorage
await page.goto("/");
await waitForElement(page, "board-view"); // ❌ Fails - still on welcome view
// Do this instead:
await setupMockProject(page);
await page.goto("/");
await waitForElement(page, "welcome-view");
const recentProject = page.locator(
'[data-testid="recent-project-test-project-1"]'
);
await recentProject.click(); // ✅ Triggers proper view transition
await waitForElement(page, "board-view");
```
### Issue: View output button test IDs are conditional
**Problem:** Tests failed looking for `view-output-inprogress-${featureId}` when the actual button had `view-output-${featureId}`.
**Fix:** The button test ID depends on whether the feature is actively running:
- `view-output-${featureId}` - shown when feature is in `runningAutoTasks` (actively running)
- `view-output-inprogress-${featureId}` - shown when status is "in_progress" but NOT actively running
After dragging a feature to in_progress, wait for the `auto_mode_feature_start` event to fire before looking for the button:
```typescript
// Wait for feature to start running
const viewOutputButton = page
.locator(
`[data-testid="view-output-${featureId}"], [data-testid="view-output-inprogress-${featureId}"]`
)
.first();
await expect(viewOutputButton).toBeVisible({ timeout: 8000 });
```
### Issue: Elements not appearing due to async event timing
**Problem:** Tests checked for UI elements before async events (like `auto_mode_feature_start`) had fired and updated the UI.
**Fix:** Add appropriate timeouts when waiting for elements that depend on async events. The mock auto mode takes ~2.4 seconds to complete, so allow sufficient time:
```typescript
// Mock auto mode timing: ~2.4s + 1.5s delay = ~4s total
await waitForAgentOutputModalHidden(page, { timeout: 10000 });
```
### Issue: Slider interaction testing
**Problem:** Clicking on slider track didn't reliably set specific values.
**Fix:** Use the slider's keyboard interaction or calculate the exact click position on the track. For max value, click on the rightmost edge of the track.
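A sketch of the keyboard-based approach (the test id and expected max value are illustrative):
```typescript
// Focus the slider thumb, then drive it with the keyboard instead of clicking the track
const sliderThumb = page.locator('[data-testid="max-concurrency-slider"] [role="slider"]');
await sliderThumb.focus();
await sliderThumb.press("End"); // "End" jumps to max; ArrowRight/ArrowLeft step by one
await expect(sliderThumb).toHaveAttribute("aria-valuenow", "5"); // expected max is illustrative
```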
### Issue: Port binding blocked in sandbox mode
**Problem:** Playwright tests couldn't bind to port in sandbox mode.
**Fix:** Tests don't need sandbox disabled - the issue was TEST_REUSE_SERVER environment variable. Make sure to start the dev server separately or let Playwright's webServer config handle it.
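For reference, a minimal `webServer` block in `playwright.config.ts` that covers both cases (the command, URL, and the exact meaning of TEST_REUSE_SERVER here are assumptions):
```typescript
import { defineConfig } from "@playwright/test";

export default defineConfig({
  webServer: {
    command: "npm run dev", // assumed dev command
    url: "http://localhost:3000", // assumed dev server URL
    // Reuse an already-running dev server instead of binding a new port
    reuseExistingServer: !!process.env.TEST_REUSE_SERVER,
  },
});
```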
## Code Architecture
### Issue: Understanding store state persistence
**Problem:** Not all store state is persisted to localStorage.
**Fix:** Check the `partialize` function in `app-store.ts` to see which state is persisted:
```typescript
partialize: (state) => ({
projects: state.projects,
currentProject: state.currentProject,
theme: state.theme,
sidebarOpen: state.sidebarOpen,
apiKeys: state.apiKeys,
chatSessions: state.chatSessions,
chatHistoryOpen: state.chatHistoryOpen,
maxConcurrency: state.maxConcurrency, // Added for concurrency feature
});
```
Note: `currentView` is NOT persisted - it's managed through actions.
### Issue: Auto mode task lifecycle
**Problem:** Confusion about when features are considered "running" vs "in_progress".
**Fix:** Understand the task lifecycle:
1. Feature dragged to "in_progress" column → status becomes "in_progress"
2. `auto_mode_feature_start` event fires → feature added to `runningAutoTasks`
3. Agent works on feature → periodic events sent
4. `auto_mode_feature_complete` event fires → feature removed from `runningAutoTasks`
5. If `passes: true` → status becomes "verified", if `passes: false` → stays "in_progress"
### Issue: waiting_approval features not draggable when skipTests=true
**Problem:** Features in `waiting_approval` status couldn't be dragged to `verified` column, even though the code appeared to handle it.
**Fix:** The order of condition checks in `handleDragEnd` matters. The `skipTests` check was catching `waiting_approval` features before the `waiting_approval` status check could handle them. Move the `waiting_approval` status check **before** the `skipTests` check in `board-view.tsx`:
```typescript
// Correct order in handleDragEnd:
if (draggedFeature.status === "backlog") {
// ...
} else if (draggedFeature.status === "waiting_approval") {
// Handle waiting_approval BEFORE skipTests check
// because waiting_approval features often have skipTests=true
} else if (draggedFeature.skipTests) {
// Handle other skipTests features
}
```
## Best Practices Discovered
### Testing utilities are critical
Create comprehensive testing utilities in `tests/utils.ts` to avoid repeating selector logic:
- `waitForElement` - waits for elements to appear
- `waitForElementHidden` - waits for elements to disappear
- `setupMockProject` - sets up mock localStorage state
- `navigateToBoard` - handles navigation from welcome to board view
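A minimal sketch of what one of these helpers can look like (the signature and default timeout are assumptions):
```typescript
// tests/utils.ts (sketch)
import type { Page } from "@playwright/test";

export async function waitForElement(page: Page, testId: string, timeout = 15000) {
  await page.locator(`[data-testid="${testId}"]`).waitFor({ state: "visible", timeout });
}
```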
### Always add data-testid attributes
When implementing features, immediately add `data-testid` attributes to key UI elements. This makes tests more reliable and easier to write.
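For example (the component and handler names are illustrative):
```tsx
<Button data-testid={`view-output-${feature.id}`} onClick={handleViewOutput}>
  View Output
</Button>
```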
### Test timeouts should be generous but not excessive
- Default timeout: 30s (set in playwright.config.ts)
- Element waits: 5-15s for critical elements
- Auto mode completion: 10s (accounts for ~4s mock duration)
- Don't increase timeouts past 10s for individual operations
### Mock auto mode timing
The mock auto mode in `electron.ts` has predictable timing:
- Total duration: ~2.4 seconds (300+500+300+300+500+500ms)
- Plus 1.5s delay before auto-closing modals
- Total: ~4 seconds from start to completion
### Issue: HotkeyButton conflicting with useKeyboardShortcuts
**Problem:** Adding `HotkeyButton` with a simple key (like "N") to buttons that already had keyboard shortcuts registered via `useKeyboardShortcuts` caused the hotkey to stop working. Both registered duplicate listeners, and the HotkeyButton's `stopPropagation()` call could interfere.
**Fix:** When a simple single-key hotkey is already handled by `useKeyboardShortcuts`, set `hotkeyActive={false}` on the `HotkeyButton` so it only displays the indicator badge without registering a duplicate listener:
```tsx
// In views that already use useKeyboardShortcuts for the "N" key:
<HotkeyButton
onClick={() => setShowAddDialog(true)}
hotkey={shortcuts.addFeature}
hotkeyActive={false} // <-- Important! Prevents duplicate listener
>
Add Feature
</HotkeyButton>
// HotkeyButton should only actively listen when it's the sole handler (e.g., Cmd+Enter in dialogs)
<HotkeyButton
onClick={handleSubmit}
hotkey={{ key: "Enter", cmdCtrl: true }}
hotkeyActive={isDialogOpen} // Active when dialog is open
>
Submit
</HotkeyButton>
```

.gitignore vendored (3 lines changed)
View File

@@ -9,3 +9,6 @@ dist/
.next/
node_modules
.automaker/images/
.automaker/
/.automaker/*
/.automaker/

View File

@@ -22,7 +22,7 @@ class McpServerFactory {
"Create or update a feature. Use this tool to create new features with detailed information or update existing feature status. When creating features, provide comprehensive description, category, and implementation steps.",
{
featureId: z.string().describe("The ID of the feature (lowercase, hyphens for spaces). Example: 'user-authentication', 'budget-tracking'"),
status: z.enum(["backlog", "todo", "in_progress", "verified"]).describe("The status for the feature. Use 'backlog' or 'todo' for new features."),
status: z.enum(["backlog", "todo", "in_progress", "verified"]).describe("The status for the feature. For NEW features, ONLY use 'backlog' or 'verified'. NEVER use 'in_progress' for new features - the user will manually start them."),
summary: z.string().optional().describe("A brief summary of what was implemented/changed or what the feature does."),
description: z.string().optional().describe("A detailed description of the feature. Be comprehensive - explain what the feature does, its purpose, and key functionality."),
category: z.string().optional().describe("The category/phase for this feature. Example: 'Phase 1: Foundation', 'Phase 2: Core Logic', 'Phase 3: Polish', 'Authentication', 'UI/UX'"),
@@ -38,14 +38,16 @@ class McpServerFactory {
const feature = features.find((f) => f.id === args.featureId);
if (!feature) {
console.log(`[Feature Creation] Feature ${args.featureId} not found - this might be a new feature being created`);
// This might be a new feature - try to proceed anyway
console.log(`[Feature Creation] Feature ${args.featureId} not found - this is a new feature being created`);
// This is a new feature - enforce backlog status for any non-verified features
}
// If agent tries to mark as verified but feature has skipTests=true, convert to waiting_approval
let finalStatus = args.status;
// Convert 'todo' to 'backlog' for consistency, but only for new features
if (!feature && finalStatus === "todo") {
// For NEW features: Convert 'todo' or 'in_progress' to 'backlog' for consistency
// New features should ALWAYS go to backlog first, user must manually start them
if (!feature && (finalStatus === "todo" || finalStatus === "in_progress")) {
console.log(`[Feature Creation] New feature ${args.featureId} - converting "${finalStatus}" to "backlog" (user must manually start features)`);
finalStatus = "backlog";
}
if (feature && args.status === "verified" && feature.skipTests === true) {

View File

@@ -390,15 +390,13 @@ class SpecRegenerationService {
3. For EACH feature in the implementation_roadmap:
- Determine if it's ALREADY IMPLEMENTED (fully or partially)
- If fully implemented: Create with status "verified" and note what's done
- If partially implemented: Create with status "in_progress" and note remaining work
- If not started: Create with status "backlog"
- If partially implemented OR not started: Create with status "backlog" and note what still needs to be done
**IMPORTANT - For each feature you MUST provide:**
- **featureId**: A descriptive ID (lowercase, hyphens for spaces). Example: "user-authentication", "budget-tracking"
- **status**:
- "verified" if feature is fully implemented in the codebase
- "in_progress" if partially implemented
- "backlog" if not yet started
- "verified" ONLY if feature is 100% fully implemented in the codebase
- "backlog" for ALL features that need ANY work (partial or not started) - the user will manually start these
- **description**: A DETAILED description (2-4 sentences) explaining what the feature does, its purpose, and key functionality
- **category**: The phase from the roadmap (e.g., "Phase 1: Foundation", "Phase 2: Core Logic", "Phase 3: Polish")
- **steps**: An array of 4-8 clear, actionable implementation steps. For verified features, these are what WAS done. For backlog, these are what NEEDS to be done.
@@ -407,10 +405,12 @@ class SpecRegenerationService {
**Example of analyzing existing code:**
If you find NextAuth.js configured in the codebase with working login pages, the user-authentication feature should be "verified" not "backlog".
**Example of a well-defined feature:**
**IMPORTANT: NEVER use "in_progress" status when creating features. Only use "verified" or "backlog".**
**Example of a well-defined feature (verified - fully complete):**
{
"featureId": "user-authentication",
"status": "verified", // Because we found it's already implemented
"status": "verified", // Because we found it's 100% already implemented
"description": "Secure user authentication system with email/password login and session management. Already implemented using NextAuth.js with email provider.",
"category": "Phase 1: Foundation",
"steps": [
@@ -422,6 +422,21 @@ If you find NextAuth.js configured in the codebase with working login pages, the
"summary": "Authentication implemented with NextAuth.js email provider"
}
**Example of a feature that needs work (backlog):**
{
"featureId": "user-profile",
"status": "backlog", // Needs work - user will manually start this
"description": "User profile page where users can view and edit their account settings, change password, and manage preferences.",
"category": "Phase 2: Core Features",
"steps": [
"Create profile page component",
"Add form for editing user details",
"Implement password change functionality",
"Add avatar upload feature"
],
"summary": "User profile management - needs implementation"
}
**Feature Storage:**
Features are stored in .automaker/features/{id}/feature.json - each feature has its own folder.
Use the UpdateFeatureStatus tool to create features with ALL the fields above.`,
@@ -453,13 +468,15 @@ Use the UpdateFeatureStatus tool to create features with ALL the fields above.`,
2. **Then, read .automaker/app_spec.txt** to see the implementation roadmap
3. **For EACH feature in the roadmap, determine its status:**
- Is it ALREADY IMPLEMENTED in the codebase? → status: "verified"
- Is it PARTIALLY IMPLEMENTED? → status: "in_progress"
- Is it NOT STARTED? → status: "backlog"
- Is it 100% FULLY IMPLEMENTED in the codebase? → status: "verified"
- Is it PARTIALLY IMPLEMENTED or NOT STARTED? → status: "backlog"
**CRITICAL: NEVER use "in_progress" status. Only use "verified" or "backlog".**
The user will manually move features from backlog to in_progress when they want to start working on them.
4. **Create each feature with UpdateFeatureStatus including ALL fields:**
- featureId: Descriptive ID (lowercase, hyphens)
- status: "verified", "in_progress", or "backlog" based on your analysis
- status: "verified" or "backlog" ONLY (never in_progress)
- description: 2-4 sentences explaining the feature
- category: The phase name from the roadmap
- steps: Array of 4-8 implementation steps

View File

@@ -5,6 +5,7 @@
import { Router, type Request, type Response } from "express";
import fs from "fs/promises";
import os from "os";
import path from "path";
import { validatePath, addAllowedPath, isPathAllowed } from "../lib/security.js";
import type { EventEmitter } from "../lib/events.js";
@@ -422,5 +423,123 @@ export function createFsRoutes(_events: EventEmitter): Router {
}
});
// Browse directories for file picker
// SECURITY: Restricted to home directory, allowed paths, and drive roots on Windows
router.post("/browse", async (req: Request, res: Response) => {
try {
const { dirPath } = req.body as { dirPath?: string };
const homeDir = os.homedir();
// Detect available drives on Windows
const detectDrives = async (): Promise<string[]> => {
if (os.platform() !== "win32") {
return [];
}
const drives: string[] = [];
const letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
for (const letter of letters) {
const drivePath = `${letter}:\\`;
try {
await fs.access(drivePath);
drives.push(drivePath);
} catch {
// Drive doesn't exist, skip it
}
}
return drives;
};
// Check if a path is safe to browse
const isSafePath = (targetPath: string): boolean => {
const resolved = path.resolve(targetPath);
const normalizedHome = path.resolve(homeDir);
// Allow browsing within home directory
if (resolved === normalizedHome || resolved.startsWith(normalizedHome + path.sep)) {
return true;
}
// Allow browsing already-allowed paths
if (isPathAllowed(resolved)) {
return true;
}
// On Windows, allow drive roots for initial navigation
if (os.platform() === "win32") {
const driveRootMatch = /^[A-Z]:\\$/i.test(resolved);
if (driveRootMatch) {
return true;
}
}
// On Unix, allow root for initial navigation (but only list, not read files)
if (os.platform() !== "win32" && resolved === "/") {
return true;
}
return false;
};
// Default to home directory if no path provided
const targetPath = dirPath ? path.resolve(dirPath) : homeDir;
// Security check: validate the path is safe to browse
if (!isSafePath(targetPath)) {
res.status(403).json({
success: false,
error: "Access denied: browsing is restricted to your home directory and allowed project paths",
});
return;
}
try {
const stats = await fs.stat(targetPath);
if (!stats.isDirectory()) {
res.status(400).json({ success: false, error: "Path is not a directory" });
return;
}
// Read directory contents
const entries = await fs.readdir(targetPath, { withFileTypes: true });
// Filter for directories only and exclude hidden directories
const directories = entries
.filter((entry) => entry.isDirectory() && !entry.name.startsWith("."))
.map((entry) => ({
name: entry.name,
path: path.join(targetPath, entry.name),
}))
.sort((a, b) => a.name.localeCompare(b.name));
// Get parent directory (only if parent is also safe to browse)
const parentPath = path.dirname(targetPath);
const hasParent = parentPath !== targetPath && isSafePath(parentPath);
// Get available drives on Windows
const drives = await detectDrives();
res.json({
success: true,
currentPath: targetPath,
parentPath: hasParent ? parentPath : null,
directories,
drives,
});
} catch (error) {
res.status(400).json({
success: false,
error: error instanceof Error ? error.message : "Failed to read directory",
});
}
} catch (error) {
const message = error instanceof Error ? error.message : "Unknown error";
res.status(500).json({ success: false, error: message });
}
});
return router;
}

View File

@@ -255,7 +255,7 @@ Format your response as markdown. Be specific and actionable.`;
// Save spec
const specDir = path.join(projectPath, ".automaker");
const specPath = path.join(specDir, "project-spec.md");
const specPath = path.join(specDir, "app_spec.txt");
await fs.mkdir(specDir, { recursive: true });
await fs.writeFile(specPath, responseText);
@@ -278,7 +278,7 @@ async function generateFeaturesFromSpec(
abortController: AbortController
) {
// Read existing spec
const specPath = path.join(projectPath, ".automaker", "project-spec.md");
const specPath = path.join(projectPath, ".automaker", "app_spec.txt");
let spec: string;
try {
@@ -382,7 +382,7 @@ async function parseAndCreateFeatures(
id: feature.id,
title: feature.title,
description: feature.description,
status: "pending",
status: "backlog", // Features go to backlog - user must manually start them
priority: feature.priority || 2,
complexity: feature.complexity || "moderate",
dependencies: feature.dependencies || [],