diff --git a/.claude/commands/create-spec.md b/.claude/commands/create-spec.md index f8a1b96..9c23abe 100644 --- a/.claude/commands/create-spec.md +++ b/.claude/commands/create-spec.md @@ -8,7 +8,7 @@ This command **requires** the project directory as an argument via `$ARGUMENTS`. **Example:** `/create-spec generations/my-app` -**Output location:** `$ARGUMENTS/prompts/app_spec.txt` and `$ARGUMENTS/prompts/initializer_prompt.md` +**Output location:** `$ARGUMENTS/.autocoder/prompts/app_spec.txt` and `$ARGUMENTS/.autocoder/prompts/initializer_prompt.md` If `$ARGUMENTS` is empty, inform the user they must provide a project path and exit. @@ -347,13 +347,13 @@ First ask in conversation if they want to make changes. ## Output Directory -The output directory is: `$ARGUMENTS/prompts/` +The output directory is: `$ARGUMENTS/.autocoder/prompts/` Once the user approves, generate these files: ## 1. Generate `app_spec.txt` -**Output path:** `$ARGUMENTS/prompts/app_spec.txt` +**Output path:** `$ARGUMENTS/.autocoder/prompts/app_spec.txt` Create a new file using this XML structure: @@ -489,7 +489,7 @@ Create a new file using this XML structure: ## 2. Update `initializer_prompt.md` -**Output path:** `$ARGUMENTS/prompts/initializer_prompt.md` +**Output path:** `$ARGUMENTS/.autocoder/prompts/initializer_prompt.md` If the output directory has an existing `initializer_prompt.md`, read it and update the feature count. If not, copy from `.claude/templates/initializer_prompt.template.md` first, then update. @@ -512,7 +512,7 @@ After: **CRITICAL:** You must create exactly **25** features using the `feature ## 3. Write Status File (REQUIRED - Do This Last) -**Output path:** `$ARGUMENTS/prompts/.spec_status.json` +**Output path:** `$ARGUMENTS/.autocoder/prompts/.spec_status.json` **CRITICAL:** After you have completed ALL requested file changes, write this status file to signal completion to the UI. This is required for the "Continue to Project" button to appear. @@ -524,8 +524,8 @@ Write this JSON file: "version": 1, "timestamp": "[current ISO 8601 timestamp, e.g., 2025-01-15T14:30:00.000Z]", "files_written": [ - "prompts/app_spec.txt", - "prompts/initializer_prompt.md" + ".autocoder/prompts/app_spec.txt", + ".autocoder/prompts/initializer_prompt.md" ], "feature_count": [the feature count from Phase 4L] } @@ -539,9 +539,9 @@ Write this JSON file: "version": 1, "timestamp": "2025-01-15T14:30:00.000Z", "files_written": [ - "prompts/app_spec.txt", - "prompts/initializer_prompt.md", - "prompts/coding_prompt.md" + ".autocoder/prompts/app_spec.txt", + ".autocoder/prompts/initializer_prompt.md", + ".autocoder/prompts/coding_prompt.md" ], "feature_count": 35 } @@ -559,11 +559,11 @@ Write this JSON file: Once files are generated, tell the user what to do next: -> "Your specification files have been created in `$ARGUMENTS/prompts/`! +> "Your specification files have been created in `$ARGUMENTS/.autocoder/prompts/`! > > **Files created:** -> - `$ARGUMENTS/prompts/app_spec.txt` -> - `$ARGUMENTS/prompts/initializer_prompt.md` +> - `$ARGUMENTS/.autocoder/prompts/app_spec.txt` +> - `$ARGUMENTS/.autocoder/prompts/initializer_prompt.md` > > The **Continue to Project** button should now appear. Click it to start the autonomous coding agent! 
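The `.spec_status.json` handshake described above is what the UI waits on before showing the "Continue to Project" button. As a rough illustration of that contract only (the actual server/UI code is not shown in this diff, and the helper name here is made up), a poller for the relocated `.autocoder/prompts/` path might look like this:

```python
# Hypothetical sketch, not code from the repo: detect that /create-spec has
# finished by watching for the status file described above. Field names
# follow the JSON schema shown in the diff; the polling helper is illustrative.
import json
import time
from pathlib import Path


def wait_for_spec_status(project_dir: str, timeout_s: float = 600.0) -> dict | None:
    """Poll for .autocoder/prompts/.spec_status.json and return its contents."""
    status_path = Path(project_dir) / ".autocoder" / "prompts" / ".spec_status.json"
    deadline = time.monotonic() + timeout_s
    while time.monotonic() < deadline:
        if status_path.exists():
            data = json.loads(status_path.read_text())
            # Minimal sanity check against the documented schema.
            if data.get("version") == 1 and "files_written" in data:
                return data
        time.sleep(1.0)
    return None  # timed out; the UI would keep the button hidden
```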
> diff --git a/.claude/commands/expand-project.md b/.claude/commands/expand-project.md index e8005b2..0ddf027 100644 --- a/.claude/commands/expand-project.md +++ b/.claude/commands/expand-project.md @@ -42,7 +42,7 @@ You are the **Project Expansion Assistant** - an expert at understanding existin # FIRST: Read and Understand Existing Project **Step 1:** Read the existing specification: -- Read `$ARGUMENTS/prompts/app_spec.txt` +- Read `$ARGUMENTS/.autocoder/prompts/app_spec.txt` **Step 2:** Present a summary to the user: @@ -231,4 +231,4 @@ If they want to add more, go back to Phase 1. # BEGIN -Start by reading the app specification file at `$ARGUMENTS/prompts/app_spec.txt`, then greet the user with a summary of their existing project and ask what they want to add. +Start by reading the app specification file at `$ARGUMENTS/.autocoder/prompts/app_spec.txt`, then greet the user with a summary of their existing project and ask what they want to add. diff --git a/.claude/commands/gsd-to-autocoder-spec.md b/.claude/commands/gsd-to-autocoder-spec.md index fc41cee..dbaeff6 100644 --- a/.claude/commands/gsd-to-autocoder-spec.md +++ b/.claude/commands/gsd-to-autocoder-spec.md @@ -5,6 +5,6 @@ description: Convert GSD codebase mapping to Autocoder app_spec.txt # GSD to Autocoder Spec -Convert `.planning/codebase/*.md` (from `/gsd:map-codebase`) to Autocoder's `prompts/app_spec.txt`. +Convert `.planning/codebase/*.md` (from `/gsd:map-codebase`) to Autocoder's `.autocoder/prompts/app_spec.txt`. @.claude/skills/gsd-to-autocoder-spec/SKILL.md diff --git a/.claude/commands/review-pr.md b/.claude/commands/review-pr.md index 9c9098f..027c807 100644 --- a/.claude/commands/review-pr.md +++ b/.claude/commands/review-pr.md @@ -40,15 +40,36 @@ Pull request(s): $ARGUMENTS - For Medium PRs: spawn 1-2 agents focusing on the most impacted areas - For Complex PRs: spawn up to 3 agents to cover security, performance, and architectural concerns -4. **Vision Alignment Check** +4. **PR Scope & Title Alignment Check** + - Compare the PR title and description against the actual diff content + - Check whether the PR is focused on a single coherent change or contains multiple unrelated changes + - If the title/description describe one thing but the PR contains significantly more (e.g., title says "fix typo in README" but the diff touches 20 files across multiple domains), flag this as a **scope mismatch** + - A scope mismatch is a **merge blocker** — recommend the author split the PR into smaller, focused PRs + - Suggest specific ways to split the PR (e.g., "separate the refactor from the feature addition") + - Reviewing large, unfocused PRs is impractical and error-prone; the review cannot provide adequate assurance for such changes + +5. **Vision Alignment Check** - Read the project's README.md and CLAUDE.md to understand the application's core purpose - Assess whether this PR aligns with the application's intended functionality - If the changes deviate significantly from the core vision or add functionality that doesn't serve the application's purpose, note this in the review - This is not a blocker, but should be flagged for the reviewer's consideration -5. **Safety Assessment** +6. **Safety Assessment** - Provide a review on whether the PR is safe to merge as-is - Provide any feedback in terms of risk level -6. **Improvements** - - Propose any improvements in terms of importance and complexity \ No newline at end of file +7. **Improvements** + - Propose any improvements in terms of importance and complexity + +8. 
**Merge Recommendation** + - Based on all findings, provide a clear merge/don't-merge recommendation + - If all concerns are minor (cosmetic issues, naming suggestions, small style nits, missing comments, etc.), recommend **merging the PR** and note that the reviewer can address these minor concerns themselves with a quick follow-up commit pushed directly to master + - If there are significant concerns (bugs, security issues, architectural problems, scope mismatch), recommend **not merging** and explain what needs to be resolved first + +9. **TLDR** + - End the review with a `## TLDR` section + - In 3-5 bullet points maximum, summarize: + - What this PR is actually about (one sentence) + - The key concerns, if any (or "no significant concerns") + - **Verdict: MERGE** / **MERGE (with minor follow-up)** / **DON'T MERGE** with a one-line reason + - This section should be scannable in under 10 seconds \ No newline at end of file diff --git a/.claude/skills/gsd-to-autocoder-spec/SKILL.md b/.claude/skills/gsd-to-autocoder-spec/SKILL.md index d4fba24..167caf0 100644 --- a/.claude/skills/gsd-to-autocoder-spec/SKILL.md +++ b/.claude/skills/gsd-to-autocoder-spec/SKILL.md @@ -9,7 +9,7 @@ description: | # GSD to Autocoder Spec Converter -Converts `.planning/codebase/*.md` (GSD mapping output) to `prompts/app_spec.txt` (Autocoder format). +Converts `.planning/codebase/*.md` (GSD mapping output) to `.autocoder/prompts/app_spec.txt` (Autocoder format). ## When to Use @@ -84,7 +84,7 @@ Extract: Create `prompts/` directory: ```bash -mkdir -p prompts +mkdir -p .autocoder/prompts ``` **Mapping GSD Documents to Autocoder Spec:** @@ -114,7 +114,7 @@ mkdir -p prompts **Write the spec file** using the XML format from [references/app-spec-format.md](references/app-spec-format.md): ```bash -cat > prompts/app_spec.txt << 'EOF' +cat > .autocoder/prompts/app_spec.txt << 'EOF' {from package.json or directory} @@ -173,9 +173,9 @@ EOF ### Step 5: Verify Generated Spec ```bash -head -100 prompts/app_spec.txt +head -100 .autocoder/prompts/app_spec.txt echo "---" -grep -c "User can\|System\|API\|Feature" prompts/app_spec.txt || echo "0" +grep -c "User can\|System\|API\|Feature" .autocoder/prompts/app_spec.txt || echo "0" ``` **Validation checklist:** @@ -194,7 +194,7 @@ Output: app_spec.txt generated from GSD codebase mapping. Source: .planning/codebase/*.md -Output: prompts/app_spec.txt +Output: .autocoder/prompts/app_spec.txt Next: Start Autocoder diff --git a/.claude/templates/coding_prompt.template.md b/.claude/templates/coding_prompt.template.md index 9322404..c8d3ba6 100644 --- a/.claude/templates/coding_prompt.template.md +++ b/.claude/templates/coding_prompt.template.md @@ -49,51 +49,21 @@ Otherwise, start servers manually and document the process. #### TEST-DRIVEN DEVELOPMENT MINDSET (CRITICAL) -Features are **test cases** that drive development. This is test-driven development: +Features are **test cases** that drive development. If functionality doesn't exist, **BUILD IT** -- you are responsible for implementing ALL required functionality. Missing pages, endpoints, database tables, or components are NOT blockers; they are your job to create. 
-- **If you can't test a feature because functionality doesn't exist → BUILD IT** -- You are responsible for implementing ALL required functionality -- Never assume another process will build it later -- "Missing functionality" is NOT a blocker - it's your job to create it - -**Example:** Feature says "User can filter flashcards by difficulty level" -- WRONG: "Flashcard page doesn't exist yet" → skip feature -- RIGHT: "Flashcard page doesn't exist yet" → build flashcard page → implement filter → test feature - -**Note:** Your feature has been pre-assigned by the orchestrator. Use `feature_get_by_id` with your assigned feature ID to get the details. - -Once you've retrieved the feature, **mark it as in-progress** (if not already): +**Note:** Your feature has been pre-assigned by the orchestrator. Use `feature_get_by_id` with your assigned feature ID to get the details. Then mark it as in-progress: ``` -# Mark feature as in-progress Use the feature_mark_in_progress tool with feature_id={your_assigned_id} ``` If you get "already in-progress" error, that's OK - continue with implementation. -Focus on completing one feature perfectly and completing its testing steps in this session before moving on to other features. -It's ok if you only complete one feature in this session, as there will be more sessions later that continue to make progress. +Focus on completing one feature perfectly in this session. It's ok if you only complete one feature, as more sessions will follow. #### When to Skip a Feature (EXTREMELY RARE) -**Skipping should almost NEVER happen.** Only skip for truly external blockers you cannot control: - -- **External API not configured**: Third-party service credentials missing (e.g., Stripe keys, OAuth secrets) -- **External service unavailable**: Dependency on service that's down or inaccessible -- **Environment limitation**: Hardware or system requirement you cannot fulfill - -**NEVER skip because:** - -| Situation | Wrong Action | Correct Action | -|-----------|--------------|----------------| -| "Page doesn't exist" | Skip | Create the page | -| "API endpoint missing" | Skip | Implement the endpoint | -| "Database table not ready" | Skip | Create the migration | -| "Component not built" | Skip | Build the component | -| "No data to test with" | Skip | Create test data or build data entry flow | -| "Feature X needs to be done first" | Skip | Build feature X as part of this feature | - -If a feature requires building other functionality first, **build that functionality**. You are the coding agent - your job is to make the feature work, not to defer it. +Only skip for truly external blockers: missing third-party credentials (Stripe keys, OAuth secrets), unavailable external services, or unfulfillable environment requirements. **NEVER** skip because a page, endpoint, component, or data doesn't exist yet -- build it. If a feature requires other functionality first, build that functionality as part of this feature. 
If you must skip (truly external blocker only): @@ -139,130 +109,22 @@ Use browser automation tools: ### STEP 5.5: MANDATORY VERIFICATION CHECKLIST (BEFORE MARKING ANY TEST PASSING) -**You MUST complete ALL of these checks before marking any feature as "passes": true** +**Complete ALL applicable checks before marking any feature as passing:** -#### Security Verification (for protected features) - -- [ ] Feature respects user role permissions -- [ ] Unauthenticated access is blocked (redirects to login) -- [ ] API endpoint checks authorization (returns 401/403 appropriately) -- [ ] Cannot access other users' data by manipulating URLs - -#### Real Data Verification (CRITICAL - NO MOCK DATA) - -- [ ] Created unique test data via UI (e.g., "TEST_12345_VERIFY_ME") -- [ ] Verified the EXACT data I created appears in UI -- [ ] Refreshed page - data persists (proves database storage) -- [ ] Deleted the test data - verified it's gone everywhere -- [ ] NO unexplained data appeared (would indicate mock data) -- [ ] Dashboard/counts reflect real numbers after my changes -- [ ] **Ran extended mock data grep (STEP 5.6) - no hits in src/ (excluding tests)** -- [ ] **Verified no globalThis, devStore, or dev-store patterns** -- [ ] **Server restart test passed (STEP 5.7) - data persists across restart** - -#### Navigation Verification - -- [ ] All buttons on this page link to existing routes -- [ ] No 404 errors when clicking any interactive element -- [ ] Back button returns to correct previous page -- [ ] Related links (edit, view, delete) have correct IDs in URLs - -#### Integration Verification - -- [ ] Console shows ZERO JavaScript errors -- [ ] Network tab shows successful API calls (no 500s) -- [ ] Data returned from API matches what UI displays -- [ ] Loading states appeared during API calls -- [ ] Error states handle failures gracefully +- **Security:** Feature respects role permissions; unauthenticated access blocked; API checks auth (401/403); no cross-user data leaks via URL manipulation +- **Real Data:** Create unique test data via UI, verify it appears, refresh to confirm persistence, delete and verify removal. No unexplained data (indicates mocks). Dashboard counts reflect real numbers +- **Mock Data Grep:** Run STEP 5.6 grep checks - no hits in src/ (excluding tests). No globalThis, devStore, or dev-store patterns +- **Server Restart:** For data features, run STEP 5.7 - data persists across server restart +- **Navigation:** All buttons link to existing routes, no 404s, back button works, edit/view/delete links have correct IDs +- **Integration:** Zero JS console errors, no 500s in network tab, API data matches UI, loading/error states work ### STEP 5.6: MOCK DATA DETECTION (Before marking passing) -**Run ALL these grep checks. Any hits in src/ (excluding test files) require investigation:** - -```bash -# Common exclusions for test files -EXCLUDE="--exclude=*.test.* --exclude=*.spec.* --exclude=*__test__* --exclude=*__mocks__*" - -# 1. In-memory storage patterns (CRITICAL - catches dev-store) -grep -r "globalThis\." --include="*.ts" --include="*.tsx" --include="*.js" $EXCLUDE src/ -grep -r "dev-store\|devStore\|DevStore\|mock-db\|mockDb" --include="*.ts" --include="*.tsx" --include="*.js" $EXCLUDE src/ - -# 2. Mock data variables -grep -r "mockData\|fakeData\|sampleData\|dummyData\|testData" --include="*.ts" --include="*.tsx" --include="*.js" $EXCLUDE src/ - -# 3. 
TODO/incomplete markers -grep -r "TODO.*real\|TODO.*database\|TODO.*API\|STUB\|MOCK" --include="*.ts" --include="*.tsx" --include="*.js" $EXCLUDE src/ - -# 4. Development-only conditionals -grep -r "isDevelopment\|isDev\|process\.env\.NODE_ENV.*development" --include="*.ts" --include="*.tsx" --include="*.js" $EXCLUDE src/ - -# 5. In-memory collections as data stores -grep -r "new Map\(\)\|new Set\(\)" --include="*.ts" --include="*.tsx" --include="*.js" $EXCLUDE src/ 2>/dev/null -``` - -**Rule:** If ANY grep returns results in production code → investigate → FIX before marking passing. - -**Runtime verification:** -1. Create unique data (e.g., "TEST_12345") → verify in UI → delete → verify gone -2. Check database directly - all displayed data must come from real DB queries -3. If unexplained data appears, it's mock data - fix before marking passing. +Before marking a feature passing, grep for mock/placeholder data patterns in src/ (excluding test files): `globalThis`, `devStore`, `dev-store`, `mockDb`, `mockData`, `fakeData`, `sampleData`, `dummyData`, `testData`, `TODO.*real`, `TODO.*database`, `STUB`, `MOCK`, `isDevelopment`, `isDev`. Any hits in production code must be investigated and fixed. Also create unique test data (e.g., "TEST_12345"), verify it appears in UI, then delete and confirm removal - unexplained data indicates mock implementations. ### STEP 5.7: SERVER RESTART PERSISTENCE TEST (MANDATORY for data features) -**When required:** Any feature involving CRUD operations or data persistence. - -**This test is NON-NEGOTIABLE. It catches in-memory storage implementations that pass all other tests.** - -**Steps:** - -1. Create unique test data via UI or API (e.g., item named "RESTART_TEST_12345") -2. Verify data appears in UI and API response - -3. **STOP the server completely:** - ```bash - # Kill by port (safer - only kills the dev server, not VS Code/Claude Code/etc.) - # Unix/macOS: - lsof -ti :${PORT:-3000} | xargs kill -TERM 2>/dev/null || true - sleep 3 - lsof -ti :${PORT:-3000} | xargs kill -9 2>/dev/null || true - sleep 2 - - # Windows alternative (use if lsof not available): - # netstat -ano | findstr :${PORT:-3000} | findstr LISTENING - # taskkill /F /PID 2>nul - - # Verify server is stopped - if lsof -ti :${PORT:-3000} > /dev/null 2>&1; then - echo "ERROR: Server still running on port ${PORT:-3000}!" - exit 1 - fi - ``` - -4. **RESTART the server:** - ```bash - ./init.sh & - sleep 15 # Allow server to fully start - # Verify server is responding - if ! curl -f http://localhost:${PORT:-3000}/api/health && ! curl -f http://localhost:${PORT:-3000}; then - echo "ERROR: Server failed to start after restart" - exit 1 - fi - ``` - -5. **Query for test data - it MUST still exist** - - Via UI: Navigate to data location, verify data appears - - Via API: `curl http://localhost:${PORT:-3000}/api/items` - verify data in response - -6. **If data is GONE:** Implementation uses in-memory storage → CRITICAL FAIL - - Run all grep commands from STEP 5.6 to identify the mock pattern - - You MUST fix the in-memory storage implementation before proceeding - - Replace in-memory storage with real database queries - -7. **Clean up test data** after successful verification - -**Why this test exists:** In-memory stores like `globalThis.devStore` pass all other tests because data persists during a single server run. Only a full server restart reveals this bug. Skipping this step WILL allow dev-store implementations to slip through. 
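The condensed STEP 5.6 text above keeps the pattern list but no longer shows a concrete command. A hedged Python equivalent of that scan is sketched below; the pattern list comes from the prompt text, while the file-walking helper itself (and the assumption of a local `src/` tree with `.ts`/`.tsx`/`.js` files) is illustrative only:

```python
# Sketch of the STEP 5.6 mock-data scan; not part of the template itself.
import re
from pathlib import Path

# Patterns taken from the condensed STEP 5.6 list above.
MOCK_PATTERNS = re.compile(
    r"globalThis\.|dev-store|devStore|mock-db|mockDb|mockData|fakeData|"
    r"sampleData|dummyData|testData|TODO.*(real|database)|STUB|MOCK|"
    r"isDevelopment|isDev\b"
)
# Test-file exclusions mirror the grep excludes in the original template.
TEST_MARKERS = (".test.", ".spec.", "__test__", "__mocks__")


def scan_for_mock_data(src_dir: str = "src") -> list[tuple[str, int, str]]:
    """Return (file, line number, line) hits that need investigation."""
    hits = []
    for path in Path(src_dir).rglob("*"):
        if not path.is_file() or path.suffix not in {".ts", ".tsx", ".js"}:
            continue
        if any(marker in str(path) for marker in TEST_MARKERS):
            continue  # test files may legitimately use mocks
        for lineno, line in enumerate(
            path.read_text(errors="ignore").splitlines(), start=1
        ):
            if MOCK_PATTERNS.search(line):
                hits.append((str(path), lineno, line.strip()))
    return hits
```

Any hit returned for production code would still need the same treatment the template demands: investigate and replace the mock with real database-backed logic before marking the feature passing.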
- -**YOLO Mode Note:** Even in YOLO mode, this verification is MANDATORY for data features. Use curl instead of browser automation. +For any feature involving CRUD or data persistence: create unique test data (e.g., "RESTART_TEST_12345"), verify it exists, then fully stop and restart the dev server. After restart, verify the test data still exists. If data is gone, the implementation uses in-memory storage -- run STEP 5.6 greps, find the mock pattern, and replace with real database queries. Clean up test data after verification. This test catches in-memory stores like `globalThis.devStore` that pass all other tests but lose data on restart. ### STEP 6: UPDATE FEATURE STATUS (CAREFULLY!) diff --git a/.claude/templates/testing_prompt.template.md b/.claude/templates/testing_prompt.template.md index a7e2bbe..c8011a3 100644 --- a/.claude/templates/testing_prompt.template.md +++ b/.claude/templates/testing_prompt.template.md @@ -1,58 +1,29 @@ ## YOUR ROLE - TESTING AGENT -You are a **testing agent** responsible for **regression testing** previously-passing features. +You are a **testing agent** responsible for **regression testing** previously-passing features. If you find a regression, you must fix it. -Your job is to ensure that features marked as "passing" still work correctly. If you find a regression (a feature that no longer works), you must fix it. +## ASSIGNED FEATURES FOR REGRESSION TESTING -### STEP 1: GET YOUR BEARINGS (MANDATORY) +You are assigned to test the following features: {{TESTING_FEATURE_IDS}} -Start by orienting yourself: +### Workflow for EACH feature: +1. Call `feature_get_by_id` with the feature ID +2. Read the feature's verification steps +3. Test the feature in the browser +4. Call `feature_mark_passing` or `feature_mark_failing` +5. Move to the next feature -```bash -# 1. See your working directory -pwd +--- -# 2. List files to understand project structure -ls -la +### STEP 1: GET YOUR ASSIGNED FEATURE(S) -# 3. Read progress notes from previous sessions (last 200 lines) -tail -200 claude-progress.txt - -# 4. Check recent git history -git log --oneline -10 -``` - -Then use MCP tools to check feature status: +Your features have been pre-assigned by the orchestrator. For each feature ID listed above, use `feature_get_by_id` to get the details: ``` -# 5. Get progress statistics -Use the feature_get_stats tool +Use the feature_get_by_id tool with feature_id= ``` -### STEP 2: START SERVERS (IF NOT RUNNING) - -If `init.sh` exists, run it: - -```bash -chmod +x init.sh -./init.sh -``` - -Otherwise, start servers manually. - -### STEP 3: GET YOUR ASSIGNED FEATURE - -Your feature has been pre-assigned by the orchestrator. Use `feature_get_by_id` to get the details: - -``` -Use the feature_get_by_id tool with feature_id={your_assigned_id} -``` - -The orchestrator has already claimed this feature for testing (set `testing_in_progress=true`). - -**CRITICAL:** You MUST call `feature_release_testing` when done, regardless of pass/fail. - -### STEP 4: VERIFY THE FEATURE +### STEP 2: VERIFY THE FEATURE **CRITICAL:** You MUST verify the feature through the actual UI using browser automation. @@ -81,21 +52,11 @@ Use browser automation tools: - browser_console_messages - Get browser console output (check for errors) - browser_network_requests - Monitor API calls -### STEP 5: HANDLE RESULTS +### STEP 3: HANDLE RESULTS #### If the feature PASSES: -The feature still works correctly. 
Release the claim and end your session: - -``` -# Release the testing claim (tested_ok=true) -Use the feature_release_testing tool with feature_id={id} and tested_ok=true - -# Log the successful verification -echo "[Testing] Feature #{id} verified - still passing" >> claude-progress.txt -``` - -**DO NOT** call feature_mark_passing again - it's already passing. +The feature still works correctly. **DO NOT** call feature_mark_passing again -- it's already passing. End your session. #### If the feature FAILS (regression found): @@ -125,13 +86,7 @@ A regression has been introduced. You MUST fix it: Use the feature_mark_passing tool with feature_id={id} ``` -6. **Release the testing claim:** - ``` - Use the feature_release_testing tool with feature_id={id} and tested_ok=false - ``` - Note: tested_ok=false because we found a regression (even though we fixed it). - -7. **Commit the fix:** +6. **Commit the fix:** ```bash git add . git commit -m "Fix regression in [feature name] @@ -141,14 +96,6 @@ A regression has been introduced. You MUST fix it: - Verified with browser automation" ``` -### STEP 6: UPDATE PROGRESS AND END - -Update `claude-progress.txt`: - -```bash -echo "[Testing] Session complete - verified/fixed feature #{id}" >> claude-progress.txt -``` - --- ## AVAILABLE MCP TOOLS @@ -156,12 +103,11 @@ echo "[Testing] Session complete - verified/fixed feature #{id}" >> claude-progr ### Feature Management - `feature_get_stats` - Get progress overview (passing/in_progress/total counts) - `feature_get_by_id` - Get your assigned feature details -- `feature_release_testing` - **REQUIRED** - Release claim after testing (pass tested_ok=true/false) - `feature_mark_failing` - Mark a feature as failing (when you find a regression) - `feature_mark_passing` - Mark a feature as passing (after fixing a regression) ### Browser Automation (Playwright) -All interaction tools have **built-in auto-wait** - no manual timeouts needed. +All interaction tools have **built-in auto-wait** -- no manual timeouts needed. - `browser_navigate` - Navigate to URL - `browser_take_screenshot` - Capture screenshot @@ -178,9 +124,7 @@ All interaction tools have **built-in auto-wait** - no manual timeouts needed. ## IMPORTANT REMINDERS -**Your Goal:** Verify that passing features still work, and fix any regressions found. - -**This Session's Goal:** Test ONE feature thoroughly. +**Your Goal:** Test each assigned feature thoroughly. Verify it still works, and fix any regression found. Process ALL features in your list before ending your session. **Quality Bar:** - Zero console errors @@ -188,21 +132,15 @@ All interaction tools have **built-in auto-wait** - no manual timeouts needed. - Visual appearance correct - API calls succeed -**CRITICAL - Always release your claim:** -- Call `feature_release_testing` when done, whether pass or fail -- Pass `tested_ok=true` if the feature passed -- Pass `tested_ok=false` if you found a regression - **If you find a regression:** 1. Mark the feature as failing immediately 2. Fix the issue 3. Verify the fix with browser automation 4. Mark as passing only after thorough verification -5. Release the testing claim with `tested_ok=false` -6. Commit the fix +5. Commit the fix -**You have one iteration.** Focus on testing ONE feature thoroughly. +**You have one iteration.** Test all assigned features before ending. --- -Begin by running Step 1 (Get Your Bearings). +Begin by running Step 1 for the first feature in your assigned list. 
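The reworked testing template above receives its batch through the `{{TESTING_FEATURE_IDS}}` placeholder. As a minimal sketch of how an orchestrator might fill that placeholder (the real helper in `prompts.py` may have a different name and signature; this is an assumption for illustration):

```python
# Hedged illustration of batch testing-prompt rendering; not the actual
# prompts.py implementation.
from pathlib import Path


def render_testing_prompt(template_path: str, feature_ids: list[int]) -> str:
    """Substitute the assigned feature IDs into the testing prompt template."""
    template = Path(template_path).read_text(encoding="utf-8")
    id_list = ", ".join(str(fid) for fid in feature_ids)
    return template.replace("{{TESTING_FEATURE_IDS}}", id_list)


# Example: three previously-passing features assigned for regression testing.
prompt = render_testing_prompt(
    ".claude/templates/testing_prompt.template.md", [4, 9, 17]
)
```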
diff --git a/.gitignore b/.gitignore index bb20118..2639f8d 100644 --- a/.gitignore +++ b/.gitignore @@ -76,6 +76,8 @@ ui/playwright-report/ .dmypy.json dmypy.json +.ruff_cache/ + # =================== # Claude Code # =================== diff --git a/CLAUDE.md b/CLAUDE.md index d92db4e..ef1d7d0 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -54,6 +54,12 @@ python autonomous_agent_demo.py --project-dir my-app --yolo # Parallel mode: run multiple agents concurrently (1-5 agents) python autonomous_agent_demo.py --project-dir my-app --parallel --max-concurrency 3 + +# Batch mode: implement multiple features per agent session (1-3) +python autonomous_agent_demo.py --project-dir my-app --batch-size 3 + +# Batch specific features by ID +python autonomous_agent_demo.py --project-dir my-app --batch-features 1,2,3 ``` ### YOLO Mode (Rapid Prototyping) @@ -68,7 +74,7 @@ python autonomous_agent_demo.py --project-dir my-app --yolo ``` **What's different in YOLO mode:** -- No regression testing (skips `feature_get_for_regression`) +- No regression testing - No Playwright MCP server (browser automation disabled) - Features marked passing after lint/type-check succeeds - Faster iteration for prototyping @@ -97,10 +103,13 @@ npm run lint # Run ESLint ### Python ```bash -ruff check . # Lint -mypy . # Type check -python test_security.py # Security unit tests (163 tests) -python test_security_integration.py # Integration tests (9 tests) +ruff check . # Lint +mypy . # Type check +python test_security.py # Security unit tests (12 tests) +python test_security_integration.py # Integration tests (9 tests) +python -m pytest test_client.py # Client tests (20 tests) +python -m pytest test_dependency_resolver.py # Dependency resolver tests (12 tests) +python -m pytest test_rate_limit_utils.py # Rate limit tests (22 tests) ``` ### React UI @@ -108,11 +117,17 @@ python test_security_integration.py # Integration tests (9 tests) ```bash cd ui npm run lint # ESLint -npm run build # Type check + build +npm run build # Type check + build (Vite 7) npm run test:e2e # Playwright end-to-end tests npm run test:e2e:ui # Playwright tests with UI ``` +### CI/CD + +GitHub Actions (`.github/workflows/ci.yml`) runs on push/PR to master: +- **Python job**: ruff lint + security tests +- **UI job**: ESLint + TypeScript build + ### Code Quality Configuration in `pyproject.toml`: @@ -124,15 +139,21 @@ Configuration in `pyproject.toml`: ### Core Python Modules - `start.py` - CLI launcher with project creation/selection menu -- `autonomous_agent_demo.py` - Entry point for running the agent +- `autonomous_agent_demo.py` - Entry point for running the agent (supports `--yolo`, `--parallel`, `--batch-size`, `--batch-features`) +- `autocoder_paths.py` - Central path resolution with dual-path backward compatibility and migration - `agent.py` - Agent session loop using Claude Agent SDK -- `client.py` - ClaudeSDKClient configuration with security hooks and MCP servers +- `client.py` - ClaudeSDKClient configuration with security hooks, MCP servers, and Vertex AI support - `security.py` - Bash command allowlist validation (ALLOWED_COMMANDS whitelist) -- `prompts.py` - Prompt template loading with project-specific fallback +- `prompts.py` - Prompt template loading with project-specific fallback and batch feature prompts - `progress.py` - Progress tracking, database queries, webhook notifications -- `registry.py` - Project registry for mapping names to paths (cross-platform) +- `registry.py` - Project registry for mapping names to paths (cross-platform), 
global settings model - `parallel_orchestrator.py` - Concurrent agent execution with dependency-aware scheduling +- `auth.py` - Authentication error detection for Claude CLI +- `env_constants.py` - Shared environment variable constants (API_ENV_VARS) used by client.py and chat sessions +- `rate_limit_utils.py` - Rate limit detection, retry parsing, exponential backoff with jitter +- `api/database.py` - SQLAlchemy models (Feature, Schedule, ScheduleOverride) - `api/dependency_resolver.py` - Cycle detection (Kahn's algorithm + DFS) and dependency validation +- `api/migration.py` - JSON-to-SQLite migration utility ### Project Registry @@ -146,13 +167,36 @@ The registry uses: ### Server API (server/) -The FastAPI server provides REST endpoints for the UI: +The FastAPI server provides REST and WebSocket endpoints for the UI: -- `server/routers/projects.py` - Project CRUD with registry integration -- `server/routers/features.py` - Feature management -- `server/routers/agent.py` - Agent control (start/stop/pause/resume) -- `server/routers/filesystem.py` - Filesystem browser API with security controls -- `server/routers/spec_creation.py` - WebSocket for interactive spec creation +**Routers** (`server/routers/`): +- `projects.py` - Project CRUD with registry integration +- `features.py` - Feature management +- `agent.py` - Agent control (start/stop/pause/resume) +- `filesystem.py` - Filesystem browser API with security controls +- `spec_creation.py` - WebSocket for interactive spec creation +- `expand_project.py` - Interactive project expansion via natural language +- `assistant_chat.py` - Read-only project assistant chat (WebSocket/REST) +- `terminal.py` - Interactive terminal I/O with PTY support (WebSocket bidirectional) +- `devserver.py` - Dev server control (start/stop) and config +- `schedules.py` - CRUD for time-based agent scheduling +- `settings.py` - Global settings management (model selection, YOLO, batch size, headless browser) + +**Services** (`server/services/`): +- `process_manager.py` - Agent process lifecycle management +- `project_config.py` - Project type detection and dev command management +- `terminal_manager.py` - Terminal session management with PTY (`pywinpty` on Windows) +- `scheduler_service.py` - APScheduler-based automated agent scheduling +- `dev_server_manager.py` - Dev server lifecycle management +- `assistant_chat_session.py` / `assistant_database.py` - Assistant chat sessions with SQLite persistence +- `spec_chat_session.py` - Spec creation chat sessions +- `expand_chat_session.py` - Expand project chat sessions +- `chat_constants.py` - Shared constants for chat services + +**Utilities** (`server/utils/`): +- `process_utils.py` - Process management utilities +- `project_helpers.py` - Project path resolution helpers +- `validation.py` - Project name validation ### Feature Management @@ -163,18 +207,26 @@ Features are stored in SQLite (`features.db`) via SQLAlchemy. 
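The module list above notes that `api/dependency_resolver.py` performs cycle detection with Kahn's algorithm plus DFS. As a generic reference for the Kahn-style part of that idea (not the module's actual code), a minimal check over a feature dependency map looks like this:

```python
# Minimal illustration of Kahn's-algorithm cycle detection, the technique the
# dependency resolver is described as using. This is a generic sketch, not
# the code in api/dependency_resolver.py.
from collections import deque


def has_cycle(dependencies: dict[int, set[int]]) -> bool:
    """dependencies maps feature_id -> set of feature_ids it depends on."""
    nodes = set(dependencies) | {d for deps in dependencies.values() for d in deps}
    indegree = {n: 0 for n in nodes}          # number of unmet dependencies
    dependents: dict[int, list[int]] = {n: [] for n in nodes}
    for feature, deps in dependencies.items():
        for dep in deps:
            indegree[feature] += 1
            dependents[dep].append(feature)

    queue = deque(n for n in nodes if indegree[n] == 0)
    processed = 0
    while queue:
        node = queue.popleft()
        processed += 1
        for dependent in dependents[node]:
            indegree[dependent] -= 1
            if indegree[dependent] == 0:
                queue.append(dependent)

    # Any node never processed sits on (or behind) a cycle.
    return processed != len(nodes)


assert has_cycle({2: {1}, 3: {2}}) is False   # 1 -> 2 -> 3 is a valid order
assert has_cycle({1: {2}, 2: {1}}) is True    # mutual dependency is rejected
```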
The agent interact MCP tools available to the agent: - `feature_get_stats` - Progress statistics -- `feature_get_next` - Get highest-priority pending feature (respects dependencies) -- `feature_claim_next` - Atomically claim next available feature (for parallel mode) -- `feature_get_for_regression` - Random passing features for regression testing +- `feature_get_by_id` - Get a single feature by ID +- `feature_get_summary` - Get summary of all features +- `feature_get_ready` - Get features ready to work on (dependencies met) +- `feature_get_blocked` - Get features blocked by unmet dependencies +- `feature_get_graph` - Get full dependency graph +- `feature_claim_and_get` - Atomically claim next available feature (for parallel mode) +- `feature_mark_in_progress` - Mark feature as in progress - `feature_mark_passing` - Mark feature complete +- `feature_mark_failing` - Mark feature as failing - `feature_skip` - Move feature to end of queue +- `feature_clear_in_progress` - Clear in-progress status - `feature_create_bulk` - Initialize all features (used by initializer) +- `feature_create` - Create a single feature - `feature_add_dependency` - Add dependency between features (with cycle detection) - `feature_remove_dependency` - Remove a dependency +- `feature_set_dependencies` - Set all dependencies for a feature at once ### React UI (ui/) -- Tech stack: React 19, TypeScript, TanStack Query, Tailwind CSS v4, Radix UI, dagre (graph layout) +- Tech stack: React 19, TypeScript, Vite 7, TanStack Query, Tailwind CSS v4, Radix UI, dagre (graph layout), xterm.js (terminal) - `src/App.tsx` - Main app with project selection, kanban board, agent controls - `src/hooks/useWebSocket.ts` - Real-time updates via WebSocket (progress, agent status, logs, agent updates) - `src/hooks/useProjects.ts` - React Query hooks for API calls @@ -186,6 +238,12 @@ Key components: - `DependencyGraph.tsx` - Interactive node graph visualization with dagre layout - `CelebrationOverlay.tsx` - Confetti animation on feature completion - `FolderBrowser.tsx` - Server-side filesystem browser for project folder selection +- `Terminal.tsx` / `TerminalTabs.tsx` - xterm.js-based multi-tab terminal +- `AssistantPanel.tsx` / `AssistantChat.tsx` - AI assistant for project Q&A +- `ExpandProjectModal.tsx` / `ExpandProjectChat.tsx` - Add features via natural language +- `DevServerControl.tsx` - Dev server start/stop control +- `ScheduleModal.tsx` - Schedule management UI +- `SettingsModal.tsx` - Global settings panel Keyboard shortcuts (press `?` for help): - `D` - Toggle debug panel @@ -197,12 +255,17 @@ Keyboard shortcuts (press `?` for help): ### Project Structure for Generated Apps Projects can be stored in any directory (registered in `~/.autocoder/registry.db`). 
Each project contains: -- `prompts/app_spec.txt` - Application specification (XML format) -- `prompts/initializer_prompt.md` - First session prompt -- `prompts/coding_prompt.md` - Continuation session prompt -- `features.db` - SQLite database with feature test cases -- `.agent.lock` - Lock file to prevent multiple agent instances +- `.autocoder/prompts/app_spec.txt` - Application specification (XML format) +- `.autocoder/prompts/initializer_prompt.md` - First session prompt +- `.autocoder/prompts/coding_prompt.md` - Continuation session prompt +- `.autocoder/features.db` - SQLite database with feature test cases +- `.autocoder/.agent.lock` - Lock file to prevent multiple agent instances - `.autocoder/allowed_commands.yaml` - Project-specific bash command allowlist (optional) +- `.autocoder/.gitignore` - Ignores runtime files +- `CLAUDE.md` - Stays at project root (SDK convention) +- `app_spec.txt` - Root copy for agent template compatibility + +Legacy projects with files at root level (e.g., `features.db`, `prompts/`) are auto-migrated to `.autocoder/` on next agent start. Dual-path resolution ensures old and new layouts work transparently. ### Security Model @@ -242,15 +305,6 @@ The following directories (relative to home) are always blocked: - `.docker`, `.config/gcloud` - Container/cloud configs - `.npmrc`, `.pypirc`, `.netrc` - Package manager credentials -**Example Output:** - -``` -Created security settings at /path/to/project/.claude_settings.json - - Sandbox enabled (OS-level bash isolation) - - Filesystem restricted to: /path/to/project - - Extra read paths (validated): /Users/me/docs, /opt/shared-libs -``` - #### Per-Project Allowed Commands The agent's bash command access is controlled through a hierarchical configuration system: @@ -312,13 +366,28 @@ blocked_commands: **Files:** - `security.py` - Command validation logic and hardcoded blocklist -- `test_security.py` - Unit tests for security system (136 tests) -- `test_security_integration.py` - Integration tests with real hooks (9 tests) -- `TEST_SECURITY.md` - Quick testing reference guide +- `test_security.py` - Unit tests for security system +- `test_security_integration.py` - Integration tests with real hooks - `examples/project_allowed_commands.yaml` - Project config example (all commented by default) - `examples/org_config.yaml` - Org config example (all commented by default) - `examples/README.md` - Comprehensive guide with use cases, testing, and troubleshooting -- `PHASE3_SPEC.md` - Specification for mid-session approval feature (future enhancement) + +### Vertex AI Configuration (Optional) + +Run coding agents via Google Cloud Vertex AI: + +1. Install and authenticate gcloud CLI: `gcloud auth application-default login` +2. Configure `.env`: + ``` + CLAUDE_CODE_USE_VERTEX=1 + CLOUD_ML_REGION=us-east5 + ANTHROPIC_VERTEX_PROJECT_ID=your-gcp-project-id + ANTHROPIC_DEFAULT_OPUS_MODEL=claude-opus-4-5@20251101 + ANTHROPIC_DEFAULT_SONNET_MODEL=claude-sonnet-4-5@20250929 + ANTHROPIC_DEFAULT_HAIKU_MODEL=claude-3-5-haiku@20241022 + ``` + +**Note:** Use `@` instead of `-` in model names for Vertex AI. 
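The Vertex AI section above lists the required `.env` variables and the `@` model-name convention. A small pre-flight check is sketched below; the environment variable names come from that section, while the helper itself is an assumption for illustration and not code from `client.py`:

```python
# Hedged sketch of a Vertex AI configuration pre-flight check.
import os

VERTEX_MODEL_VARS = (
    "ANTHROPIC_DEFAULT_OPUS_MODEL",
    "ANTHROPIC_DEFAULT_SONNET_MODEL",
    "ANTHROPIC_DEFAULT_HAIKU_MODEL",
)


def check_vertex_env() -> list[str]:
    """Return a list of configuration problems (empty list means OK)."""
    problems = []
    if os.getenv("CLAUDE_CODE_USE_VERTEX") != "1":
        return problems  # Vertex mode not enabled; nothing to check
    for var in ("CLOUD_ML_REGION", "ANTHROPIC_VERTEX_PROJECT_ID"):
        if not os.getenv(var):
            problems.append(f"{var} is not set")
    for var in VERTEX_MODEL_VARS:
        model = os.getenv(var, "")
        # Vertex expects '@' before the date suffix, e.g. claude-opus-4-5@20251101
        if model and "@" not in model:
            problems.append(f"{var} should use '@' before the version date")
    return problems
```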
### Ollama Local Models (Optional) @@ -355,8 +424,24 @@ Run coding agents using local models via Ollama v0.14.0+: ## Claude Code Integration -- `.claude/commands/create-spec.md` - `/create-spec` slash command for interactive spec creation -- `.claude/skills/frontend-design/SKILL.md` - Skill for distinctive UI design +**Slash commands** (`.claude/commands/`): +- `/create-spec` - Interactive spec creation for new projects +- `/expand-project` - Expand existing project with new features +- `/gsd-to-autocoder-spec` - Convert GSD codebase mapping to app_spec.txt +- `/check-code` - Run lint and type-check for code quality +- `/checkpoint` - Create comprehensive checkpoint commit +- `/review-pr` - Review pull requests + +**Custom agents** (`.claude/agents/`): +- `coder.md` - Elite software architect agent for code implementation (Opus) +- `code-review.md` - Code review agent for quality/security/performance analysis (Opus) +- `deep-dive.md` - Technical investigator for deep analysis and debugging (Opus) + +**Skills** (`.claude/skills/`): +- `frontend-design` - Distinctive, production-grade UI design +- `gsd-to-autocoder-spec` - Convert GSD codebase mapping to Autocoder app_spec format + +**Other:** - `.claude/templates/` - Prompt templates copied to new projects - `examples/` - Configuration examples and documentation for security settings @@ -364,12 +449,12 @@ Run coding agents using local models via Ollama v0.14.0+: ### Prompt Loading Fallback Chain -1. Project-specific: `{project_dir}/prompts/{name}.md` +1. Project-specific: `{project_dir}/.autocoder/prompts/{name}.md` (or legacy `{project_dir}/prompts/{name}.md`) 2. Base template: `.claude/templates/{name}.template.md` ### Agent Session Flow -1. Check if `features.db` has features (determines initializer vs coding agent) +1. Check if `.autocoder/features.db` has features (determines initializer vs coding agent) 2. Create ClaudeSDKClient with security settings 3. Send prompt and stream response 4. Auto-continue with 3-second delay between sessions @@ -387,7 +472,7 @@ The UI receives updates via WebSocket (`/ws/projects/{project_name}`): When running with `--parallel`, the orchestrator: 1. Spawns multiple Claude agents as subprocesses (up to `--max-concurrency`) -2. Each agent claims features atomically via `feature_claim_next` +2. Each agent claims features atomically via `feature_claim_and_get` 3. Features blocked by unmet dependencies are skipped 4. Browser contexts are isolated per agent using `--isolated` flag 5. 
AgentTracker parses output and emits `agent_update` messages for UI @@ -400,6 +485,16 @@ The orchestrator enforces strict bounds on concurrent processes: - Testing agents are capped at `max_concurrency` (same as coding agents) - Total process count never exceeds 11 Python processes (1 orchestrator + 5 coding + 5 testing) +### Multi-Feature Batching + +Agents can implement multiple features per session using `--batch-size` (1-3, default: 3): +- `--batch-size N` - Max features per coding agent batch +- `--testing-batch-size N` - Features per testing batch (1-5, default: 3) +- `--batch-features 1,2,3` - Specific feature IDs for batch implementation +- `--testing-batch-features 1,2,3` - Specific feature IDs for batch regression testing +- `prompts.py` provides `get_batch_feature_prompt()` for multi-feature prompt generation +- Configurable in UI via settings panel + ### Design System The UI uses a **neobrutalism** design with Tailwind CSS v4: diff --git a/CUSTOM_UPDATES.md b/CUSTOM_UPDATES.md deleted file mode 100644 index f211696..0000000 --- a/CUSTOM_UPDATES.md +++ /dev/null @@ -1,228 +0,0 @@ -# Custom Updates - AutoCoder - -This document tracks all customizations made to AutoCoder that deviate from the upstream repository. Reference this file before any updates to preserve these changes. - ---- - -## Table of Contents - -1. [UI Theme Customization](#1-ui-theme-customization) -2. [Playwright Browser Configuration](#2-playwright-browser-configuration) -3. [Update Checklist](#update-checklist) - ---- - -## 1. UI Theme Customization - -### Overview - -The UI has been customized from the default **neobrutalism** style to a clean **Twitter/Supabase-style** design. - -**Design Changes:** -- No shadows -- Thin borders (1px) -- Rounded corners (1.3rem base) -- Blue accent color (Twitter blue) -- Clean typography (Open Sans) - -### Modified Files - -#### `ui/src/styles/custom-theme.css` - -**Purpose:** Main theme override file that replaces neo design with clean Twitter style. - -**Key Changes:** -- All `--shadow-neo-*` variables set to `none` -- All status colors (`pending`, `progress`, `done`) use Twitter blue -- Rounded corners: `--radius-neo-lg: 1.3rem` -- Font: Open Sans -- Removed all transform effects on hover -- Dark mode with proper contrast - -**CSS Variables (Light Mode):** -```css ---color-neo-accent: oklch(0.6723 0.1606 244.9955); /* Twitter blue */ ---color-neo-pending: oklch(0.6723 0.1606 244.9955); ---color-neo-progress: oklch(0.6723 0.1606 244.9955); ---color-neo-done: oklch(0.6723 0.1606 244.9955); -``` - -**CSS Variables (Dark Mode):** -```css ---color-neo-bg: oklch(0.08 0 0); ---color-neo-card: oklch(0.16 0.005 250); ---color-neo-border: oklch(0.30 0 0); -``` - -**How to preserve:** This file should NOT be overwritten. It loads after `globals.css` and overrides it. - ---- - -#### `ui/src/components/KanbanColumn.tsx` - -**Purpose:** Modified to support themeable kanban columns without inline styles. - -**Changes:** - -1. **colorMap changed from inline colors to CSS classes:** -```tsx -// BEFORE (original): -const colorMap = { - pending: 'var(--color-neo-pending)', - progress: 'var(--color-neo-progress)', - done: 'var(--color-neo-done)', -} - -// AFTER (customized): -const colorMap = { - pending: 'kanban-header-pending', - progress: 'kanban-header-progress', - done: 'kanban-header-done', -} -``` - -2. **Column div uses CSS class instead of inline style:** -```tsx -// BEFORE: -
- -// AFTER: -
-``` - -3. **Header div simplified (removed duplicate color class):** -```tsx -// BEFORE: -
- -// AFTER: -
-``` - -4. **Title text color:** -```tsx -// BEFORE: -text-[var(--color-neo-text-on-bright)] - -// AFTER: -text-[var(--color-neo-text)] -``` - ---- - -## 2. Playwright Browser Configuration - -### Overview - -Changed default Playwright settings for better performance: -- **Default browser:** Firefox (lower CPU usage) -- **Default mode:** Headless (saves resources) - -### Modified Files - -#### `client.py` - -**Changes:** - -```python -# BEFORE: -DEFAULT_PLAYWRIGHT_HEADLESS = False - -# AFTER: -DEFAULT_PLAYWRIGHT_HEADLESS = True -DEFAULT_PLAYWRIGHT_BROWSER = "firefox" -``` - -**New function added:** -```python -def get_playwright_browser() -> str: - """ - Get the browser to use for Playwright. - Options: chrome, firefox, webkit, msedge - Firefox is recommended for lower CPU usage. - """ - return os.getenv("PLAYWRIGHT_BROWSER", DEFAULT_PLAYWRIGHT_BROWSER).lower() -``` - -**Playwright args updated:** -```python -playwright_args = [ - "@playwright/mcp@latest", - "--viewport-size", "1280x720", - "--browser", browser, # NEW: configurable browser -] -``` - ---- - -#### `.env.example` - -**Updated documentation:** -```bash -# PLAYWRIGHT_BROWSER: Which browser to use for testing -# - firefox: Lower CPU usage, recommended (default) -# - chrome: Google Chrome -# - webkit: Safari engine -# - msedge: Microsoft Edge -# PLAYWRIGHT_BROWSER=firefox - -# PLAYWRIGHT_HEADLESS: Run browser without visible window -# - true: Browser runs in background, saves CPU (default) -# - false: Browser opens a visible window (useful for debugging) -# PLAYWRIGHT_HEADLESS=true -``` - ---- - -## 3. Update Checklist - -When updating AutoCoder from upstream, verify these items: - -### UI Changes -- [ ] `ui/src/styles/custom-theme.css` is preserved -- [ ] `ui/src/components/KanbanColumn.tsx` changes are preserved -- [ ] Run `npm run build` in `ui/` directory -- [ ] Test both light and dark modes - -### Backend Changes -- [ ] `client.py` - Playwright browser/headless defaults preserved -- [ ] `.env.example` - Documentation updates preserved - -### General -- [ ] Verify Playwright uses Firefox by default -- [ ] Check that browser runs headless by default - ---- - -## Reverting to Defaults - -### UI Only -```bash -rm ui/src/styles/custom-theme.css -git checkout ui/src/components/KanbanColumn.tsx -cd ui && npm run build -``` - -### Backend Only -```bash -git checkout client.py .env.example -``` - ---- - -## Files Summary - -| File | Type | Change Description | -|------|------|-------------------| -| `ui/src/styles/custom-theme.css` | UI | Twitter-style theme | -| `ui/src/components/KanbanColumn.tsx` | UI | Themeable kanban columns | -| `ui/src/main.tsx` | UI | Imports custom theme | -| `client.py` | Backend | Firefox + headless defaults | -| `.env.example` | Config | Updated documentation | - ---- - -## Last Updated - -**Date:** January 2026 -**PR:** #93 - Twitter-style UI theme with custom theme override system diff --git a/PHASE3_SPEC.md b/PHASE3_SPEC.md deleted file mode 100644 index 7c0c64b..0000000 --- a/PHASE3_SPEC.md +++ /dev/null @@ -1,1591 +0,0 @@ -# Phase 3: Mid-Session Command Approval - Implementation Specification - -**Status:** Not yet implemented (Phases 1 & 2 complete) -**Estimated Effort:** 2-3 days for experienced developer -**Priority:** Medium (nice-to-have, not blocking) - ---- - -## Table of Contents - -- [Executive Summary](#executive-summary) -- [User Experience](#user-experience) -- [Technical Architecture](#technical-architecture) -- [Implementation Checklist](#implementation-checklist) -- [Detailed 
Implementation Guide](#detailed-implementation-guide) -- [Testing Strategy](#testing-strategy) -- [Security Considerations](#security-considerations) -- [Future Enhancements](#future-enhancements) - ---- - -## Executive Summary - -### What is Phase 3? - -Phase 3 adds **mid-session approval** for bash commands that aren't in the allowlist. Instead of immediately blocking unknown commands, the agent can request user approval in real-time. - -### Current State (Phases 1 & 2) - -The agent can only run commands that are: -1. In the hardcoded allowlist (npm, git, ls, etc.) -2. In project config (`.autocoder/allowed_commands.yaml`) -3. In org config (`~/.autocoder/config.yaml`) - -If the agent tries an unknown command → **immediately blocked**. - -### Phase 3 Vision - -If the agent tries an unknown command → **request approval**: -- **CLI mode**: Rich TUI overlay shows approval dialog -- **UI mode**: React banner/toast prompts user -- **User decides**: Session-only, Permanent (save to YAML), or Deny -- **Timeout**: Auto-deny after 5 minutes (configurable) - -### Benefits - -1. **Flexibility**: Don't need to pre-configure every possible command -2. **Discovery**: See what commands the agent actually needs -3. **Safety**: Still requires explicit approval (not automatic) -4. **Persistence**: Can save approved commands to config for future sessions - -### Non-Goals - -- **NOT** auto-approval (always requires user confirmation) -- **NOT** bypassing hardcoded blocklist (sudo, dd, etc. are NEVER allowed) -- **NOT** bypassing org-level blocklist (those remain final) - ---- - -## User Experience - -### CLI Mode Flow - -``` -Agent is working... -Agent tries: xcodebuild -project MyApp.xcodeproj - -┌─────────────────────────────────────────────────────────────┐ -│ ⚠️ COMMAND APPROVAL REQUIRED │ -├─────────────────────────────────────────────────────────────┤ -│ The agent is requesting permission to run: │ -│ │ -│ xcodebuild -project MyApp.xcodeproj │ -│ │ -│ This command is not in your allowed commands list. │ -│ │ -│ Options: │ -│ [S] Allow for this Session only │ -│ [P] Allow Permanently (save to config) │ -│ [D] Deny (default in 5 minutes) │ -│ │ -│ Your choice (S/P/D): │ -└─────────────────────────────────────────────────────────────┘ -``` - -**For dangerous commands** (aws, kubectl, sudo*): - -``` -╔═══════════════════════════════════════════════════════════════╗ -║ ⚠️ DANGER: PRIVILEGED COMMAND REQUESTED ║ -╠═══════════════════════════════════════════════════════════════╣ -║ The agent is requesting: aws s3 ls ║ -║ ║ -║ aws is a CLOUD CLI that can: ║ -║ • Access production infrastructure ║ -║ • Modify or delete cloud resources ║ -║ • Incur significant costs ║ -║ ║ -║ This action could have SERIOUS consequences. ║ -║ ║ -║ Type CONFIRM to allow, or press Enter to deny: ║ -╚═══════════════════════════════════════════════════════════════╝ -``` - -*Note: sudo would still be in hardcoded blocklist, but this shows the UX pattern - -### UI Mode Flow - -**React UI Banner** (top of screen): - -``` -┌─────────────────────────────────────────────────────────────┐ -│ ⚠️ Agent requesting permission: xcodebuild │ -│ │ -│ [Session Only] [Save to Config] [Deny] │ -│ │ -│ Auto-denies in: 4:32 │ -└─────────────────────────────────────────────────────────────┘ -``` - -**Multiple requests queued:** - -``` -┌─────────────────────────────────────────────────────────────┐ -│ ⚠️ 3 approval requests pending │ -│ │ -│ 1. xcodebuild -project MyApp.xcodeproj │ -│ [Session] [Save] [Deny] │ -│ │ -│ 2. 
swift package resolve │ -│ [Session] [Save] [Deny] │ -│ │ -│ 3. xcrun simctl list devices │ -│ [Session] [Save] [Deny] │ -└─────────────────────────────────────────────────────────────┘ -``` - -### Response Behavior - -| User Action | Agent Behavior | Config Updated | -|-------------|----------------|----------------| -| Session Only | Command allowed this session | No | -| Permanent | Command allowed forever | Yes - appended to YAML | -| Deny | Command blocked, agent sees error | No | -| Timeout (5 min) | Command blocked, agent sees timeout | No | - ---- - -## Technical Architecture - -### Data Flow - -``` -┌─────────────────────────────────────────────────────────────┐ -│ 1. Agent tries command: xcodebuild │ -└────────────────────┬────────────────────────────────────────┘ - │ - ▼ -┌─────────────────────────────────────────────────────────────┐ -│ 2. bash_security_hook() checks allowlist │ -│ → Not found, not in blocklist │ -└────────────────────┬────────────────────────────────────────┘ - │ - ▼ -┌─────────────────────────────────────────────────────────────┐ -│ 3. Hook returns: {"decision": "pending", │ -│ "request_id": "req_123", │ -│ "command": "xcodebuild"} │ -└────────────────────┬────────────────────────────────────────┘ - │ - ┌──────────┴──────────┐ - │ │ - ▼ ▼ -┌─────────────────────┐ ┌─────────────────────┐ -│ CLI Mode │ │ UI Mode │ -│ │ │ │ -│ approval_tui.py │ │ WebSocket message │ -│ shows Rich dialog │ │ → React banner │ -└──────────┬──────────┘ └──────────┬──────────┘ - │ │ - └────────┬───────────────┘ - │ - ▼ -┌─────────────────────────────────────────────────────────────┐ -│ 4. User responds: "session" / "permanent" / "deny" │ -└────────────────────┬────────────────────────────────────────┘ - │ - ▼ -┌─────────────────────────────────────────────────────────────┐ -│ 5. approval_manager.respond(request_id, decision) │ -│ → If permanent: persist_command() │ -│ → If session: add to in-memory set │ -└────────────────────┬────────────────────────────────────────┘ - │ - ▼ -┌─────────────────────────────────────────────────────────────┐ -│ 6. Hook gets response, returns to agent: │ -│ → "allow" or "block" │ -└─────────────────────────────────────────────────────────────┘ -``` - -### State Management - -**ApprovalManager** (new class in `security.py`): - -```python -class ApprovalManager: - """ - Manages pending approval requests and responses. - Thread-safe for concurrent access. - """ - - def __init__(self): - self._pending: Dict[str, PendingRequest] = {} - self._session_allowed: Set[str] = set() - self._lock = threading.Lock() - - def request_approval( - self, - command: str, - is_dangerous: bool = False - ) -> str: - """ - Create a new approval request. - Returns request_id. - """ - ... - - def wait_for_response( - self, - request_id: str, - timeout_seconds: int = 300 - ) -> ApprovalDecision: - """ - Block until user responds or timeout. - Returns: "allow_session", "allow_permanent", "deny", "timeout" - """ - ... - - def respond( - self, - request_id: str, - decision: ApprovalDecision - ): - """ - Called by UI/CLI to respond to a request. - """ - ... -``` - -### File Locking for Persistence - -When user chooses "Permanent", append to YAML with exclusive file lock: - -```python -import fcntl # Unix -import msvcrt # Windows - -def persist_command(project_dir: Path, command: str, description: str = None): - """ - Atomically append command to project YAML. - Uses platform-specific file locking. 
- """ - config_path = project_dir / ".autocoder" / "allowed_commands.yaml" - - # Ensure file exists - if not config_path.exists(): - config_path.write_text("version: 1\ncommands: []\n") - - with open(config_path, "r+") as f: - # Acquire exclusive lock - if sys.platform == "win32": - msvcrt.locking(f.fileno(), msvcrt.LK_LOCK, 1) - else: - fcntl.flock(f.fileno(), fcntl.LOCK_EX) - - try: - # Load current config - config = yaml.safe_load(f) or {"version": 1, "commands": []} - - # Add new command - new_entry = {"name": command} - if description: - new_entry["description"] = description - - config.setdefault("commands", []).append(new_entry) - - # Validate doesn't exceed 50 commands - if len(config["commands"]) > 50: - raise ValueError("Cannot add command: 50 command limit reached") - - # Write back - f.seek(0) - f.truncate() - yaml.dump(config, f, default_flow_style=False) - - finally: - # Release lock - if sys.platform == "win32": - msvcrt.locking(f.fileno(), msvcrt.LK_UNLCK, 1) - else: - fcntl.flock(f.fileno(), fcntl.LOCK_UN) -``` - ---- - -## Implementation Checklist - -### Core Security Module - -- [ ] Create `ApprovalManager` class in `security.py` - - [ ] Thread-safe pending request storage - - [ ] Session-only allowed commands set - - [ ] Timeout handling with threading.Timer - - [ ] Request/response API - -- [ ] Modify `bash_security_hook()` to support pending state - - [ ] Check if command needs approval - - [ ] Create approval request - - [ ] Wait for response (with timeout) - - [ ] Return appropriate decision - -- [ ] Implement `persist_command()` with file locking - - [ ] Platform-specific locking (fcntl/msvcrt) - - [ ] Atomic YAML append - - [ ] 50 command limit validation - - [ ] Auto-generate description if not provided - -- [ ] Add `is_dangerous_command()` helper - - [ ] Check against DANGEROUS_COMMANDS set - - [ ] Return emphatic warning text - -- [ ] Update DANGEROUS_COMMANDS set - - [ ] Move from hardcoded blocklist to dangerous list - - [ ] Commands: aws, gcloud, az, kubectl, docker-compose - - [ ] Keep sudo, dd, etc. 
in BLOCKED_COMMANDS (never allowed) - -### CLI Approval Interface - -- [ ] Create `approval_tui.py` module - - [ ] Use Rich library for TUI - - [ ] Overlay design (doesn't clear screen) - - [ ] Keyboard input handling (S/P/D keys) - - [ ] Timeout display (countdown timer) - - [ ] Different layouts for normal vs dangerous commands - -- [ ] Integrate with agent.py - - [ ] Detect if running in CLI mode (not UI) - - [ ] Pass approval callback to client - - [ ] Handle approval responses - -- [ ] Add `rich` to requirements.txt - - [ ] Version: `rich>=13.0.0` - -### React UI Components - -- [ ] Create `ApprovalBanner.tsx` component - - [ ] Banner at top of screen - - [ ] Queue multiple requests - - [ ] Session/Permanent/Deny buttons - - [ ] Countdown timer display - - [ ] Dangerous command warning variant - -- [ ] Update `useWebSocket.ts` hook - - [ ] Handle `approval_request` message type - - [ ] Send `approval_response` message - - [ ] Queue management for multiple requests - -- [ ] Update WebSocket message types in `types.ts` - ```typescript - type ApprovalRequest = { - request_id: string; - command: string; - is_dangerous: boolean; - timeout_seconds: number; - warning_text?: string; - }; - - type ApprovalResponse = { - request_id: string; - decision: "session" | "permanent" | "deny"; - }; - ``` - -### Backend WebSocket Integration - -- [ ] Update `server/routers/agent.py` - - [ ] Add `approval_request` message sender - - [ ] Add `approval_response` message handler - - [ ] Wire to ApprovalManager - -- [ ] Thread-safe WebSocket message queue - - [ ] Handle approval requests from agent thread - - [ ] Handle approval responses from WebSocket thread - -### MCP Tool for Agent Introspection - -- [ ] Add `list_allowed_commands` tool to feature MCP - - [ ] Returns current allowed commands - - [ ] Indicates which are from project/org/global - - [ ] Shows if approval is available - - [ ] Agent can proactively query before trying commands - -- [ ] Tool response format: - ```python - { - "commands": [ - {"name": "swift", "source": "project"}, - {"name": "npm", "source": "global"}, - {"name": "jq", "source": "org"} - ], - "blocked_count": 15, - "can_request_approval": True, - "approval_timeout_minutes": 5 - } - ``` - -### Configuration - -- [ ] Add approval settings to org config - - [ ] `approval_timeout_minutes` (default: 5) - - [ ] `approval_enabled` (default: true) - - [ ] `dangerous_command_requires_confirmation` (default: true) - -- [ ] Validate org config settings - - [ ] Timeout must be 1-30 minutes - - [ ] Boolean flags properly typed - -### Testing - -- [ ] Unit tests for ApprovalManager - - [ ] Request creation - - [ ] Response handling - - [ ] Timeout behavior - - [ ] Thread safety - -- [ ] Unit tests for file locking - - [ ] Concurrent append operations - - [ ] Platform-specific locking - - [ ] Error handling - -- [ ] Integration tests for approval flow - - [ ] CLI approval (mocked input) - - [ ] WebSocket approval (mocked messages) - - [ ] Session vs permanent vs deny - - [ ] Timeout scenarios - -- [ ] UI component tests - - [ ] ApprovalBanner rendering - - [ ] Queue management - - [ ] Button interactions - - [ ] Timer countdown - -### Documentation - -- [ ] Update `CLAUDE.md` - - [ ] Document approval flow - - [ ] Update security model section - - [ ] Add Phase 3 to architecture - -- [ ] Update `examples/README.md` - - [ ] Add mid-session approval examples - - [ ] Document timeout configuration - - [ ] Troubleshooting approval issues - -- [ ] Create user guide for approvals - - [ ] When/why 
to use session vs permanent - - [ ] How to handle dangerous commands - - [ ] Keyboard shortcuts for CLI - ---- - -## Detailed Implementation Guide - -### Step 1: Core ApprovalManager (2-3 hours) - -**File:** `security.py` - -```python -from dataclasses import dataclass -from enum import Enum -import threading -import time -from typing import Dict, Set, Optional -import uuid - -class ApprovalDecision(Enum): - ALLOW_SESSION = "session" - ALLOW_PERMANENT = "permanent" - DENY = "deny" - TIMEOUT = "timeout" - -@dataclass -class PendingRequest: - request_id: str - command: str - is_dangerous: bool - timestamp: float - response_event: threading.Event - decision: Optional[ApprovalDecision] = None - -class ApprovalManager: - """ - Singleton manager for approval requests. - Thread-safe for concurrent access from agent and UI. - """ - - _instance = None - _lock = threading.Lock() - - def __new__(cls): - if cls._instance is None: - with cls._lock: - if cls._instance is None: - cls._instance = super().__new__(cls) - cls._instance._initialized = False - return cls._instance - - def __init__(self): - if self._initialized: - return - - self._pending: Dict[str, PendingRequest] = {} - self._session_allowed: Set[str] = set() - self._state_lock = threading.Lock() - self._initialized = True - - def request_approval( - self, - command: str, - is_dangerous: bool = False, - timeout_seconds: int = 300 - ) -> str: - """ - Create a new approval request. - - Args: - command: The command needing approval - is_dangerous: True if command is in DANGEROUS_COMMANDS - timeout_seconds: How long to wait before auto-deny - - Returns: - request_id to use for waiting/responding - """ - request_id = f"req_{uuid.uuid4().hex[:8]}" - - with self._state_lock: - request = PendingRequest( - request_id=request_id, - command=command, - is_dangerous=is_dangerous, - timestamp=time.time(), - response_event=threading.Event() - ) - self._pending[request_id] = request - - # Start timeout timer - timer = threading.Timer( - timeout_seconds, - self._handle_timeout, - args=[request_id] - ) - timer.daemon = True - timer.start() - - # Emit notification (CLI or WebSocket) - self._emit_approval_request(request) - - return request_id - - def wait_for_response( - self, - request_id: str, - timeout_seconds: int = 300 - ) -> ApprovalDecision: - """ - Block until user responds or timeout. - - Returns: - ApprovalDecision (session/permanent/deny/timeout) - """ - with self._state_lock: - request = self._pending.get(request_id) - if not request: - return ApprovalDecision.DENY - - # Wait for response event - request.response_event.wait(timeout=timeout_seconds) - - with self._state_lock: - request = self._pending.get(request_id) - if not request or not request.decision: - return ApprovalDecision.TIMEOUT - - decision = request.decision - - # Handle permanent approval - if decision == ApprovalDecision.ALLOW_PERMANENT: - # This will be handled by caller (needs project_dir) - pass - elif decision == ApprovalDecision.ALLOW_SESSION: - self._session_allowed.add(request.command) - - # Clean up - del self._pending[request_id] - - return decision - - def respond( - self, - request_id: str, - decision: ApprovalDecision - ): - """ - Called by UI/CLI to respond to a request. 
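-
-        Example (illustrative; the request ID is hypothetical):
-            get_approval_manager().respond("req_1a2b3c4d",
-                                           ApprovalDecision.ALLOW_SESSION)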
- """ - with self._state_lock: - request = self._pending.get(request_id) - if not request: - return - - request.decision = decision - request.response_event.set() - - def is_session_allowed(self, command: str) -> bool: - """Check if command was approved for this session.""" - with self._state_lock: - return command in self._session_allowed - - def _handle_timeout(self, request_id: str): - """Called by timer thread when request times out.""" - self.respond(request_id, ApprovalDecision.TIMEOUT) - - def _emit_approval_request(self, request: PendingRequest): - """ - Emit approval request to CLI or WebSocket. - To be implemented based on execution mode. - """ - # This is called by approval_callback in client.py - pass - -# Global singleton instance -_approval_manager = ApprovalManager() - -def get_approval_manager() -> ApprovalManager: - """Get the global ApprovalManager singleton.""" - return _approval_manager -``` - -### Step 2: Modify bash_security_hook (1 hour) - -**File:** `security.py` - -```python -async def bash_security_hook(input_data, tool_use_id=None, context=None): - """ - Pre-tool-use hook that validates bash commands. - - Phase 3: Supports mid-session approval for unknown commands. - """ - if input_data.get("tool_name") != "Bash": - return {} - - command = input_data.get("tool_input", {}).get("command", "") - if not command: - return {} - - # Extract commands - commands = extract_commands(command) - if not commands: - return { - "decision": "block", - "reason": f"Could not parse command: {command}", - } - - # Get project directory and effective commands - project_dir = None - if context and isinstance(context, dict): - project_dir_str = context.get("project_dir") - if project_dir_str: - project_dir = Path(project_dir_str) - - allowed_commands, blocked_commands = get_effective_commands(project_dir) - segments = split_command_segments(command) - - # Check each command - for cmd in commands: - # Check blocklist (highest priority) - if cmd in blocked_commands: - return { - "decision": "block", - "reason": f"Command '{cmd}' is blocked and cannot be approved.", - } - - # Check if allowed (allowlist or session) - approval_mgr = get_approval_manager() - if is_command_allowed(cmd, allowed_commands) or approval_mgr.is_session_allowed(cmd): - # Additional validation for sensitive commands - if cmd in COMMANDS_NEEDING_EXTRA_VALIDATION: - cmd_segment = get_command_for_validation(cmd, segments) - # ... existing validation code ... 
- continue - - # PHASE 3: Request approval - is_dangerous = cmd in DANGEROUS_COMMANDS - request_id = approval_mgr.request_approval( - command=cmd, - is_dangerous=is_dangerous, - timeout_seconds=300 # TODO: Get from org config - ) - - decision = approval_mgr.wait_for_response(request_id) - - if decision == ApprovalDecision.DENY: - return { - "decision": "block", - "reason": f"Command '{cmd}' was denied.", - } - elif decision == ApprovalDecision.TIMEOUT: - return { - "decision": "block", - "reason": f"Command '{cmd}' was denied (approval timeout after 5 minutes).", - } - elif decision == ApprovalDecision.ALLOW_PERMANENT: - # Persist to YAML - if project_dir: - try: - persist_command( - project_dir, - cmd, - description=f"Added via mid-session approval" - ) - except Exception as e: - # If persist fails, still allow for session - print(f"Warning: Could not save to config: {e}") - # If ALLOW_SESSION, already added to session set by wait_for_response - - return {} # Allow -``` - -### Step 3: CLI Approval Interface (3-4 hours) - -**File:** `approval_tui.py` - -```python -""" -CLI approval interface using Rich library. -Displays an overlay when approval is needed. -""" - -from rich.console import Console -from rich.panel import Panel -from rich.prompt import Prompt -from rich.live import Live -from rich.text import Text -import sys -import threading -import time - -console = Console() - -def show_approval_dialog( - command: str, - is_dangerous: bool, - timeout_seconds: int, - on_response: callable -): - """ - Show approval dialog in CLI. - - Args: - command: The command requesting approval - is_dangerous: True if dangerous command - timeout_seconds: Timeout in seconds - on_response: Callback(decision: str) - "session"/"permanent"/"deny" - """ - - if is_dangerous: - _show_dangerous_dialog(command, timeout_seconds, on_response) - else: - _show_normal_dialog(command, timeout_seconds, on_response) - -def _show_normal_dialog(command: str, timeout_seconds: int, on_response: callable): - """Standard approval dialog.""" - - start_time = time.time() - - while True: - elapsed = time.time() - start_time - remaining = timeout_seconds - elapsed - - if remaining <= 0: - on_response("deny") - console.print("[red]⏱️ Request timed out - command denied[/red]") - return - - # Build dialog - content = f"""[bold yellow]⚠️ COMMAND APPROVAL REQUIRED[/bold yellow] - -The agent is requesting permission to run: - - [cyan]{command}[/cyan] - -This command is not in your allowed commands list. - -Options: - [green][S][/green] Allow for this [green]Session only[/green] - [blue][P][/blue] Allow [blue]Permanently[/blue] (save to config) - [red][D][/red] [red]Deny[/red] (default in {int(remaining)}s) - -Your choice (S/P/D): """ - - console.print(Panel(content, border_style="yellow", expand=False)) - - # Get input with timeout - choice = _get_input_with_timeout("", timeout=1.0) - - if choice: - choice = choice.upper() - if choice == "S": - on_response("session") - console.print("[green]✅ Allowed for this session[/green]") - return - elif choice == "P": - on_response("permanent") - console.print("[blue]✅ Saved to config permanently[/blue]") - return - elif choice == "D": - on_response("deny") - console.print("[red]❌ Command denied[/red]") - return - else: - console.print("[yellow]Invalid choice. 
Use S, P, or D.[/yellow]") - -def _show_dangerous_dialog(command: str, timeout_seconds: int, on_response: callable): - """Emphatic dialog for dangerous commands.""" - - # Determine warning text based on command - warnings = { - "aws": "AWS CLI can:\n • Access production infrastructure\n • Modify or delete cloud resources\n • Incur significant costs", - "gcloud": "Google Cloud CLI can:\n • Access production GCP resources\n • Modify or delete cloud infrastructure\n • Incur significant costs", - "kubectl": "Kubernetes CLI can:\n • Access production clusters\n • Deploy or delete workloads\n • Disrupt running services", - } - - cmd_name = command.split()[0] - warning = warnings.get(cmd_name, "This command can make significant system changes.") - - content = f"""[bold red on white] ⚠️ DANGER: PRIVILEGED COMMAND REQUESTED [/bold red on white] - -The agent is requesting: [red bold]{command}[/red bold] - -[yellow]{warning}[/yellow] - -[bold]This action could have SERIOUS consequences.[/bold] - -Type [bold]CONFIRM[/bold] to allow, or press Enter to deny:""" - - console.print(Panel(content, border_style="red", expand=False)) - - confirmation = Prompt.ask("", default="deny") - - if confirmation.upper() == "CONFIRM": - # Ask session vs permanent - choice = Prompt.ask( - "Allow for [S]ession or [P]ermanent?", - choices=["S", "P", "s", "p"], - default="S" - ) - if choice.upper() == "P": - on_response("permanent") - console.print("[blue]✅ Saved to config permanently[/blue]") - else: - on_response("session") - console.print("[green]✅ Allowed for this session[/green]") - else: - on_response("deny") - console.print("[red]❌ Command denied[/red]") - -def _get_input_with_timeout(prompt: str, timeout: float) -> str: - """ - Get input with timeout (non-blocking). - Returns empty string if timeout. 
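-
-    Example (illustrative):
-        choice = _get_input_with_timeout("Your choice (S/P/D): ", timeout=1.0)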
- """ - import select - - sys.stdout.write(prompt) - sys.stdout.flush() - - # Check if input available (Unix only, Windows needs different approach) - if sys.platform != "win32": - ready, _, _ = select.select([sys.stdin], [], [], timeout) - if ready: - return sys.stdin.readline().strip() - else: - # Windows: use msvcrt.kbhit() and msvcrt.getch() - import msvcrt - start = time.time() - chars = [] - while time.time() - start < timeout: - if msvcrt.kbhit(): - char = msvcrt.getch() - if char == b'\r': # Enter - return ''.join(chars) - elif char == b'\x08': # Backspace - if chars: - chars.pop() - sys.stdout.write('\b \b') - else: - chars.append(char.decode('utf-8')) - sys.stdout.write(char.decode('utf-8')) - time.sleep(0.01) - - return "" -``` - -### Step 4: React UI Components (4-5 hours) - -**File:** `ui/src/components/ApprovalBanner.tsx` - -```tsx -import React, { useState, useEffect } from 'react'; -import { X, AlertTriangle, Clock } from 'lucide-react'; - -interface ApprovalRequest { - request_id: string; - command: string; - is_dangerous: boolean; - timeout_seconds: number; - warning_text?: string; - timestamp: number; -} - -interface ApprovalBannerProps { - requests: ApprovalRequest[]; - onRespond: (requestId: string, decision: 'session' | 'permanent' | 'deny') => void; -} - -export function ApprovalBanner({ requests, onRespond }: ApprovalBannerProps) { - const [remainingTimes, setRemainingTimes] = useState>({}); - - // Update countdown timers - useEffect(() => { - const interval = setInterval(() => { - const now = Date.now(); - const newTimes: Record = {}; - - requests.forEach(req => { - const elapsed = (now - req.timestamp) / 1000; - const remaining = Math.max(0, req.timeout_seconds - elapsed); - newTimes[req.request_id] = remaining; - - // Auto-deny on timeout - if (remaining === 0) { - onRespond(req.request_id, 'deny'); - } - }); - - setRemainingTimes(newTimes); - }, 100); - - return () => clearInterval(interval); - }, [requests, onRespond]); - - if (requests.length === 0) return null; - - const formatTime = (seconds: number): string => { - const mins = Math.floor(seconds / 60); - const secs = Math.floor(seconds % 60); - return `${mins}:${secs.toString().padStart(2, '0')}`; - }; - - return ( -
-    <div className="approval-banner">
-      {/* Illustrative markup; class names and layout are placeholders. */}
-      {requests.length === 1 ? (
-        <SingleRequestView request={requests[0]} remaining={remainingTimes[requests[0].request_id] || 0} onRespond={onRespond} formatTime={formatTime} />
-      ) : (
-        <MultipleRequestsView requests={requests} remainingTimes={remainingTimes} onRespond={onRespond} formatTime={formatTime} />
-      )}
-    </div>
- ); -} - -function SingleRequestView({ - request, - remaining, - onRespond, - formatTime, -}: { - request: ApprovalRequest; - remaining: number; - onRespond: (requestId: string, decision: 'session' | 'permanent' | 'deny') => void; - formatTime: (seconds: number) => string; -}) { - const isDangerous = request.is_dangerous; - - return ( -
-    <div className={isDangerous ? 'approval-request approval-request--danger' : 'approval-request'}>
-      {/* Illustrative markup; class names and layout are placeholders. */}
-      {isDangerous && (
-        <div className="approval-danger-header">
-          <AlertTriangle size={16} />
-          <span>DANGER: PRIVILEGED COMMAND</span>
-        </div>
-      )}
-
-      <div className="approval-body">
-        <span>Agent requesting permission:</span>
-        <code>{request.command}</code>
-
-        {request.warning_text && (
-          <pre className="approval-warning">
-            {request.warning_text}
-          </pre>
-        )}
-      </div>
-
-      <div className="approval-actions">
-        <button onClick={() => onRespond(request.request_id, 'session')}>Session Only</button>
-        <button onClick={() => onRespond(request.request_id, 'permanent')}>Save to Config</button>
-        <button onClick={() => onRespond(request.request_id, 'deny')}>Deny</button>
-        <span className="approval-countdown">
-          <Clock size={14} />
-          {formatTime(remaining)}
-        </span>
-      </div>
-    </div>
-  );
-}
-
-function MultipleRequestsView({
-  requests,
-  remainingTimes,
-  onRespond,
-  formatTime,
-}: {
-  requests: ApprovalRequest[];
-  remainingTimes: Record<string, number>;
-  onRespond: (requestId: string, decision: 'session' | 'permanent' | 'deny') => void;
-  formatTime: (seconds: number) => string;
-}) {
-  return (
-    <div className="approval-queue">
-      {/* Illustrative markup; class names and layout are placeholders. */}
-      <div className="approval-queue-header">
-        ⚠️ {requests.length} approval requests pending
-      </div>
-
-      <div className="approval-queue-list">
-        {requests.map(req => (
-          <div key={req.request_id} className="approval-queue-item">
-            <code>{req.command}</code>
-            <div className="approval-queue-actions">
-              <button onClick={() => onRespond(req.request_id, 'session')}>Session Only</button>
-              <button onClick={() => onRespond(req.request_id, 'permanent')}>Save to Config</button>
-              <button onClick={() => onRespond(req.request_id, 'deny')}>Deny</button>
-              <span className="approval-queue-timer">
-                <Clock size={14} />
-                {formatTime(remainingTimes[req.request_id] || 0)}
-              </span>
-            </div>
-          </div>
-        ))}
-      </div>
-    </div>
- ); -} -``` - -**File:** `ui/src/hooks/useWebSocket.ts` (add approval handling) - -```typescript -// Add to message types -type ApprovalRequestMessage = { - type: 'approval_request'; - request_id: string; - command: string; - is_dangerous: boolean; - timeout_seconds: number; - warning_text?: string; -}; - -// Add to useWebSocket hook -const [approvalRequests, setApprovalRequests] = useState([]); - -// In message handler -if (data.type === 'approval_request') { - setApprovalRequests(prev => [ - ...prev, - { - ...data, - timestamp: Date.now(), - }, - ]); -} - -// Approval response function -const respondToApproval = useCallback( - (requestId: string, decision: 'session' | 'permanent' | 'deny') => { - if (ws.current?.readyState === WebSocket.OPEN) { - ws.current.send( - JSON.stringify({ - type: 'approval_response', - request_id: requestId, - decision, - }) - ); - } - - // Remove from queue - setApprovalRequests(prev => - prev.filter(req => req.request_id !== requestId) - ); - }, - [] -); - -return { - // ... existing returns - approvalRequests, - respondToApproval, -}; -``` - -### Step 5: Backend WebSocket (2-3 hours) - -**File:** `server/routers/agent.py` - -```python -# Add to WebSocket message handlers - -async def handle_approval_response(websocket: WebSocket, data: dict): - """ - Handle approval response from UI. - - Message format: - { - "type": "approval_response", - "request_id": "req_abc123", - "decision": "session" | "permanent" | "deny" - } - """ - request_id = data.get("request_id") - decision = data.get("decision") - - if not request_id or not decision: - return - - # Convert string to enum - decision_map = { - "session": ApprovalDecision.ALLOW_SESSION, - "permanent": ApprovalDecision.ALLOW_PERMANENT, - "deny": ApprovalDecision.DENY, - } - - approval_decision = decision_map.get(decision, ApprovalDecision.DENY) - - # Respond to approval manager - from security import get_approval_manager - approval_mgr = get_approval_manager() - approval_mgr.respond(request_id, approval_decision) - - -async def send_approval_request( - websocket: WebSocket, - request_id: str, - command: str, - is_dangerous: bool, - timeout_seconds: int, - warning_text: str = None -): - """ - Send approval request to UI via WebSocket. 
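-
-    Example (illustrative; the request ID is hypothetical):
-        await send_approval_request(websocket, "req_1a2b3c4d", "aws",
-                                    is_dangerous=True, timeout_seconds=300)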
- """ - await websocket.send_json({ - "type": "approval_request", - "request_id": request_id, - "command": command, - "is_dangerous": is_dangerous, - "timeout_seconds": timeout_seconds, - "warning_text": warning_text, - }) -``` - ---- - -## Testing Strategy - -### Unit Tests - -**File:** `test_approval.py` - -```python -def test_approval_manager_request(): - """Test creating approval request.""" - mgr = ApprovalManager() - request_id = mgr.request_approval("swift", is_dangerous=False) - assert request_id.startswith("req_") - -def test_approval_manager_respond(): - """Test responding to approval.""" - mgr = ApprovalManager() - request_id = mgr.request_approval("swift", is_dangerous=False, timeout_seconds=1) - - # Respond in separate thread - import threading - def respond(): - time.sleep(0.1) - mgr.respond(request_id, ApprovalDecision.ALLOW_SESSION) - - t = threading.Thread(target=respond) - t.start() - - decision = mgr.wait_for_response(request_id, timeout_seconds=2) - assert decision == ApprovalDecision.ALLOW_SESSION - t.join() - -def test_approval_timeout(): - """Test approval timeout.""" - mgr = ApprovalManager() - request_id = mgr.request_approval("swift", is_dangerous=False, timeout_seconds=1) - - # Don't respond, let it timeout - decision = mgr.wait_for_response(request_id, timeout_seconds=2) - assert decision == ApprovalDecision.TIMEOUT - -def test_session_allowed(): - """Test session-allowed commands.""" - mgr = ApprovalManager() - assert not mgr.is_session_allowed("swift") - - # Approve for session - request_id = mgr.request_approval("swift", is_dangerous=False, timeout_seconds=1) - mgr.respond(request_id, ApprovalDecision.ALLOW_SESSION) - mgr.wait_for_response(request_id) - - assert mgr.is_session_allowed("swift") -``` - -### Integration Tests - -**File:** `test_security_integration.py` (add Phase 3 tests) - -```python -def test_approval_flow_session(): - """Test mid-session approval with session-only.""" - # Create project with no config - # Mock approval response: session - # Try command → should be allowed - # Try same command again → should still be allowed (session) - pass - -def test_approval_flow_permanent(): - """Test mid-session approval with permanent save.""" - # Create project with empty config - # Mock approval response: permanent - # Try command → should be allowed - # Check YAML file → command should be added - # Create new session → command should still be allowed - pass - -def test_approval_flow_deny(): - """Test mid-session approval denial.""" - # Create project - # Mock approval response: deny - # Try command → should be blocked - pass - -def test_approval_timeout(): - """Test approval timeout auto-deny.""" - # Create project - # Don't respond to approval - # Wait for timeout - # Command should be blocked with timeout message - pass - -def test_concurrent_approvals(): - """Test multiple simultaneous approval requests.""" - # Create project - # Try 3 commands at once - # All should queue - # Respond to each individually - # Verify all handled correctly - pass -``` - -### Manual Testing Checklist - -- [ ] CLI mode: Request approval for unknown command -- [ ] CLI mode: Press S → command works this session -- [ ] CLI mode: Press P → command saved to YAML -- [ ] CLI mode: Press D → command denied -- [ ] CLI mode: Wait 5 minutes → timeout, command denied -- [ ] CLI mode: Dangerous command shows emphatic warning -- [ ] UI mode: Banner appears at top -- [ ] UI mode: Click "Session Only" → command works -- [ ] UI mode: Click "Save to Config" → YAML updated -- [ ] UI 
mode: Click "Deny" → command blocked -- [ ] UI mode: Multiple requests → all shown in queue -- [ ] UI mode: Countdown timer updates -- [ ] Concurrent access: Multiple agents, file locking works -- [ ] Config validation: 50 command limit enforced -- [ ] Session persistence: Session commands available until restart -- [ ] Permanent persistence: Saved commands available after restart - ---- - -## Security Considerations - -### 1. Hardcoded Blocklist is Final - -**NEVER** allow approval for hardcoded blocklist commands: -- `sudo`, `su`, `doas` -- `dd`, `mkfs`, `fdisk` -- `shutdown`, `reboot`, `halt` -- etc. - -These bypass approval entirely - immediate block. - -### 2. Org Blocklist Cannot Be Overridden - -If org config blocks a command, approval is not even requested. - -### 3. Dangerous Commands Require Extra Confirmation - -Commands like `aws`, `kubectl` should: -- Show emphatic warning -- Require typing "CONFIRM" (not just button click) -- Explain potential consequences - -### 4. Timeout is Critical - -Default 5-minute timeout prevents: -- Stale approval requests -- Forgotten dialogs -- Unattended approval accumulation - -### 5. Session vs Permanent - -**Session-only:** -- ✅ Safe for experimentation -- ✅ Doesn't persist across restarts -- ✅ Good for one-off commands - -**Permanent:** -- ⚠️ Saved to YAML forever -- ⚠️ Available to all future sessions -- ⚠️ User should understand impact - -### 6. File Locking is Essential - -Multiple agents or concurrent modifications require: -- Exclusive file locks (fcntl/msvcrt) -- Atomic read-modify-write -- Proper error handling - -Without locking → race conditions → corrupted YAML - -### 7. Audit Trail - -Consider logging all approval decisions: -``` -[2026-01-22 10:30:45] User approved 'swift' (session-only) -[2026-01-22 10:32:12] User approved 'xcodebuild' (permanent) -[2026-01-22 10:35:00] Approval timeout for 'wget' (denied) -``` - ---- - -## Future Enhancements - -Beyond Phase 3 scope, but possible extensions: - -### 1. Approval Profiles - -Pre-defined approval sets: -```yaml -profiles: - ios-dev: - - swift* - - xcodebuild - - xcrun - - rust-dev: - - cargo - - rustc - - clippy -``` - -User can activate profile with one click. - -### 2. Smart Recommendations - -Agent AI suggests commands to add based on: -- Project type detection (iOS, Rust, Python) -- Frequently denied commands -- Similar projects - -### 3. Approval History - -Show past approvals in UI: -- What was approved -- When -- Session vs permanent -- By which agent - -### 4. Bulk Approve/Deny - -When agent requests multiple commands: -- "Approve all for session" -- "Save all to config" -- "Deny all" - -### 5. Temporary Time-Based Approval - -"Allow for next 1 hour" option: -- Not session-only (survives restarts) -- Not permanent (expires) -- Good for contractors/temporary access - -### 6. Command Arguments Validation - -Phase 1 has placeholder, could be fully implemented: -```yaml -- name: rm - description: Remove files - args_whitelist: - - "-rf ./build/*" - - "-rf ./dist/*" -``` - -### 7. Remote Approval - -For team environments: -- Agent requests approval -- Notification sent to team lead -- Lead approves/denies remotely -- Agent proceeds based on decision - ---- - -## Questions for Implementer - -Before starting Phase 3, consider: - -1. **CLI vs UI priority?** - - Implement CLI first (simpler)? - - Or UI first (more users)? - -2. **Approval persistence format?** - - Separate log file for audit trail? - - Just YAML modifications? - -3. 
**Dangerous commands list?** - - Current list correct? - - Need org-specific dangerous commands? - -4. **Timeout default?** - - 5 minutes reasonable? - - Different for dangerous commands? - -5. **UI placement?** - - Top banner (blocks view)? - - Modal dialog (more prominent)? - - Sidebar notification? - -6. **Multiple agents?** - - How to attribute approvals? - - Show which agent requested? - -7. **Undo permanent approvals?** - - UI for removing saved commands? - - Or manual YAML editing only? - ---- - -## Success Criteria - -Phase 3 is complete when: - -- ✅ Agent can request approval for unknown commands -- ✅ CLI shows Rich TUI dialog with countdown -- ✅ UI shows React banner with buttons -- ✅ Session-only approval works (in-memory) -- ✅ Permanent approval persists to YAML -- ✅ Dangerous commands show emphatic warnings -- ✅ Timeout auto-denies after configured time -- ✅ Multiple requests can queue -- ✅ File locking prevents corruption -- ✅ All tests pass (unit + integration) -- ✅ Documentation updated -- ✅ Backward compatible (Phase 1/2 still work) - ---- - -## Estimated Timeline - -| Task | Time | Dependencies | -|------|------|--------------| -| ApprovalManager core | 2-3 hours | None | -| Modify bash_security_hook | 1 hour | ApprovalManager | -| File locking + persist | 1-2 hours | None | -| CLI approval TUI | 3-4 hours | ApprovalManager | -| React components | 4-5 hours | None | -| WebSocket integration | 2-3 hours | React components | -| Unit tests | 3-4 hours | All core features | -| Integration tests | 2-3 hours | Full implementation | -| Documentation | 2-3 hours | None | -| Manual testing + polish | 4-6 hours | Full implementation | - -**Total: 24-36 hours (3-4.5 days)** - ---- - -## Getting Started - -To implement Phase 3: - -1. **Read this document fully** -2. **Review Phase 1 & 2 code** (`security.py`, `client.py`) -3. **Run existing tests** to understand current behavior -4. **Start with ApprovalManager** (core functionality) -5. **Add file locking** (critical for safety) -6. **Choose CLI or UI** (whichever you're more comfortable with) -7. **Write tests as you go** (don't leave for end) -8. **Manual test frequently** (approval UX needs polish) - -Good luck! 🚀 - ---- - -**Document Version:** 1.0 -**Last Updated:** 2026-01-22 -**Author:** Phase 1 & 2 implementation team -**Status:** Ready for implementation diff --git a/SAMPLE_PROMPT.md b/SAMPLE_PROMPT.md deleted file mode 100644 index 284a4bf..0000000 --- a/SAMPLE_PROMPT.md +++ /dev/null @@ -1,22 +0,0 @@ -Let's call it Simple Todo. This is a really simple web app that I can use to track my to-do items using a Kanban -board. I should be able to add to-dos and then drag and drop them through the Kanban board. The different columns in -the Kanban board are: - -- To Do -- In Progress -- Done - -The app should use a neobrutalism design. - -There is no need for user authentication either. All the to-dos will be stored in local storage, so each user has -access to all of their to-dos when they open their browser. So do not worry about implementing a backend with user -authentication or a database. Simply store everything in local storage. As for the design, please try to avoid AI -slop, so use your front-end design skills to design something beautiful and practical. 
As for the content of the -to-dos, we should store: - -- The name or the title at the very least -- Optionally, we can also set tags, due dates, and priorities which should be represented as beautiful little badges - on the to-do card Users should have the ability to easily clear out all the completed To-Dos. They should also be - able to filter and search for To-Dos as well. - -You choose the rest. Keep it simple. Should be 25 features. diff --git a/agent.py b/agent.py index 7d90473..a3daaf8 100644 --- a/agent.py +++ b/agent.py @@ -23,14 +23,27 @@ if sys.platform == "win32": sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding="utf-8", errors="replace", line_buffering=True) from client import create_client -from progress import count_passing_tests, has_features, print_progress_summary, print_session_header +from progress import ( + count_passing_tests, + has_features, + print_progress_summary, + print_session_header, +) from prompts import ( copy_spec_to_project, + get_batch_feature_prompt, get_coding_prompt, get_initializer_prompt, get_single_feature_prompt, get_testing_prompt, ) +from rate_limit_utils import ( + calculate_error_backoff, + calculate_rate_limit_backoff, + clamp_retry_delay, + is_rate_limit_error, + parse_retry_after, +) # Configuration AUTO_CONTINUE_DELAY_SECONDS = 3 @@ -106,8 +119,19 @@ async def run_agent_session( return "continue", response_text except Exception as e: - print(f"Error during agent session: {e}") - return "error", str(e) + error_str = str(e) + print(f"Error during agent session: {error_str}") + + # Detect rate limit errors from exception message + if is_rate_limit_error(error_str): + # Try to extract retry-after time from error + retry_seconds = parse_retry_after(error_str) + if retry_seconds is not None: + return "rate_limit", str(retry_seconds) + else: + return "rate_limit", "unknown" + + return "error", error_str async def run_autonomous_agent( @@ -116,8 +140,10 @@ async def run_autonomous_agent( max_iterations: Optional[int] = None, yolo_mode: bool = False, feature_id: Optional[int] = None, + feature_ids: Optional[list[int]] = None, agent_type: Optional[str] = None, testing_feature_id: Optional[int] = None, + testing_feature_ids: Optional[list[int]] = None, ) -> None: """ Run the autonomous agent loop. 
@@ -128,8 +154,10 @@ async def run_autonomous_agent( max_iterations: Maximum number of iterations (None for unlimited) yolo_mode: If True, skip browser testing in coding agent prompts feature_id: If set, work only on this specific feature (used by orchestrator for coding agents) + feature_ids: If set, work on these features in batch (used by orchestrator for batch mode) agent_type: Type of agent: "initializer", "coding", "testing", or None (auto-detect) - testing_feature_id: For testing agents, the pre-claimed feature ID to test + testing_feature_id: For testing agents, the pre-claimed feature ID to test (legacy single mode) + testing_feature_ids: For testing agents, list of feature IDs to batch test """ print("\n" + "=" * 70) print(" AUTONOMOUS CODING AGENT") @@ -140,7 +168,9 @@ async def run_autonomous_agent( print(f"Agent type: {agent_type}") if yolo_mode: print("Mode: YOLO (testing agents disabled)") - if feature_id: + if feature_ids and len(feature_ids) > 1: + print(f"Feature batch: {', '.join(f'#{fid}' for fid in feature_ids)}") + elif feature_id: print(f"Feature assignment: #{feature_id}") if max_iterations: print(f"Max iterations: {max_iterations}") @@ -183,6 +213,8 @@ async def run_autonomous_agent( # Main loop iteration = 0 + rate_limit_retries = 0 # Track consecutive rate limit errors for exponential backoff + error_retries = 0 # Track consecutive non-rate-limit errors while True: iteration += 1 @@ -212,23 +244,29 @@ async def run_autonomous_agent( import os if agent_type == "testing": agent_id = f"testing-{os.getpid()}" # Unique ID for testing agents + elif feature_ids and len(feature_ids) > 1: + agent_id = f"batch-{feature_ids[0]}" elif feature_id: agent_id = f"feature-{feature_id}" else: agent_id = None - client = create_client(project_dir, model, yolo_mode=yolo_mode, agent_id=agent_id) + client = create_client(project_dir, model, yolo_mode=yolo_mode, agent_id=agent_id, agent_type=agent_type) # Choose prompt based on agent type if agent_type == "initializer": prompt = get_initializer_prompt(project_dir) elif agent_type == "testing": - prompt = get_testing_prompt(project_dir, testing_feature_id) - elif feature_id: + prompt = get_testing_prompt(project_dir, testing_feature_id, testing_feature_ids) + elif feature_ids and len(feature_ids) > 1: + # Batch mode (used by orchestrator for multi-feature coding agents) + prompt = get_batch_feature_prompt(feature_ids, project_dir, yolo_mode) + elif feature_id or (feature_ids is not None and len(feature_ids) == 1): # Single-feature mode (used by orchestrator for coding agents) - prompt = get_single_feature_prompt(feature_id, project_dir, yolo_mode) + fid = feature_id if feature_id is not None else feature_ids[0] # type: ignore[index] + prompt = get_single_feature_prompt(fid, project_dir, yolo_mode) else: # General coding prompt (legacy path) - prompt = get_coding_prompt(project_dir) + prompt = get_coding_prompt(project_dir, yolo_mode=yolo_mode) # Run session with async context manager # Wrap in try/except to handle MCP server startup failures gracefully @@ -250,13 +288,28 @@ async def run_autonomous_agent( # Handle status if status == "continue": + # Reset error retries on success; rate-limit retries reset only if no signal + error_retries = 0 + reset_rate_limit_retries = True + delay_seconds = AUTO_CONTINUE_DELAY_SECONDS target_time_str = None - if "limit reached" in response.lower(): - print("Claude Agent SDK indicated limit reached.") + # Check for rate limit indicators in response text + if is_rate_limit_error(response): + 
print("Claude Agent SDK indicated rate limit reached.") + reset_rate_limit_retries = False - # Try to parse reset time from response + # Try to extract retry-after from response text first + retry_seconds = parse_retry_after(response) + if retry_seconds is not None: + delay_seconds = clamp_retry_delay(retry_seconds) + else: + # Use exponential backoff when retry-after unknown + delay_seconds = calculate_rate_limit_backoff(rate_limit_retries) + rate_limit_retries += 1 + + # Try to parse reset time from response (more specific format) match = re.search( r"(?i)\bresets(?:\s+at)?\s+(\d+)(?::(\d+))?\s*(am|pm)\s*\(([^)]+)\)", response, @@ -285,9 +338,7 @@ async def run_autonomous_agent( target += timedelta(days=1) delta = target - now - delay_seconds = min( - delta.total_seconds(), 24 * 60 * 60 - ) # Clamp to 24 hours max + delay_seconds = min(max(int(delta.total_seconds()), 1), 24 * 60 * 60) target_time_str = target.strftime("%B %d, %Y at %I:%M %p %Z") except Exception as e: @@ -316,20 +367,56 @@ async def run_autonomous_agent( print("The autonomous agent has finished its work.") break - # Single-feature mode OR testing agent: exit after one session - if feature_id is not None or agent_type == "testing": + # Single-feature mode, batch mode, or testing agent: exit after one session + if feature_ids and len(feature_ids) > 1: + print(f"\nBatch mode: Features {', '.join(f'#{fid}' for fid in feature_ids)} session complete.") + break + elif feature_id is not None or (feature_ids is not None and len(feature_ids) == 1): + fid = feature_id if feature_id is not None else feature_ids[0] # type: ignore[index] if agent_type == "testing": print("\nTesting agent complete. Terminating session.") else: - print(f"\nSingle-feature mode: Feature #{feature_id} session complete.") + print(f"\nSingle-feature mode: Feature #{fid} session complete.") break + elif agent_type == "testing": + print("\nTesting agent complete. Terminating session.") + break + + # Reset rate limit retries only if no rate limit signal was detected + if reset_rate_limit_retries: + rate_limit_retries = 0 + + await asyncio.sleep(delay_seconds) + + elif status == "rate_limit": + # Smart rate limit handling with exponential backoff + # Reset error counter so mixed events don't inflate delays + error_retries = 0 + if response != "unknown": + try: + delay_seconds = clamp_retry_delay(int(response)) + except (ValueError, TypeError): + # Malformed value - fall through to exponential backoff + response = "unknown" + if response == "unknown": + # Use exponential backoff when retry-after unknown or malformed + delay_seconds = calculate_rate_limit_backoff(rate_limit_retries) + rate_limit_retries += 1 + print(f"\nRate limit hit. Backoff wait: {delay_seconds} seconds (attempt #{rate_limit_retries})...") + else: + print(f"\nRate limit hit. 
Waiting {delay_seconds} seconds before retry...") await asyncio.sleep(delay_seconds) elif status == "error": + # Non-rate-limit errors: linear backoff capped at 5 minutes + # Reset rate limit counter so mixed events don't inflate delays + rate_limit_retries = 0 + error_retries += 1 + delay_seconds = calculate_error_backoff(error_retries) print("\nSession encountered an error") - print("Will retry with a fresh session...") - await asyncio.sleep(AUTO_CONTINUE_DELAY_SECONDS) + print(f"Will retry in {delay_seconds}s (attempt #{error_retries})...") + await asyncio.sleep(delay_seconds) # Small delay between sessions if max_iterations is None or iteration < max_iterations: diff --git a/api/database.py b/api/database.py index 90dc49a..4c5ef42 100644 --- a/api/database.py +++ b/api/database.py @@ -8,7 +8,7 @@ SQLite database schema for feature storage using SQLAlchemy. import sys from datetime import datetime, timezone from pathlib import Path -from typing import Optional +from typing import Generator, Optional def _utc_now() -> datetime: @@ -26,13 +26,16 @@ from sqlalchemy import ( String, Text, create_engine, + event, text, ) -from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy.orm import Session, relationship, sessionmaker +from sqlalchemy.orm import DeclarativeBase, Session, relationship, sessionmaker from sqlalchemy.types import JSON -Base = declarative_base() + +class Base(DeclarativeBase): + """SQLAlchemy 2.0 style declarative base.""" + pass class Feature(Base): @@ -180,7 +183,8 @@ class ScheduleOverride(Base): def get_database_path(project_dir: Path) -> Path: """Return the path to the SQLite database for a project.""" - return project_dir / "features.db" + from autocoder_paths import get_features_db_path + return get_features_db_path(project_dir) def get_database_url(project_dir: Path) -> str: @@ -307,11 +311,11 @@ def _migrate_add_schedules_tables(engine) -> None: # Create schedules table if missing if "schedules" not in existing_tables: - Schedule.__table__.create(bind=engine) + Schedule.__table__.create(bind=engine) # type: ignore[attr-defined] # Create schedule_overrides table if missing if "schedule_overrides" not in existing_tables: - ScheduleOverride.__table__.create(bind=engine) + ScheduleOverride.__table__.create(bind=engine) # type: ignore[attr-defined] # Add crash_count column if missing (for upgrades) if "schedules" in existing_tables: @@ -332,6 +336,35 @@ def _migrate_add_schedules_tables(engine) -> None: conn.commit() +def _configure_sqlite_immediate_transactions(engine) -> None: + """Configure engine for IMMEDIATE transactions via event hooks. + + Per SQLAlchemy docs: https://docs.sqlalchemy.org/en/20/dialects/sqlite.html + + This replaces fragile pysqlite implicit transaction handling with explicit + BEGIN IMMEDIATE at transaction start. 
Benefits: + - Acquires write lock immediately, preventing stale reads + - Works correctly regardless of prior ORM operations + - Future-proof: won't break when pysqlite legacy mode is removed in Python 3.16 + """ + @event.listens_for(engine, "connect") + def do_connect(dbapi_connection, connection_record): + # Disable pysqlite's implicit transaction handling + dbapi_connection.isolation_level = None + + # Set busy_timeout on raw connection before any transactions + cursor = dbapi_connection.cursor() + try: + cursor.execute("PRAGMA busy_timeout=30000") + finally: + cursor.close() + + @event.listens_for(engine, "begin") + def do_begin(conn): + # Use IMMEDIATE for all transactions to prevent stale reads + conn.exec_driver_sql("BEGIN IMMEDIATE") + + def create_database(project_dir: Path) -> tuple: """ Create database and return engine + session maker. @@ -351,21 +384,41 @@ def create_database(project_dir: Path) -> tuple: return _engine_cache[cache_key] db_url = get_database_url(project_dir) - engine = create_engine(db_url, connect_args={ - "check_same_thread": False, - "timeout": 30 # Wait up to 30s for locks - }) - Base.metadata.create_all(bind=engine) + + # Ensure parent directory exists (for .autocoder/ layout) + db_path = get_database_path(project_dir) + db_path.parent.mkdir(parents=True, exist_ok=True) # Choose journal mode based on filesystem type # WAL mode doesn't work reliably on network filesystems and can cause corruption is_network = _is_network_path(project_dir) journal_mode = "DELETE" if is_network else "WAL" + engine = create_engine(db_url, connect_args={ + "check_same_thread": False, + "timeout": 30 # Wait up to 30s for locks + }) + + # Set journal mode BEFORE configuring event hooks + # PRAGMA journal_mode must run outside of a transaction, and our event hooks + # start a transaction with BEGIN IMMEDIATE on every operation with engine.connect() as conn: - conn.execute(text(f"PRAGMA journal_mode={journal_mode}")) - conn.execute(text("PRAGMA busy_timeout=30000")) - conn.commit() + # Get raw DBAPI connection to execute PRAGMA outside transaction + raw_conn = conn.connection.dbapi_connection + if raw_conn is None: + raise RuntimeError("Failed to get raw DBAPI connection") + cursor = raw_conn.cursor() + try: + cursor.execute(f"PRAGMA journal_mode={journal_mode}") + cursor.execute("PRAGMA busy_timeout=30000") + finally: + cursor.close() + + # Configure IMMEDIATE transactions via event hooks AFTER setting PRAGMAs + # This must happen before create_all() and migrations run + _configure_sqlite_immediate_transactions(engine) + + Base.metadata.create_all(bind=engine) # Migrate existing databases _migrate_add_in_progress_column(engine) @@ -417,7 +470,7 @@ def set_session_maker(session_maker: sessionmaker) -> None: _session_maker = session_maker -def get_db() -> Session: +def get_db() -> Generator[Session, None, None]: """ Dependency for FastAPI to get database session. @@ -429,5 +482,55 @@ def get_db() -> Session: db = _session_maker() try: yield db + except Exception: + db.rollback() + raise finally: db.close() + + +# ============================================================================= +# Atomic Transaction Helpers for Parallel Mode +# ============================================================================= +# These helpers prevent database corruption when multiple processes access the +# same SQLite database concurrently. They use IMMEDIATE transactions which +# acquire write locks at the start (preventing stale reads) and atomic +# UPDATE ... 
WHERE clauses (preventing check-then-modify races). + + +from contextlib import contextmanager + + +@contextmanager +def atomic_transaction(session_maker): + """Context manager for atomic SQLite transactions. + + Acquires a write lock immediately via BEGIN IMMEDIATE (configured by + engine event hooks), preventing stale reads in read-modify-write patterns. + This is essential for preventing race conditions in parallel mode. + + Args: + session_maker: SQLAlchemy sessionmaker + + Yields: + SQLAlchemy session with automatic commit/rollback + + Example: + with atomic_transaction(session_maker) as session: + # All reads in this block are protected by write lock + feature = session.query(Feature).filter(...).first() + feature.priority = new_priority + # Commit happens automatically on exit + """ + session = session_maker() + try: + yield session + session.commit() + except Exception: + try: + session.rollback() + except Exception: + pass # Don't let rollback failure mask original error + raise + finally: + session.close() diff --git a/api/dependency_resolver.py b/api/dependency_resolver.py index 6b09244..9cc8082 100644 --- a/api/dependency_resolver.py +++ b/api/dependency_resolver.py @@ -7,6 +7,7 @@ Includes cycle detection, validation, and helper functions for dependency manage """ import heapq +from collections import deque from typing import TypedDict # Security: Prevent DoS via excessive dependencies @@ -301,19 +302,20 @@ def compute_scheduling_scores(features: list[dict]) -> dict[int, float]: # Calculate depths via BFS from roots # Use visited set to prevent infinite loops from circular dependencies + # Use deque for O(1) popleft instead of list.pop(0) which is O(n) depths: dict[int, int] = {} visited: set[int] = set() roots = [f["id"] for f in features if not parents[f["id"]]] - queue = [(root, 0) for root in roots] - while queue: - node_id, depth = queue.pop(0) + bfs_queue: deque[tuple[int, int]] = deque((root, 0) for root in roots) + while bfs_queue: + node_id, depth = bfs_queue.popleft() if node_id in visited: continue # Skip already visited nodes (handles cycles) visited.add(node_id) depths[node_id] = depth for child_id in children[node_id]: if child_id not in visited: - queue.append((child_id, depth + 1)) + bfs_queue.append((child_id, depth + 1)) # Handle orphaned nodes (shouldn't happen but be safe) for f in features: diff --git a/autocoder_paths.py b/autocoder_paths.py new file mode 100644 index 0000000..7d1db6f --- /dev/null +++ b/autocoder_paths.py @@ -0,0 +1,290 @@ +""" +Autocoder Path Resolution +========================= + +Central module for resolving paths to autocoder-generated files within a project. + +Implements a dual-path resolution strategy for backward compatibility: + + 1. Check ``project_dir / ".autocoder" / X`` (new layout) + 2. Check ``project_dir / X`` (legacy root-level layout) + 3. Default to the new location for fresh projects + +This allows existing projects with root-level ``features.db``, ``.agent.lock``, +etc. to keep working while new projects store everything under ``.autocoder/``. + +The ``migrate_project_layout`` function can move an old-layout project to the +new layout safely, with full integrity checks for SQLite databases. 
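+
+Example of the resolution order (illustrative; the path is hypothetical)::
+
+    from pathlib import Path
+    from autocoder_paths import get_features_db_path
+
+    # Prefers <project>/.autocoder/features.db, falls back to a legacy
+    # root-level features.db, and defaults to the new location for
+    # fresh projects.
+    db_path = get_features_db_path(Path("/path/to/project"))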
+""" + +import logging +import shutil +import sqlite3 +from pathlib import Path + +logger = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# .gitignore content written into every .autocoder/ directory +# --------------------------------------------------------------------------- +_GITIGNORE_CONTENT = """\ +# Autocoder runtime files +features.db +features.db-wal +features.db-shm +assistant.db +assistant.db-wal +assistant.db-shm +.agent.lock +.devserver.lock +.claude_settings.json +.claude_assistant_settings.json +.claude_settings.expand.*.json +.progress_cache +""" + + +# --------------------------------------------------------------------------- +# Private helpers +# --------------------------------------------------------------------------- + +def _resolve_path(project_dir: Path, filename: str) -> Path: + """Resolve a file path using dual-path strategy. + + Checks the new ``.autocoder/`` location first, then falls back to the + legacy root-level location. If neither exists, returns the new location + so that newly-created files land in ``.autocoder/``. + """ + new = project_dir / ".autocoder" / filename + if new.exists(): + return new + old = project_dir / filename + if old.exists(): + return old + return new # default for new projects + + +def _resolve_dir(project_dir: Path, dirname: str) -> Path: + """Resolve a directory path using dual-path strategy. + + Same logic as ``_resolve_path`` but intended for directories such as + ``prompts/``. + """ + new = project_dir / ".autocoder" / dirname + if new.exists(): + return new + old = project_dir / dirname + if old.exists(): + return old + return new + + +# --------------------------------------------------------------------------- +# .autocoder directory management +# --------------------------------------------------------------------------- + +def get_autocoder_dir(project_dir: Path) -> Path: + """Return the ``.autocoder`` directory path. Does NOT create it.""" + return project_dir / ".autocoder" + + +def ensure_autocoder_dir(project_dir: Path) -> Path: + """Create the ``.autocoder/`` directory (if needed) and write its ``.gitignore``. + + Returns: + The path to the ``.autocoder`` directory. 
+ """ + autocoder_dir = get_autocoder_dir(project_dir) + autocoder_dir.mkdir(parents=True, exist_ok=True) + + gitignore_path = autocoder_dir / ".gitignore" + gitignore_path.write_text(_GITIGNORE_CONTENT, encoding="utf-8") + + return autocoder_dir + + +# --------------------------------------------------------------------------- +# Dual-path file helpers +# --------------------------------------------------------------------------- + +def get_features_db_path(project_dir: Path) -> Path: + """Resolve the path to ``features.db``.""" + return _resolve_path(project_dir, "features.db") + + +def get_assistant_db_path(project_dir: Path) -> Path: + """Resolve the path to ``assistant.db``.""" + return _resolve_path(project_dir, "assistant.db") + + +def get_agent_lock_path(project_dir: Path) -> Path: + """Resolve the path to ``.agent.lock``.""" + return _resolve_path(project_dir, ".agent.lock") + + +def get_devserver_lock_path(project_dir: Path) -> Path: + """Resolve the path to ``.devserver.lock``.""" + return _resolve_path(project_dir, ".devserver.lock") + + +def get_claude_settings_path(project_dir: Path) -> Path: + """Resolve the path to ``.claude_settings.json``.""" + return _resolve_path(project_dir, ".claude_settings.json") + + +def get_claude_assistant_settings_path(project_dir: Path) -> Path: + """Resolve the path to ``.claude_assistant_settings.json``.""" + return _resolve_path(project_dir, ".claude_assistant_settings.json") + + +def get_progress_cache_path(project_dir: Path) -> Path: + """Resolve the path to ``.progress_cache``.""" + return _resolve_path(project_dir, ".progress_cache") + + +def get_prompts_dir(project_dir: Path) -> Path: + """Resolve the path to the ``prompts/`` directory.""" + return _resolve_dir(project_dir, "prompts") + + +# --------------------------------------------------------------------------- +# Non-dual-path helpers (always use new location) +# --------------------------------------------------------------------------- + +def get_expand_settings_path(project_dir: Path, uuid_hex: str) -> Path: + """Return the path for an ephemeral expand-session settings file. + + These files are short-lived and always stored in ``.autocoder/``. + """ + return project_dir / ".autocoder" / f".claude_settings.expand.{uuid_hex}.json" + + +# --------------------------------------------------------------------------- +# Lock-file safety check +# --------------------------------------------------------------------------- + +def has_agent_running(project_dir: Path) -> bool: + """Check whether any agent or dev-server lock file exists at either location. + + Inspects both the legacy root-level paths and the new ``.autocoder/`` + paths so that a running agent is detected regardless of project layout. + + Returns: + ``True`` if any ``.agent.lock`` or ``.devserver.lock`` exists. + """ + lock_names = (".agent.lock", ".devserver.lock") + for name in lock_names: + if (project_dir / name).exists(): + return True + if (project_dir / ".autocoder" / name).exists(): + return True + return False + + +# --------------------------------------------------------------------------- +# Migration +# --------------------------------------------------------------------------- + +def migrate_project_layout(project_dir: Path) -> list[str]: + """Migrate a project from the legacy root-level layout to ``.autocoder/``. + + The migration is incremental and safe: + + * If the agent is running (lock files present) the migration is skipped + entirely to avoid corrupting in-use databases. 
+ * Each file/directory is migrated independently. If any single step + fails the error is logged and migration continues with the remaining + items. Partial migration is safe because the dual-path resolution + strategy will find files at whichever location they ended up in. + + Returns: + A list of human-readable descriptions of what was migrated, e.g. + ``["prompts/ -> .autocoder/prompts/", "features.db -> .autocoder/features.db"]``. + An empty list means nothing was migrated (either everything is + already migrated, or the agent is running). + """ + # Safety: refuse to migrate while an agent is running + if has_agent_running(project_dir): + logger.warning("Migration skipped: agent or dev-server is running for %s", project_dir) + return [] + + autocoder_dir = ensure_autocoder_dir(project_dir) + migrated: list[str] = [] + + # --- 1. Migrate prompts/ directory ----------------------------------- + try: + old_prompts = project_dir / "prompts" + new_prompts = autocoder_dir / "prompts" + if old_prompts.exists() and old_prompts.is_dir() and not new_prompts.exists(): + shutil.copytree(str(old_prompts), str(new_prompts)) + shutil.rmtree(str(old_prompts)) + migrated.append("prompts/ -> .autocoder/prompts/") + logger.info("Migrated prompts/ -> .autocoder/prompts/") + except Exception: + logger.warning("Failed to migrate prompts/ directory", exc_info=True) + + # --- 2. Migrate SQLite databases (features.db, assistant.db) --------- + db_names = ("features.db", "assistant.db") + for db_name in db_names: + try: + old_db = project_dir / db_name + new_db = autocoder_dir / db_name + if old_db.exists() and not new_db.exists(): + # Flush WAL to ensure all data is in the main database file + conn = sqlite3.connect(str(old_db)) + try: + cursor = conn.cursor() + cursor.execute("PRAGMA wal_checkpoint(TRUNCATE)") + finally: + conn.close() + + # Copy the main database file (WAL is now flushed) + shutil.copy2(str(old_db), str(new_db)) + + # Verify the copy is intact + verify_conn = sqlite3.connect(str(new_db)) + try: + verify_cursor = verify_conn.cursor() + result = verify_cursor.execute("PRAGMA integrity_check").fetchone() + if result is None or result[0] != "ok": + logger.error( + "Integrity check failed for migrated %s: %s", + db_name, result, + ) + # Remove the broken copy; old file stays in place + new_db.unlink(missing_ok=True) + continue + finally: + verify_conn.close() + + # Remove old database files (.db, .db-wal, .db-shm) + old_db.unlink(missing_ok=True) + for suffix in ("-wal", "-shm"): + wal_file = project_dir / f"{db_name}{suffix}" + wal_file.unlink(missing_ok=True) + + migrated.append(f"{db_name} -> .autocoder/{db_name}") + logger.info("Migrated %s -> .autocoder/%s", db_name, db_name) + except Exception: + logger.warning("Failed to migrate %s", db_name, exc_info=True) + + # --- 3. 
Migrate simple files ----------------------------------------- + simple_files = ( + ".agent.lock", + ".devserver.lock", + ".claude_settings.json", + ".claude_assistant_settings.json", + ".progress_cache", + ) + for filename in simple_files: + try: + old_file = project_dir / filename + new_file = autocoder_dir / filename + if old_file.exists() and not new_file.exists(): + shutil.move(str(old_file), str(new_file)) + migrated.append(f"{filename} -> .autocoder/{filename}") + logger.info("Migrated %s -> .autocoder/%s", filename, filename) + except Exception: + logger.warning("Failed to migrate %s", filename, exc_info=True) + + return migrated diff --git a/autonomous_agent_demo.py b/autonomous_agent_demo.py index 16702f5..ed3db37 100644 --- a/autonomous_agent_demo.py +++ b/autonomous_agent_demo.py @@ -133,6 +133,13 @@ Authentication: help="Work on a specific feature ID only (used by orchestrator for coding agents)", ) + parser.add_argument( + "--feature-ids", + type=str, + default=None, + help="Comma-separated feature IDs to implement in batch (e.g., '5,8,12')", + ) + # Agent type for subprocess mode parser.add_argument( "--agent-type", @@ -145,7 +152,14 @@ Authentication: "--testing-feature-id", type=int, default=None, - help="Feature ID to regression test (used by orchestrator for testing agents)", + help="Feature ID to regression test (used by orchestrator for testing agents, legacy single mode)", + ) + + parser.add_argument( + "--testing-feature-ids", + type=str, + default=None, + help="Comma-separated feature IDs to regression test in batch (e.g., '5,12,18')", ) # Testing agent configuration @@ -156,6 +170,20 @@ Authentication: help="Testing agents per coding agent (0-3, default: 1). Set to 0 to disable testing agents.", ) + parser.add_argument( + "--testing-batch-size", + type=int, + default=3, + help="Number of features per testing batch (1-5, default: 3)", + ) + + parser.add_argument( + "--batch-size", + type=int, + default=3, + help="Max features per coding agent batch (1-3, default: 3)", + ) + return parser.parse_args() @@ -193,6 +221,30 @@ def main() -> None: print("Use an absolute path or register the project first.") return + # Migrate project layout to .autocoder/ if needed (idempotent, safe) + from autocoder_paths import migrate_project_layout + migrated = migrate_project_layout(project_dir) + if migrated: + print(f"Migrated project files to .autocoder/: {', '.join(migrated)}", flush=True) + + # Parse batch testing feature IDs (comma-separated string -> list[int]) + testing_feature_ids: list[int] | None = None + if args.testing_feature_ids: + try: + testing_feature_ids = [int(x.strip()) for x in args.testing_feature_ids.split(",") if x.strip()] + except ValueError: + print(f"Error: --testing-feature-ids must be comma-separated integers, got: {args.testing_feature_ids}") + return + + # Parse batch coding feature IDs (comma-separated string -> list[int]) + coding_feature_ids: list[int] | None = None + if args.feature_ids: + try: + coding_feature_ids = [int(x.strip()) for x in args.feature_ids.split(",") if x.strip()] + except ValueError: + print(f"Error: --feature-ids must be comma-separated integers, got: {args.feature_ids}") + return + try: if args.agent_type: # Subprocess mode - spawned by orchestrator for a specific role @@ -203,8 +255,10 @@ def main() -> None: max_iterations=args.max_iterations or 1, yolo_mode=args.yolo, feature_id=args.feature_id, + feature_ids=coding_feature_ids, agent_type=args.agent_type, testing_feature_id=args.testing_feature_id, + 
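# --- Reviewer sketch (not part of the patch) -------------------------------
# Given the new batch flags above, the orchestrator's batch spawn (whose body
# is not shown in this section) presumably builds a subprocess command along
# these lines. Only the flag names added in this diff are grounded; the
# project-path flag name and script location are assumptions for illustration.
import sys
from pathlib import Path


def build_batch_command_sketch(project_dir: Path, feature_ids: list[int]) -> list[str]:
    """Hypothetical command builder mirroring the --feature-ids contract."""
    return [
        sys.executable,
        "autonomous_agent_demo.py",
        "--project-dir", str(project_dir),  # flag name assumed, not shown in this diff
        "--agent-type", "coding",
        # Comma-separated, matching the parser above: [5, 8, 12] -> "5,8,12"
        "--feature-ids", ",".join(str(fid) for fid in feature_ids),
        "--max-iterations", "1",
    ]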
testing_feature_ids=testing_feature_ids, ) ) else: @@ -223,6 +277,8 @@ def main() -> None: model=args.model, yolo_mode=args.yolo, testing_agent_ratio=args.testing_ratio, + testing_batch_size=args.testing_batch_size, + batch_size=args.batch_size, ) ) except KeyboardInterrupt: diff --git a/client.py b/client.py index f394ebb..d31b5ad 100644 --- a/client.py +++ b/client.py @@ -16,7 +16,8 @@ from claude_agent_sdk import ClaudeAgentOptions, ClaudeSDKClient from claude_agent_sdk.types import HookContext, HookInput, HookMatcher, SyncHookJSONOutput from dotenv import load_dotenv -from security import bash_security_hook +from env_constants import API_ENV_VARS +from security import SENSITIVE_DIRECTORIES, bash_security_hook # Load environment variables from .env file if present load_dotenv() @@ -31,43 +32,15 @@ DEFAULT_PLAYWRIGHT_HEADLESS = True # Firefox is recommended for lower CPU usage DEFAULT_PLAYWRIGHT_BROWSER = "firefox" -# Environment variables to pass through to Claude CLI for API configuration -# These allow using alternative API endpoints (e.g., GLM via z.ai, Vertex AI) without -# affecting the user's global Claude Code settings -API_ENV_VARS = [ - "ANTHROPIC_BASE_URL", # Custom API endpoint (e.g., https://api.z.ai/api/anthropic) - "ANTHROPIC_AUTH_TOKEN", # API authentication token - "API_TIMEOUT_MS", # Request timeout in milliseconds - "ANTHROPIC_DEFAULT_SONNET_MODEL", # Model override for Sonnet - "ANTHROPIC_DEFAULT_OPUS_MODEL", # Model override for Opus - "ANTHROPIC_DEFAULT_HAIKU_MODEL", # Model override for Haiku - # Vertex AI configuration - "CLAUDE_CODE_USE_VERTEX", # Enable Vertex AI mode (set to "1") - "CLOUD_ML_REGION", # GCP region (e.g., us-east5) - "ANTHROPIC_VERTEX_PROJECT_ID", # GCP project ID -] - # Extra read paths for cross-project file access (read-only) # Set EXTRA_READ_PATHS environment variable with comma-separated absolute paths # Example: EXTRA_READ_PATHS=/Volumes/Data/dev,/Users/shared/libs EXTRA_READ_PATHS_VAR = "EXTRA_READ_PATHS" -# Sensitive directories that should never be allowed via EXTRA_READ_PATHS -# These contain credentials, keys, or system-critical files -EXTRA_READ_PATHS_BLOCKLIST = { - ".ssh", - ".aws", - ".azure", - ".kube", - ".gnupg", - ".gpg", - ".password-store", - ".docker", - ".config/gcloud", - ".npmrc", - ".pypirc", - ".netrc", -} +# Sensitive directories that should never be allowed via EXTRA_READ_PATHS. +# Delegates to the canonical SENSITIVE_DIRECTORIES set in security.py so that +# this blocklist and the filesystem browser API share a single source of truth. +EXTRA_READ_PATHS_BLOCKLIST = SENSITIVE_DIRECTORIES def convert_model_for_vertex(model: str) -> str: """ @@ -209,32 +182,55 @@ def get_extra_read_paths() -> list[Path]: return validated_paths -# Feature MCP tools for feature/test management -FEATURE_MCP_TOOLS = [ - # Core feature operations +# Per-agent-type MCP tool lists. +# Only expose the tools each agent type actually needs, reducing tool schema +# overhead and preventing agents from calling tools meant for other roles. +# +# Tools intentionally omitted from ALL agent lists (UI/orchestrator only): +# feature_get_ready, feature_get_blocked, feature_get_graph, +# feature_remove_dependency +# +# The ghost tool "feature_release_testing" was removed entirely -- it was +# listed here but never implemented in mcp_server/feature_mcp.py. 
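# --- Reviewer sketch (not part of the patch) -------------------------------
# At-a-glance view of what each agent role ends up seeing once the lists
# defined just below are combined in create_client() later in this hunk:
# built-in tools plus the role-specific feature tools, plus Playwright tools
# unless YOLO mode is enabled. This mirrors the selection logic further down
# and is included only as a summary.
def visible_tools_sketch(agent_type: str, yolo_mode: bool) -> list[str]:
    role_tools = {
        "coding": CODING_AGENT_TOOLS,
        "testing": TESTING_AGENT_TOOLS,
        "initializer": INITIALIZER_AGENT_TOOLS,
    }.get(agent_type, CODING_AGENT_TOOLS)
    tools = [*BUILTIN_TOOLS, *role_tools]
    if not yolo_mode:
        tools.extend(PLAYWRIGHT_TOOLS)
    return tools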
+ +CODING_AGENT_TOOLS = [ "mcp__features__feature_get_stats", - "mcp__features__feature_get_by_id", # Get assigned feature details - "mcp__features__feature_get_summary", # Lightweight: id, name, status, deps only + "mcp__features__feature_get_by_id", + "mcp__features__feature_get_summary", + "mcp__features__feature_claim_and_get", "mcp__features__feature_mark_in_progress", - "mcp__features__feature_claim_and_get", # Atomic claim + get details "mcp__features__feature_mark_passing", - "mcp__features__feature_mark_failing", # Mark regression detected + "mcp__features__feature_mark_failing", "mcp__features__feature_skip", - "mcp__features__feature_create_bulk", - "mcp__features__feature_create", "mcp__features__feature_clear_in_progress", - "mcp__features__feature_release_testing", # Release testing claim - # Dependency management - "mcp__features__feature_add_dependency", - "mcp__features__feature_remove_dependency", - "mcp__features__feature_set_dependencies", - # Query tools - "mcp__features__feature_get_ready", - "mcp__features__feature_get_blocked", - "mcp__features__feature_get_graph", ] -# Playwright MCP tools for browser automation +TESTING_AGENT_TOOLS = [ + "mcp__features__feature_get_stats", + "mcp__features__feature_get_by_id", + "mcp__features__feature_get_summary", + "mcp__features__feature_mark_passing", + "mcp__features__feature_mark_failing", +] + +INITIALIZER_AGENT_TOOLS = [ + "mcp__features__feature_get_stats", + "mcp__features__feature_create_bulk", + "mcp__features__feature_create", + "mcp__features__feature_add_dependency", + "mcp__features__feature_set_dependencies", +] + +# Union of all agent tool lists -- used for permissions (all tools remain +# *permitted* so the MCP server can respond, but only the agent-type-specific +# list is included in allowed_tools, which controls what the LLM sees). +ALL_FEATURE_MCP_TOOLS = sorted( + set(CODING_AGENT_TOOLS) | set(TESTING_AGENT_TOOLS) | set(INITIALIZER_AGENT_TOOLS) +) + +# Playwright MCP tools for browser automation. +# Full set of tools for comprehensive UI testing including drag-and-drop, +# hover menus, file uploads, tab management, etc. PLAYWRIGHT_TOOLS = [ # Core navigation & screenshots "mcp__playwright__browser_navigate", @@ -247,9 +243,10 @@ PLAYWRIGHT_TOOLS = [ "mcp__playwright__browser_type", "mcp__playwright__browser_fill_form", "mcp__playwright__browser_select_option", - "mcp__playwright__browser_hover", - "mcp__playwright__browser_drag", "mcp__playwright__browser_press_key", + "mcp__playwright__browser_drag", + "mcp__playwright__browser_hover", + "mcp__playwright__browser_file_upload", # JavaScript & debugging "mcp__playwright__browser_evaluate", @@ -258,16 +255,17 @@ PLAYWRIGHT_TOOLS = [ "mcp__playwright__browser_network_requests", # Browser management - "mcp__playwright__browser_close", "mcp__playwright__browser_resize", - "mcp__playwright__browser_tabs", "mcp__playwright__browser_wait_for", "mcp__playwright__browser_handle_dialog", - "mcp__playwright__browser_file_upload", "mcp__playwright__browser_install", + "mcp__playwright__browser_close", + "mcp__playwright__browser_tabs", ] -# Built-in tools +# Built-in tools available to agents. +# WebFetch and WebSearch are included so coding agents can look up current +# documentation for frameworks and libraries they are implementing. 
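# --- Reviewer sketch (not part of the patch) -------------------------------
# Hypothetical call sites for the agent_type parameter that create_client()
# gains later in this hunk. Per the maps added there, a testing client gets
# the reduced tool list and max_turns=100, while coding/initializer clients
# get 300 turns. Parameter names outside the visible hunk (project_dir) and
# the model string are assumptions for illustration.
from pathlib import Path

project = Path("generations/my-app")

testing_client = create_client(
    project_dir=project,
    model="claude-sonnet-4-5",   # illustrative model name
    agent_type="testing",        # -> TESTING_AGENT_TOOLS, max_turns=100
)
coding_client = create_client(
    project_dir=project,
    model="claude-sonnet-4-5",
    agent_type="coding",         # -> CODING_AGENT_TOOLS, max_turns=300
    agent_id="coder-1",          # per-agent browser isolation (existing parameter)
)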
BUILTIN_TOOLS = [ "Read", "Write", @@ -285,6 +283,7 @@ def create_client( model: str, yolo_mode: bool = False, agent_id: str | None = None, + agent_type: str = "coding", ): """ Create a Claude Agent SDK client with multi-layered security. @@ -295,6 +294,8 @@ def create_client( yolo_mode: If True, skip Playwright MCP server for rapid prototyping agent_id: Optional unique identifier for browser isolation in parallel mode. When provided, each agent gets its own browser profile. + agent_type: One of "coding", "testing", or "initializer". Controls which + MCP tools are exposed and the max_turns limit. Returns: Configured ClaudeSDKClient (from claude_agent_sdk) @@ -308,13 +309,34 @@ def create_client( Note: Authentication is handled by start.bat/start.sh before this runs. The Claude SDK auto-detects credentials from the Claude CLI configuration """ - # Build allowed tools list based on mode - # In YOLO mode, exclude Playwright tools for faster prototyping - allowed_tools = [*BUILTIN_TOOLS, *FEATURE_MCP_TOOLS] + # Select the feature MCP tools appropriate for this agent type + feature_tools_map = { + "coding": CODING_AGENT_TOOLS, + "testing": TESTING_AGENT_TOOLS, + "initializer": INITIALIZER_AGENT_TOOLS, + } + feature_tools = feature_tools_map.get(agent_type, CODING_AGENT_TOOLS) + + # Select max_turns based on agent type: + # - coding/initializer: 300 turns (complex multi-step implementation) + # - testing: 100 turns (focused verification of a single feature) + max_turns_map = { + "coding": 300, + "testing": 100, + "initializer": 300, + } + max_turns = max_turns_map.get(agent_type, 300) + + # Build allowed tools list based on mode and agent type. + # In YOLO mode, exclude Playwright tools for faster prototyping. + allowed_tools = [*BUILTIN_TOOLS, *feature_tools] if not yolo_mode: allowed_tools.extend(PLAYWRIGHT_TOOLS) - # Build permissions list + # Build permissions list. + # We permit ALL feature MCP tools at the security layer (so the MCP server + # can respond if called), but the LLM only *sees* the agent-type-specific + # subset via allowed_tools above. permissions_list = [ # Allow all file operations within the project directory "Read(./**)", @@ -325,11 +347,11 @@ def create_client( # Bash permission granted here, but actual commands are validated # by the bash_security_hook (see security.py for allowed commands) "Bash(*)", - # Allow web tools for documentation lookup - "WebFetch", - "WebSearch", + # Allow web tools for looking up framework/library documentation + "WebFetch(*)", + "WebSearch(*)", # Allow Feature MCP tools for feature management - *FEATURE_MCP_TOOLS, + *ALL_FEATURE_MCP_TOOLS, ] # Add extra read paths from environment variable (read-only access) @@ -360,7 +382,9 @@ def create_client( project_dir.mkdir(parents=True, exist_ok=True) # Write settings to a file in the project directory - settings_file = project_dir / ".claude_settings.json" + from autocoder_paths import get_claude_settings_path + settings_file = get_claude_settings_path(project_dir) + settings_file.parent.mkdir(parents=True, exist_ok=True) with open(settings_file, "w") as f: json.dump(security_settings, f, indent=2) @@ -459,9 +483,10 @@ def create_client( context["project_dir"] = str(project_dir.resolve()) return await bash_security_hook(input_data, tool_use_id, context) - # PreCompact hook for logging and customizing context compaction + # PreCompact hook for logging and customizing context compaction. # Compaction is handled automatically by Claude Code CLI when context approaches limits. 
- # This hook allows us to log when compaction occurs and optionally provide custom instructions. + # This hook provides custom instructions that guide the summarizer to preserve + # critical workflow state while discarding verbose/redundant content. async def pre_compact_hook( input_data: HookInput, tool_use_id: str | None, @@ -474,8 +499,9 @@ def create_client( - "auto": Automatic compaction when context approaches token limits - "manual": User-initiated compaction via /compact command - The hook can customize compaction via hookSpecificOutput: - - customInstructions: String with focus areas for summarization + Returns custom instructions that guide the compaction summarizer to: + 1. Preserve critical workflow state (feature ID, modified files, test results) + 2. Discard verbose content (screenshots, long grep outputs, repeated reads) """ trigger = input_data.get("trigger", "auto") custom_instructions = input_data.get("custom_instructions") @@ -486,18 +512,53 @@ def create_client( print("[Context] Manual compaction requested") if custom_instructions: - print(f"[Context] Custom instructions: {custom_instructions}") + print(f"[Context] Custom instructions provided: {custom_instructions}") - # Return empty dict to allow compaction to proceed with default behavior - # To customize, return: - # { - # "hookSpecificOutput": { - # "hookEventName": "PreCompact", - # "customInstructions": "Focus on preserving file paths and test results" - # } - # } - return SyncHookJSONOutput() + # Build compaction instructions that preserve workflow-critical context + # while discarding verbose content that inflates token usage. + # + # The summarizer receives these instructions and uses them to decide + # what to keep vs. discard during context compaction. + compaction_guidance = "\n".join([ + "## PRESERVE (critical workflow state)", + "- Current feature ID, feature name, and feature status (pending/in_progress/passing/failing)", + "- List of all files created or modified during this session, with their paths", + "- Last test/lint/type-check results: command run, pass/fail status, and key error messages", + "- Current step in the workflow (e.g., implementing, testing, fixing lint errors)", + "- Any dependency information (which features block this one)", + "- Git operations performed (commits, branches created)", + "- MCP tool call results (feature_claim_and_get, feature_mark_passing, etc.)", + "- Key architectural decisions made during this session", + "", + "## DISCARD (verbose content safe to drop)", + "- Full screenshot base64 data (just note that a screenshot was taken and what it showed)", + "- Long grep/find/glob output listings (summarize to: searched for X, found Y relevant files)", + "- Repeated file reads of the same file (keep only the latest read or a summary of changes)", + "- Full file contents from Read tool (summarize to: read file X, key sections were Y)", + "- Verbose npm/pip install output (just note: dependencies installed successfully/failed)", + "- Full lint/type-check output when passing (just note: lint passed with no errors)", + "- Browser console message dumps (summarize to: N errors found, key error was X)", + "- Redundant tool result confirmations ([Done] markers)", + ]) + print("[Context] Applying custom compaction instructions (preserve workflow state, discard verbose content)") + + # The SDK's HookSpecificOutput union type does not yet include a + # PreCompactHookSpecificOutput variant, but the CLI protocol accepts + # {"hookEventName": "PreCompact", "customInstructions": "..."}. 
+ # The dict is serialized to JSON and sent to the CLI process directly, + # so the runtime behavior is correct despite the type mismatch. + return SyncHookJSONOutput( + hookSpecificOutput={ # type: ignore[typeddict-item] + "hookEventName": "PreCompact", + "customInstructions": compaction_guidance, + } + ) + + # PROMPT CACHING: The Claude Code CLI applies cache_control breakpoints internally. + # Our system_prompt benefits from automatic caching without explicit configuration. + # If explicit cache_control is needed, the SDK would need to accept content blocks + # with cache_control fields (not currently supported in v0.1.x). return ClaudeSDKClient( options=ClaudeAgentOptions( model=model, @@ -506,7 +567,7 @@ def create_client( setting_sources=["project"], # Enable skills, commands, and CLAUDE.md from project dir max_buffer_size=10 * 1024 * 1024, # 10MB for large Playwright screenshots allowed_tools=allowed_tools, - mcp_servers=mcp_servers, + mcp_servers=mcp_servers, # type: ignore[arg-type] # SDK accepts dict config at runtime hooks={ "PreToolUse": [ HookMatcher(matcher="Bash", hooks=[bash_hook_with_context]), @@ -518,7 +579,7 @@ def create_client( HookMatcher(hooks=[pre_compact_hook]), ], }, - max_turns=1000, + max_turns=max_turns, cwd=str(project_dir.resolve()), settings=str(settings_file.resolve()), # Use absolute path env=sdk_env, # Pass API configuration overrides to CLI subprocess @@ -536,7 +597,7 @@ def create_client( # parameters. Instead, context is managed via: # 1. betas=["context-1m-2025-08-07"] - Extended context window # 2. PreCompact hook - Intercept and customize compaction behavior - # 3. max_turns - Limit conversation turns (set to 1000 for long sessions) + # 3. max_turns - Limit conversation turns (per agent type: coding=300, testing=100) # # Future SDK versions may add explicit compaction controls. When available, # consider adding: diff --git a/env_constants.py b/env_constants.py new file mode 100644 index 0000000..2a8753d --- /dev/null +++ b/env_constants.py @@ -0,0 +1,27 @@ +""" +Shared Environment Variable Constants +====================================== + +Single source of truth for environment variables forwarded to Claude CLI +subprocesses. Imported by both ``client.py`` (agent sessions) and +``server/services/chat_constants.py`` (chat sessions) to avoid maintaining +duplicate lists. + +These allow autocoder to use alternative API endpoints (Ollama, GLM, +Vertex AI) without affecting the user's global Claude Code settings. +""" + +API_ENV_VARS: list[str] = [ + # Core API configuration + "ANTHROPIC_BASE_URL", # Custom API endpoint (e.g., https://api.z.ai/api/anthropic) + "ANTHROPIC_AUTH_TOKEN", # API authentication token + "API_TIMEOUT_MS", # Request timeout in milliseconds + # Model tier overrides + "ANTHROPIC_DEFAULT_SONNET_MODEL", # Model override for Sonnet + "ANTHROPIC_DEFAULT_OPUS_MODEL", # Model override for Opus + "ANTHROPIC_DEFAULT_HAIKU_MODEL", # Model override for Haiku + # Vertex AI configuration + "CLAUDE_CODE_USE_VERTEX", # Enable Vertex AI mode (set to "1") + "CLOUD_ML_REGION", # GCP region (e.g., us-east5) + "ANTHROPIC_VERTEX_PROJECT_ID", # GCP project ID +] diff --git a/mcp_server/feature_mcp.py b/mcp_server/feature_mcp.py index a394f1e..ce3859f 100755 --- a/mcp_server/feature_mcp.py +++ b/mcp_server/feature_mcp.py @@ -30,18 +30,18 @@ orchestrator, not by agents. Agents receive pre-assigned feature IDs. 
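# --- Reviewer sketch (not part of the patch) -------------------------------
# The hunk below imports atomic_transaction from api.database; its
# implementation is outside this diff. Later comments refer to an IMMEDIATE
# transaction guarding read-modify-write sequences, so the helper presumably
# provides semantics roughly like this plain-sqlite3 sketch (the real helper
# wraps a SQLAlchemy session_maker; this only illustrates the locking
# behaviour and is an assumption).
import sqlite3
from contextlib import contextmanager


@contextmanager
def atomic_transaction_sketch(db_path: str):
    """Hypothetical: run a block under SQLite's IMMEDIATE (write-locking) transaction."""
    conn = sqlite3.connect(db_path, isolation_level=None)  # autocommit; BEGIN/COMMIT managed manually
    try:
        conn.execute("BEGIN IMMEDIATE")  # take the write lock up front so writers serialize
        yield conn
        conn.execute("COMMIT")
    except Exception:
        conn.execute("ROLLBACK")
        raise
    finally:
        conn.close()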
import json import os import sys -import threading from contextlib import asynccontextmanager from pathlib import Path from typing import Annotated from mcp.server.fastmcp import FastMCP from pydantic import BaseModel, Field +from sqlalchemy import text # Add parent directory to path so we can import from api module sys.path.insert(0, str(Path(__file__).parent.parent)) -from api.database import Feature, create_database +from api.database import Feature, atomic_transaction, create_database from api.dependency_resolver import ( MAX_DEPENDENCIES_PER_FEATURE, compute_scheduling_scores, @@ -96,8 +96,9 @@ class BulkCreateInput(BaseModel): _session_maker = None _engine = None -# Lock for priority assignment to prevent race conditions -_priority_lock = threading.Lock() +# NOTE: The old threading.Lock() was removed because it only worked per-process, +# not cross-process. In parallel mode, multiple MCP servers run in separate +# processes, so the lock was useless. We now use atomic SQL operations instead. @asynccontextmanager @@ -243,15 +244,25 @@ def feature_mark_passing( """ session = get_session() try: - feature = session.query(Feature).filter(Feature.id == feature_id).first() - - if feature is None: - return json.dumps({"error": f"Feature with ID {feature_id} not found"}) - - feature.passes = True - feature.in_progress = False + # Atomic update with state guard - prevents double-pass in parallel mode + result = session.execute(text(""" + UPDATE features + SET passes = 1, in_progress = 0 + WHERE id = :id AND passes = 0 + """), {"id": feature_id}) session.commit() + if result.rowcount == 0: + # Check why the update didn't match + feature = session.query(Feature).filter(Feature.id == feature_id).first() + if feature is None: + return json.dumps({"error": f"Feature with ID {feature_id} not found"}) + if feature.passes: + return json.dumps({"error": f"Feature with ID {feature_id} is already passing"}) + return json.dumps({"error": "Failed to mark feature passing for unknown reason"}) + + # Get the feature name for the response + feature = session.query(Feature).filter(Feature.id == feature_id).first() return json.dumps({"success": True, "feature_id": feature_id, "name": feature.name}) except Exception as e: session.rollback() @@ -284,14 +295,20 @@ def feature_mark_failing( """ session = get_session() try: + # Check if feature exists first feature = session.query(Feature).filter(Feature.id == feature_id).first() - if feature is None: return json.dumps({"error": f"Feature with ID {feature_id} not found"}) - feature.passes = False - feature.in_progress = False + # Atomic update for parallel safety + session.execute(text(""" + UPDATE features + SET passes = 0, in_progress = 0 + WHERE id = :id + """), {"id": feature_id}) session.commit() + + # Refresh to get updated state session.refresh(feature) return json.dumps({ @@ -337,25 +354,28 @@ def feature_skip( return json.dumps({"error": "Cannot skip a feature that is already passing"}) old_priority = feature.priority + name = feature.name - # Use lock to prevent race condition in priority assignment - with _priority_lock: - # Get max priority and set this feature to max + 1 - max_priority_result = session.query(Feature.priority).order_by(Feature.priority.desc()).first() - new_priority = (max_priority_result[0] + 1) if max_priority_result else 1 - - feature.priority = new_priority - feature.in_progress = False - session.commit() + # Atomic update: set priority to max+1 in a single statement + # This prevents race conditions where two features get the same 
priority + session.execute(text(""" + UPDATE features + SET priority = (SELECT COALESCE(MAX(priority), 0) + 1 FROM features), + in_progress = 0 + WHERE id = :id + """), {"id": feature_id}) + session.commit() + # Refresh to get new priority session.refresh(feature) + new_priority = feature.priority return json.dumps({ - "id": feature.id, - "name": feature.name, + "id": feature_id, + "name": name, "old_priority": old_priority, "new_priority": new_priority, - "message": f"Feature '{feature.name}' moved to end of queue" + "message": f"Feature '{name}' moved to end of queue" }) except Exception as e: session.rollback() @@ -381,21 +401,27 @@ def feature_mark_in_progress( """ session = get_session() try: - feature = session.query(Feature).filter(Feature.id == feature_id).first() - - if feature is None: - return json.dumps({"error": f"Feature with ID {feature_id} not found"}) - - if feature.passes: - return json.dumps({"error": f"Feature with ID {feature_id} is already passing"}) - - if feature.in_progress: - return json.dumps({"error": f"Feature with ID {feature_id} is already in-progress"}) - - feature.in_progress = True + # Atomic claim: only succeeds if feature is not already claimed or passing + result = session.execute(text(""" + UPDATE features + SET in_progress = 1 + WHERE id = :id AND passes = 0 AND in_progress = 0 + """), {"id": feature_id}) session.commit() - session.refresh(feature) + if result.rowcount == 0: + # Check why the claim failed + feature = session.query(Feature).filter(Feature.id == feature_id).first() + if feature is None: + return json.dumps({"error": f"Feature with ID {feature_id} not found"}) + if feature.passes: + return json.dumps({"error": f"Feature with ID {feature_id} is already passing"}) + if feature.in_progress: + return json.dumps({"error": f"Feature with ID {feature_id} is already in-progress"}) + return json.dumps({"error": "Failed to mark feature in-progress for unknown reason"}) + + # Fetch the claimed feature + feature = session.query(Feature).filter(Feature.id == feature_id).first() return json.dumps(feature.to_dict()) except Exception as e: session.rollback() @@ -421,24 +447,35 @@ def feature_claim_and_get( """ session = get_session() try: + # First check if feature exists feature = session.query(Feature).filter(Feature.id == feature_id).first() - if feature is None: return json.dumps({"error": f"Feature with ID {feature_id} not found"}) if feature.passes: return json.dumps({"error": f"Feature with ID {feature_id} is already passing"}) - # Idempotent: if already in-progress, just return details - already_claimed = feature.in_progress - if not already_claimed: - feature.in_progress = True - session.commit() - session.refresh(feature) + # Try atomic claim: only succeeds if not already claimed + result = session.execute(text(""" + UPDATE features + SET in_progress = 1 + WHERE id = :id AND passes = 0 AND in_progress = 0 + """), {"id": feature_id}) + session.commit() - result = feature.to_dict() - result["already_claimed"] = already_claimed - return json.dumps(result) + # Determine if we claimed it or it was already claimed + already_claimed = result.rowcount == 0 + if already_claimed: + # Verify it's in_progress (not some other failure condition) + session.refresh(feature) + if not feature.in_progress: + return json.dumps({"error": f"Failed to claim feature {feature_id} for unknown reason"}) + + # Refresh to get current state + session.refresh(feature) + result_dict = feature.to_dict() + result_dict["already_claimed"] = already_claimed + return 
json.dumps(result_dict) except Exception as e: session.rollback() return json.dumps({"error": f"Failed to claim feature: {str(e)}"}) @@ -463,15 +500,20 @@ def feature_clear_in_progress( """ session = get_session() try: + # Check if feature exists feature = session.query(Feature).filter(Feature.id == feature_id).first() - if feature is None: return json.dumps({"error": f"Feature with ID {feature_id} not found"}) - feature.in_progress = False + # Atomic update - idempotent, safe in parallel mode + session.execute(text(""" + UPDATE features + SET in_progress = 0 + WHERE id = :id + """), {"id": feature_id}) session.commit() - session.refresh(feature) + session.refresh(feature) return json.dumps(feature.to_dict()) except Exception as e: session.rollback() @@ -506,13 +548,14 @@ def feature_create_bulk( Returns: JSON with: created (int) - number of features created, with_dependencies (int) """ - session = get_session() try: - # Use lock to prevent race condition in priority assignment - with _priority_lock: - # Get the starting priority - max_priority_result = session.query(Feature.priority).order_by(Feature.priority.desc()).first() - start_priority = (max_priority_result[0] + 1) if max_priority_result else 1 + # Use atomic transaction for bulk inserts to prevent priority conflicts + with atomic_transaction(_session_maker) as session: + # Get the starting priority atomically within the transaction + result = session.execute(text(""" + SELECT COALESCE(MAX(priority), 0) FROM features + """)).fetchone() + start_priority = (result[0] or 0) + 1 # First pass: validate all features and their index-based dependencies for i, feature_data in enumerate(features): @@ -546,7 +589,7 @@ def feature_create_bulk( "error": f"Feature at index {i} cannot depend on feature at index {idx} (forward reference not allowed)" }) - # Second pass: create all features + # Second pass: create all features with reserved priorities created_features: list[Feature] = [] for i, feature_data in enumerate(features): db_feature = Feature( @@ -571,20 +614,16 @@ def feature_create_bulk( if indices: # Convert indices to actual feature IDs dep_ids = [created_features[idx].id for idx in indices] - created_features[i].dependencies = sorted(dep_ids) + created_features[i].dependencies = sorted(dep_ids) # type: ignore[assignment] # SQLAlchemy JSON Column accepts list at runtime deps_count += 1 - session.commit() - - return json.dumps({ - "created": len(created_features), - "with_dependencies": deps_count - }) + # Commit happens automatically on context manager exit + return json.dumps({ + "created": len(created_features), + "with_dependencies": deps_count + }) except Exception as e: - session.rollback() return json.dumps({"error": str(e)}) - finally: - session.close() @mcp.tool() @@ -608,13 +647,14 @@ def feature_create( Returns: JSON with the created feature details including its ID """ - session = get_session() try: - # Use lock to prevent race condition in priority assignment - with _priority_lock: - # Get the next priority - max_priority_result = session.query(Feature.priority).order_by(Feature.priority.desc()).first() - next_priority = (max_priority_result[0] + 1) if max_priority_result else 1 + # Use atomic transaction to prevent priority collisions + with atomic_transaction(_session_maker) as session: + # Get the next priority atomically within the transaction + result = session.execute(text(""" + SELECT COALESCE(MAX(priority), 0) + 1 FROM features + """)).fetchone() + next_priority = result[0] db_feature = Feature( 
priority=next_priority, @@ -626,20 +666,18 @@ def feature_create( in_progress=False, ) session.add(db_feature) - session.commit() + session.flush() # Get the ID - session.refresh(db_feature) + feature_dict = db_feature.to_dict() + # Commit happens automatically on context manager exit return json.dumps({ "success": True, "message": f"Created feature: {name}", - "feature": db_feature.to_dict() + "feature": feature_dict }) except Exception as e: - session.rollback() return json.dumps({"error": str(e)}) - finally: - session.close() @mcp.tool() @@ -659,52 +697,49 @@ def feature_add_dependency( Returns: JSON with success status and updated dependencies list, or error message """ - session = get_session() try: - # Security: Self-reference check + # Security: Self-reference check (can do before transaction) if feature_id == dependency_id: return json.dumps({"error": "A feature cannot depend on itself"}) - feature = session.query(Feature).filter(Feature.id == feature_id).first() - dependency = session.query(Feature).filter(Feature.id == dependency_id).first() + # Use atomic transaction for consistent cycle detection + with atomic_transaction(_session_maker) as session: + feature = session.query(Feature).filter(Feature.id == feature_id).first() + dependency = session.query(Feature).filter(Feature.id == dependency_id).first() - if not feature: - return json.dumps({"error": f"Feature {feature_id} not found"}) - if not dependency: - return json.dumps({"error": f"Dependency feature {dependency_id} not found"}) + if not feature: + return json.dumps({"error": f"Feature {feature_id} not found"}) + if not dependency: + return json.dumps({"error": f"Dependency feature {dependency_id} not found"}) - current_deps = feature.dependencies or [] + current_deps = feature.dependencies or [] - # Security: Max dependencies limit - if len(current_deps) >= MAX_DEPENDENCIES_PER_FEATURE: - return json.dumps({"error": f"Maximum {MAX_DEPENDENCIES_PER_FEATURE} dependencies allowed per feature"}) + # Security: Max dependencies limit + if len(current_deps) >= MAX_DEPENDENCIES_PER_FEATURE: + return json.dumps({"error": f"Maximum {MAX_DEPENDENCIES_PER_FEATURE} dependencies allowed per feature"}) - # Check if already exists - if dependency_id in current_deps: - return json.dumps({"error": "Dependency already exists"}) + # Check if already exists + if dependency_id in current_deps: + return json.dumps({"error": "Dependency already exists"}) - # Security: Circular dependency check - # would_create_circular_dependency(features, source_id, target_id) - # source_id = feature gaining the dependency, target_id = feature being depended upon - all_features = [f.to_dict() for f in session.query(Feature).all()] - if would_create_circular_dependency(all_features, feature_id, dependency_id): - return json.dumps({"error": "Cannot add: would create circular dependency"}) + # Security: Circular dependency check + # Within IMMEDIATE transaction, snapshot is protected by write lock + all_features = [f.to_dict() for f in session.query(Feature).all()] + if would_create_circular_dependency(all_features, feature_id, dependency_id): + return json.dumps({"error": "Cannot add: would create circular dependency"}) - # Add dependency - current_deps.append(dependency_id) - feature.dependencies = sorted(current_deps) - session.commit() + # Add dependency atomically + new_deps = sorted(current_deps + [dependency_id]) + feature.dependencies = new_deps + # Commit happens automatically on context manager exit - return json.dumps({ - "success": True, - 
"feature_id": feature_id, - "dependencies": feature.dependencies - }) + return json.dumps({ + "success": True, + "feature_id": feature_id, + "dependencies": new_deps + }) except Exception as e: - session.rollback() return json.dumps({"error": f"Failed to add dependency: {str(e)}"}) - finally: - session.close() @mcp.tool() @@ -721,30 +756,29 @@ def feature_remove_dependency( Returns: JSON with success status and updated dependencies list, or error message """ - session = get_session() try: - feature = session.query(Feature).filter(Feature.id == feature_id).first() - if not feature: - return json.dumps({"error": f"Feature {feature_id} not found"}) + # Use atomic transaction for consistent read-modify-write + with atomic_transaction(_session_maker) as session: + feature = session.query(Feature).filter(Feature.id == feature_id).first() + if not feature: + return json.dumps({"error": f"Feature {feature_id} not found"}) - current_deps = feature.dependencies or [] - if dependency_id not in current_deps: - return json.dumps({"error": "Dependency does not exist"}) + current_deps = feature.dependencies or [] + if dependency_id not in current_deps: + return json.dumps({"error": "Dependency does not exist"}) - current_deps.remove(dependency_id) - feature.dependencies = current_deps if current_deps else None - session.commit() + # Remove dependency atomically + new_deps = [d for d in current_deps if d != dependency_id] + feature.dependencies = new_deps if new_deps else None + # Commit happens automatically on context manager exit - return json.dumps({ - "success": True, - "feature_id": feature_id, - "dependencies": feature.dependencies or [] - }) + return json.dumps({ + "success": True, + "feature_id": feature_id, + "dependencies": new_deps + }) except Exception as e: - session.rollback() return json.dumps({"error": f"Failed to remove dependency: {str(e)}"}) - finally: - session.close() @mcp.tool() @@ -897,9 +931,8 @@ def feature_set_dependencies( Returns: JSON with success status and updated dependencies list, or error message """ - session = get_session() try: - # Security: Self-reference check + # Security: Self-reference check (can do before transaction) if feature_id in dependency_ids: return json.dumps({"error": "A feature cannot depend on itself"}) @@ -911,45 +944,44 @@ def feature_set_dependencies( if len(dependency_ids) != len(set(dependency_ids)): return json.dumps({"error": "Duplicate dependencies not allowed"}) - feature = session.query(Feature).filter(Feature.id == feature_id).first() - if not feature: - return json.dumps({"error": f"Feature {feature_id} not found"}) + # Use atomic transaction for consistent cycle detection + with atomic_transaction(_session_maker) as session: + feature = session.query(Feature).filter(Feature.id == feature_id).first() + if not feature: + return json.dumps({"error": f"Feature {feature_id} not found"}) - # Validate all dependencies exist - all_feature_ids = {f.id for f in session.query(Feature).all()} - missing = [d for d in dependency_ids if d not in all_feature_ids] - if missing: - return json.dumps({"error": f"Dependencies not found: {missing}"}) + # Validate all dependencies exist + all_feature_ids = {f.id for f in session.query(Feature).all()} + missing = [d for d in dependency_ids if d not in all_feature_ids] + if missing: + return json.dumps({"error": f"Dependencies not found: {missing}"}) - # Check for circular dependencies - all_features = [f.to_dict() for f in session.query(Feature).all()] - # Temporarily update the feature's dependencies for cycle 
check - test_features = [] - for f in all_features: - if f["id"] == feature_id: - test_features.append({**f, "dependencies": dependency_ids}) - else: - test_features.append(f) + # Check for circular dependencies + # Within IMMEDIATE transaction, snapshot is protected by write lock + all_features = [f.to_dict() for f in session.query(Feature).all()] + test_features = [] + for f in all_features: + if f["id"] == feature_id: + test_features.append({**f, "dependencies": dependency_ids}) + else: + test_features.append(f) - for dep_id in dependency_ids: - # source_id = feature_id (gaining dep), target_id = dep_id (being depended upon) - if would_create_circular_dependency(test_features, feature_id, dep_id): - return json.dumps({"error": f"Cannot add dependency {dep_id}: would create circular dependency"}) + for dep_id in dependency_ids: + if would_create_circular_dependency(test_features, feature_id, dep_id): + return json.dumps({"error": f"Cannot add dependency {dep_id}: would create circular dependency"}) - # Set dependencies - feature.dependencies = sorted(dependency_ids) if dependency_ids else None - session.commit() + # Set dependencies atomically + sorted_deps = sorted(dependency_ids) if dependency_ids else None + feature.dependencies = sorted_deps + # Commit happens automatically on context manager exit - return json.dumps({ - "success": True, - "feature_id": feature_id, - "dependencies": feature.dependencies or [] - }) + return json.dumps({ + "success": True, + "feature_id": feature_id, + "dependencies": sorted_deps or [] + }) except Exception as e: - session.rollback() return json.dumps({"error": f"Failed to set dependencies: {str(e)}"}) - finally: - session.close() if __name__ == "__main__": diff --git a/parallel_orchestrator.py b/parallel_orchestrator.py index 574cbd2..d31db0b 100644 --- a/parallel_orchestrator.py +++ b/parallel_orchestrator.py @@ -19,19 +19,27 @@ Usage: """ import asyncio +import atexit +import logging import os +import re +import signal import subprocess import sys import threading from datetime import datetime, timezone from pathlib import Path -from typing import Callable, Literal +from typing import Any, Callable, Literal + +from sqlalchemy import text from api.database import Feature, create_database from api.dependency_resolver import are_dependencies_satisfied, compute_scheduling_scores from progress import has_features from server.utils.process_utils import kill_process_tree +logger = logging.getLogger(__name__) + # Root directory of autocoder (where this script and autonomous_agent_demo.py live) AUTOCODER_ROOT = Path(__file__).parent.resolve() @@ -79,23 +87,25 @@ class DebugLogger: debug_log = DebugLogger() -def _dump_database_state(session, label: str = ""): - """Helper to dump full database state to debug log.""" - from api.database import Feature - all_features = session.query(Feature).all() +def _dump_database_state(feature_dicts: list[dict], label: str = ""): + """Helper to dump full database state to debug log. - passing = [f for f in all_features if f.passes] - in_progress = [f for f in all_features if f.in_progress and not f.passes] - pending = [f for f in all_features if not f.passes and not f.in_progress] + Args: + feature_dicts: Pre-fetched list of feature dicts. + label: Optional label for the dump entry. 
+ """ + passing = [f for f in feature_dicts if f.get("passes")] + in_progress = [f for f in feature_dicts if f.get("in_progress") and not f.get("passes")] + pending = [f for f in feature_dicts if not f.get("passes") and not f.get("in_progress")] debug_log.log("DB_DUMP", f"Full database state {label}", - total_features=len(all_features), + total_features=len(feature_dicts), passing_count=len(passing), - passing_ids=[f.id for f in passing], + passing_ids=[f["id"] for f in passing], in_progress_count=len(in_progress), - in_progress_ids=[f.id for f in in_progress], + in_progress_ids=[f["id"] for f in in_progress], pending_count=len(pending), - pending_ids=[f.id for f in pending[:10]]) # First 10 pending only + pending_ids=[f["id"] for f in pending[:10]]) # First 10 pending only # ============================================================================= # Process Limits @@ -121,6 +131,7 @@ def _dump_database_state(session, label: str = ""): MAX_PARALLEL_AGENTS = 5 MAX_TOTAL_AGENTS = 10 DEFAULT_CONCURRENCY = 3 +DEFAULT_TESTING_BATCH_SIZE = 3 # Number of features per testing batch (1-5) POLL_INTERVAL = 5 # seconds between checking for ready features MAX_FEATURE_RETRIES = 3 # Maximum times to retry a failed feature INITIALIZER_TIMEOUT = 1800 # 30 minutes timeout for initializer @@ -139,11 +150,13 @@ class ParallelOrchestrator: self, project_dir: Path, max_concurrency: int = DEFAULT_CONCURRENCY, - model: str = None, + model: str | None = None, yolo_mode: bool = False, testing_agent_ratio: int = 1, - on_output: Callable[[int, str], None] = None, - on_status: Callable[[int, str], None] = None, + testing_batch_size: int = DEFAULT_TESTING_BATCH_SIZE, + batch_size: int = 3, + on_output: Callable[[int, str], None] | None = None, + on_status: Callable[[int, str], None] | None = None, ): """Initialize the orchestrator. @@ -155,6 +168,8 @@ class ParallelOrchestrator: yolo_mode: Whether to run in YOLO mode (skip testing agents entirely) testing_agent_ratio: Number of regression testing agents to maintain (0-3). 0 = disabled, 1-3 = maintain that many testing agents running independently. + testing_batch_size: Number of features to include per testing session (1-5). + Each testing agent receives this many features to regression test. on_output: Callback for agent output (feature_id, line) on_status: Callback for agent status changes (feature_id, status) """ @@ -163,6 +178,8 @@ class ParallelOrchestrator: self.model = model self.yolo_mode = yolo_mode self.testing_agent_ratio = min(max(testing_agent_ratio, 0), 3) # Clamp 0-3 + self.testing_batch_size = min(max(testing_batch_size, 1), 5) # Clamp 1-5 + self.batch_size = min(max(batch_size, 1), 3) # Clamp 1-3 self.on_output = on_output self.on_status = on_status @@ -182,14 +199,27 @@ class ParallelOrchestrator: # Track feature failures to prevent infinite retry loops self._failure_counts: dict[int, int] = {} + # Track recently tested feature IDs to avoid redundant re-testing. + # Cleared when all passing features have been covered at least once. 
+ self._recently_tested: set[int] = set() + + # Batch tracking: primary feature_id -> all feature IDs in batch + self._batch_features: dict[int, list[int]] = {} + # Reverse mapping: any feature_id -> primary feature_id + self._feature_to_primary: dict[int, int] = {} + + # Shutdown flag for async-safe signal handling + # Signal handlers only set this flag; cleanup happens in the main loop + self._shutdown_requested = False + # Session tracking for logging/debugging - self.session_start_time: datetime = None + self.session_start_time: datetime | None = None # Event signaled when any agent completes, allowing the main loop to wake # immediately instead of waiting for the full POLL_INTERVAL timeout. # This reduces latency when spawning the next feature after completion. - self._agent_completed_event: asyncio.Event = None # Created in run_loop - self._event_loop: asyncio.AbstractEventLoop = None # Stored for thread-safe signaling + self._agent_completed_event: asyncio.Event | None = None # Created in run_loop + self._event_loop: asyncio.AbstractEventLoop | None = None # Stored for thread-safe signaling # Database session for this orchestrator self._engine, self._session_maker = create_database(project_dir) @@ -205,6 +235,9 @@ class ParallelOrchestrator: This simplifies the architecture by removing unnecessary coordination. Returns the feature ID if available, None if no passing features exist. + + Note: Prefer _get_test_batch() for batch testing mode. This method is + retained for backward compatibility. """ from sqlalchemy.sql.expression import func @@ -223,163 +256,384 @@ class ParallelOrchestrator: finally: session.close() - def get_resumable_features(self) -> list[dict]: + def _get_test_batch(self, batch_size: int = 3) -> list[int]: + """Select a prioritized batch of passing features for regression testing. + + Uses weighted scoring to prioritize features that: + 1. Haven't been tested recently in this orchestrator session + 2. Are depended on by many other features (higher impact if broken) + 3. Have more dependencies themselves (complex integration points) + + When all passing features have been recently tested, the tracking set + is cleared so the cycle starts fresh. + + Args: + batch_size: Maximum number of feature IDs to return (1-5). + + Returns: + List of feature IDs to test, may be shorter than batch_size if + fewer passing features are available. Empty list if none available. + """ + session = self.get_session() + try: + session.expire_all() + passing = ( + session.query(Feature) + .filter(Feature.passes == True) + .filter(Feature.in_progress == False) # Don't test while coding + .all() + ) + + # Extract data from ORM objects before closing the session to avoid + # DetachedInstanceError when accessing attributes after session.close(). + passing_data: list[dict] = [] + for f in passing: + passing_data.append({ + 'id': f.id, + 'dependencies': f.get_dependencies_safe() if hasattr(f, 'get_dependencies_safe') else [], + }) + finally: + session.close() + + if not passing_data: + return [] + + # Build a reverse dependency map: feature_id -> count of features that depend on it. + # The Feature model stores dependencies (what I depend ON), so we invert to find + # dependents (what depends ON me). 
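# --- Reviewer sketch (not part of the patch) -------------------------------
# A tiny worked example of the inversion and scoring described above, using
# made-up feature data and the weights applied below (2 * dependent count,
# +5 if not recently tested, +min(own dependency count, 3)).
example_passing = [
    {"id": 1, "dependencies": []},       # depended on by 2 and 3
    {"id": 2, "dependencies": [1]},      # depended on by 3
    {"id": 3, "dependencies": [1, 2]},   # nothing depends on it
]
example_dependent_counts: dict[int, int] = {}
for fd in example_passing:
    for dep_id in fd["dependencies"]:
        example_dependent_counts[dep_id] = example_dependent_counts.get(dep_id, 0) + 1
# example_dependent_counts == {1: 2, 2: 1}
# Scores when nothing has been tested recently:
#   id 1 -> 2*2 + 5 + 0 = 9   (high-impact, test it first)
#   id 2 -> 2*1 + 5 + 1 = 8
#   id 3 -> 2*0 + 5 + 2 = 7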
+ dependent_counts: dict[int, int] = {} + for fd in passing_data: + for dep_id in fd['dependencies']: + dependent_counts[dep_id] = dependent_counts.get(dep_id, 0) + 1 + + # Exclude features that are already being tested by running testing agents + # to avoid redundant concurrent testing of the same features. + # running_testing_agents is dict[pid, (primary_feature_id, process)] + with self._lock: + currently_testing_ids: set[int] = set() + for _pid, (feat_id, _proc) in self.running_testing_agents.items(): + currently_testing_ids.add(feat_id) + + # If all passing features have been recently tested, reset the tracker + # so we cycle through them again rather than returning empty batches. + passing_ids = {fd['id'] for fd in passing_data} + if passing_ids.issubset(self._recently_tested): + self._recently_tested.clear() + + # Score each feature by testing priority + scored: list[tuple[int, int]] = [] + for fd in passing_data: + f_id = fd['id'] + + # Skip features already being tested by a running testing agent + if f_id in currently_testing_ids: + continue + + score = 0 + + # Weight 1: Features depended on by many others are higher impact + # if they regress, so test them more often + score += dependent_counts.get(f_id, 0) * 2 + + # Weight 2: Strongly prefer features not tested recently + if f_id not in self._recently_tested: + score += 5 + + # Weight 3: Features with more dependencies are integration points + # that are more likely to regress when other code changes + dep_count = len(fd['dependencies']) + score += min(dep_count, 3) # Cap at 3 to avoid over-weighting + + scored.append((f_id, score)) + + # Sort by score descending (highest priority first) + scored.sort(key=lambda x: x[1], reverse=True) + selected = [fid for fid, _ in scored[:batch_size]] + + # Track what we've tested to avoid re-testing the same features next batch + self._recently_tested.update(selected) + + debug_log.log("TEST_BATCH", f"Selected {len(selected)} features for testing batch", + selected_ids=selected, + recently_tested_count=len(self._recently_tested), + total_passing=len(passing_data)) + + return selected + + def build_feature_batches( + self, + ready: list[dict], + all_features: list[dict], + scheduling_scores: dict[int, float], + ) -> list[list[dict]]: + """Build dependency-aware feature batches for coding agents. + + Each batch contains up to `batch_size` features. The algorithm: + 1. Start with a ready feature (sorted by scheduling score) + 2. Chain extension: find dependents whose deps are satisfied if earlier batch features pass + 3. 
Same-category fill: fill remaining slots with ready features from the same category + + Args: + ready: Ready features (sorted by scheduling score) + all_features: All features for dependency checking + scheduling_scores: Pre-computed scheduling scores + + Returns: + List of batches, each batch is a list of feature dicts + """ + if self.batch_size <= 1: + # No batching - return each feature as a single-item batch + return [[f] for f in ready] + + # Build children adjacency: parent_id -> [child_ids] + children: dict[int, list[int]] = {f["id"]: [] for f in all_features} + feature_map: dict[int, dict] = {f["id"]: f for f in all_features} + for f in all_features: + for dep_id in (f.get("dependencies") or []): + if dep_id in children: + children[dep_id].append(f["id"]) + + # Pre-compute passing IDs + passing_ids = {f["id"] for f in all_features if f.get("passes")} + + used_ids: set[int] = set() # Features already assigned to a batch + batches: list[list[dict]] = [] + + for feature in ready: + if feature["id"] in used_ids: + continue + + batch = [feature] + used_ids.add(feature["id"]) + # Simulate passing set = real passing + batch features + simulated_passing = passing_ids | {feature["id"]} + + # Phase 1: Chain extension - find dependents whose deps are met + for _ in range(self.batch_size - 1): + best_candidate = None + best_score = -1.0 + # Check children of all features currently in the batch + candidate_ids: set[int] = set() + for bf in batch: + for child_id in children.get(bf["id"], []): + if child_id not in used_ids and child_id not in simulated_passing: + candidate_ids.add(child_id) + + for cid in candidate_ids: + cf = feature_map.get(cid) + if not cf or cf.get("passes") or cf.get("in_progress"): + continue + # Check if ALL deps are satisfied by simulated passing set + deps = cf.get("dependencies") or [] + if all(d in simulated_passing for d in deps): + score = scheduling_scores.get(cid, 0) + if score > best_score: + best_score = score + best_candidate = cf + + if best_candidate: + batch.append(best_candidate) + used_ids.add(best_candidate["id"]) + simulated_passing.add(best_candidate["id"]) + else: + break + + # Phase 2: Same-category fill + if len(batch) < self.batch_size: + category = feature.get("category", "") + for rf in ready: + if len(batch) >= self.batch_size: + break + if rf["id"] in used_ids: + continue + if rf.get("category", "") == category: + batch.append(rf) + used_ids.add(rf["id"]) + + batches.append(batch) + + debug_log.log("BATCH", f"Built {len(batches)} batches from {len(ready)} ready features", + batch_sizes=[len(b) for b in batches], + batch_ids=[[f['id'] for f in b] for b in batches[:5]]) + + return batches + + def get_resumable_features( + self, + feature_dicts: list[dict] | None = None, + scheduling_scores: dict[int, float] | None = None, + ) -> list[dict]: """Get features that were left in_progress from a previous session. These are features where in_progress=True but passes=False, and they're not currently being worked on by this orchestrator. This handles the case where a previous session was interrupted before completing the feature. + + Args: + feature_dicts: Pre-fetched list of feature dicts. If None, queries the database. + scheduling_scores: Pre-computed scheduling scores. If None, computed from feature_dicts. 
""" - session = self.get_session() - try: - # Force fresh read from database to avoid stale cached data - # This is critical when agent subprocesses have committed changes - session.expire_all() + if feature_dicts is None: + session = self.get_session() + try: + session.expire_all() + all_features = session.query(Feature).all() + feature_dicts = [f.to_dict() for f in all_features] + finally: + session.close() - # Find features that are in_progress but not complete - stale = session.query(Feature).filter( - Feature.in_progress == True, - Feature.passes == False - ).all() + # Snapshot running IDs once (include all batch feature IDs) + with self._lock: + running_ids = set(self.running_coding_agents.keys()) + for batch_ids in self._batch_features.values(): + running_ids.update(batch_ids) - resumable = [] - for f in stale: - # Skip if already running in this orchestrator instance - with self._lock: - if f.id in self.running_coding_agents: - continue - # Skip if feature has failed too many times - if self._failure_counts.get(f.id, 0) >= MAX_FEATURE_RETRIES: - continue - resumable.append(f.to_dict()) + resumable = [] + for fd in feature_dicts: + if not fd.get("in_progress") or fd.get("passes"): + continue + # Skip if already running in this orchestrator instance + if fd["id"] in running_ids: + continue + # Skip if feature has failed too many times + if self._failure_counts.get(fd["id"], 0) >= MAX_FEATURE_RETRIES: + continue + resumable.append(fd) - # Sort by scheduling score (higher = first), then priority, then id - all_dicts = [f.to_dict() for f in session.query(Feature).all()] - scores = compute_scheduling_scores(all_dicts) - resumable.sort(key=lambda f: (-scores.get(f["id"], 0), f["priority"], f["id"])) - return resumable - finally: - session.close() + # Sort by scheduling score (higher = first), then priority, then id + if scheduling_scores is None: + scheduling_scores = compute_scheduling_scores(feature_dicts) + resumable.sort(key=lambda f: (-scheduling_scores.get(f["id"], 0), f["priority"], f["id"])) + return resumable - def get_ready_features(self) -> list[dict]: - """Get features with satisfied dependencies, not already running.""" - session = self.get_session() - try: - # Force fresh read from database to avoid stale cached data - # This is critical when agent subprocesses have committed changes - session.expire_all() + def get_ready_features( + self, + feature_dicts: list[dict] | None = None, + scheduling_scores: dict[int, float] | None = None, + ) -> list[dict]: + """Get features with satisfied dependencies, not already running. - all_features = session.query(Feature).all() - all_dicts = [f.to_dict() for f in all_features] + Args: + feature_dicts: Pre-fetched list of feature dicts. If None, queries the database. + scheduling_scores: Pre-computed scheduling scores. If None, computed from feature_dicts. 
+ """ + if feature_dicts is None: + session = self.get_session() + try: + session.expire_all() + all_features = session.query(Feature).all() + feature_dicts = [f.to_dict() for f in all_features] + finally: + session.close() - # Pre-compute passing_ids once to avoid O(n^2) in the loop - passing_ids = {f.id for f in all_features if f.passes} + # Pre-compute passing_ids once to avoid O(n^2) in the loop + passing_ids = {fd["id"] for fd in feature_dicts if fd.get("passes")} - ready = [] - skipped_reasons = {"passes": 0, "in_progress": 0, "running": 0, "failed": 0, "deps": 0} - for f in all_features: - if f.passes: - skipped_reasons["passes"] += 1 - continue - if f.in_progress: - skipped_reasons["in_progress"] += 1 - continue - # Skip if already running in this orchestrator - with self._lock: - if f.id in self.running_coding_agents: - skipped_reasons["running"] += 1 - continue - # Skip if feature has failed too many times - if self._failure_counts.get(f.id, 0) >= MAX_FEATURE_RETRIES: - skipped_reasons["failed"] += 1 - continue - # Check dependencies (pass pre-computed passing_ids) - if are_dependencies_satisfied(f.to_dict(), all_dicts, passing_ids): - ready.append(f.to_dict()) - else: - skipped_reasons["deps"] += 1 + # Snapshot running IDs once (include all batch feature IDs) + with self._lock: + running_ids = set(self.running_coding_agents.keys()) + for batch_ids in self._batch_features.values(): + running_ids.update(batch_ids) - # Sort by scheduling score (higher = first), then priority, then id - scores = compute_scheduling_scores(all_dicts) - ready.sort(key=lambda f: (-scores.get(f["id"], 0), f["priority"], f["id"])) + ready = [] + skipped_reasons = {"passes": 0, "in_progress": 0, "running": 0, "failed": 0, "deps": 0} + for fd in feature_dicts: + if fd.get("passes"): + skipped_reasons["passes"] += 1 + continue + if fd.get("in_progress"): + skipped_reasons["in_progress"] += 1 + continue + # Skip if already running in this orchestrator + if fd["id"] in running_ids: + skipped_reasons["running"] += 1 + continue + # Skip if feature has failed too many times + if self._failure_counts.get(fd["id"], 0) >= MAX_FEATURE_RETRIES: + skipped_reasons["failed"] += 1 + continue + # Check dependencies (pass pre-computed passing_ids) + if are_dependencies_satisfied(fd, feature_dicts, passing_ids): + ready.append(fd) + else: + skipped_reasons["deps"] += 1 - # Debug logging - passing = sum(1 for f in all_features if f.passes) - in_progress = sum(1 for f in all_features if f.in_progress and not f.passes) - print( - f"[DEBUG] get_ready_features: {len(ready)} ready, " - f"{passing} passing, {in_progress} in_progress, {len(all_features)} total", - flush=True - ) - print( - f"[DEBUG] Skipped: {skipped_reasons['passes']} passing, {skipped_reasons['in_progress']} in_progress, " - f"{skipped_reasons['running']} running, {skipped_reasons['failed']} failed, {skipped_reasons['deps']} blocked by deps", - flush=True - ) + # Sort by scheduling score (higher = first), then priority, then id + if scheduling_scores is None: + scheduling_scores = compute_scheduling_scores(feature_dicts) + ready.sort(key=lambda f: (-scheduling_scores.get(f["id"], 0), f["priority"], f["id"])) - # Log to debug file (but not every call to avoid spam) - debug_log.log("READY", "get_ready_features() called", - ready_count=len(ready), - ready_ids=[f['id'] for f in ready[:5]], # First 5 only - passing=passing, - in_progress=in_progress, - total=len(all_features), - skipped=skipped_reasons) + # Summary counts for logging + passing = 
skipped_reasons["passes"] + in_progress = skipped_reasons["in_progress"] + total = len(feature_dicts) - return ready - finally: - session.close() + debug_log.log("READY", "get_ready_features() called", + ready_count=len(ready), + ready_ids=[f['id'] for f in ready[:5]], # First 5 only + passing=passing, + in_progress=in_progress, + total=total, + skipped=skipped_reasons) - def get_all_complete(self) -> bool: + return ready + + def get_all_complete(self, feature_dicts: list[dict] | None = None) -> bool: """Check if all features are complete or permanently failed. Returns False if there are no features (initialization needed). + + Args: + feature_dicts: Pre-fetched list of feature dicts. If None, queries the database. """ - session = self.get_session() - try: - # Force fresh read from database to avoid stale cached data - # This is critical when agent subprocesses have committed changes - session.expire_all() + if feature_dicts is None: + session = self.get_session() + try: + session.expire_all() + all_features = session.query(Feature).all() + feature_dicts = [f.to_dict() for f in all_features] + finally: + session.close() - all_features = session.query(Feature).all() + # No features = NOT complete, need initialization + if len(feature_dicts) == 0: + return False - # No features = NOT complete, need initialization - if len(all_features) == 0: - return False + passing_count = 0 + failed_count = 0 + pending_count = 0 + for fd in feature_dicts: + if fd.get("passes"): + passing_count += 1 + continue # Completed successfully + if self._failure_counts.get(fd["id"], 0) >= MAX_FEATURE_RETRIES: + failed_count += 1 + continue # Permanently failed, count as "done" + pending_count += 1 - passing_count = 0 - failed_count = 0 - pending_count = 0 - for f in all_features: - if f.passes: - passing_count += 1 - continue # Completed successfully - if self._failure_counts.get(f.id, 0) >= MAX_FEATURE_RETRIES: - failed_count += 1 - continue # Permanently failed, count as "done" - pending_count += 1 + total = len(feature_dicts) + is_complete = pending_count == 0 + debug_log.log("COMPLETE_CHECK", f"get_all_complete: {passing_count}/{total} passing, " + f"{failed_count} failed, {pending_count} pending -> {is_complete}") + return is_complete - total = len(all_features) - is_complete = pending_count == 0 - print( - f"[DEBUG] get_all_complete: {passing_count}/{total} passing, " - f"{failed_count} failed, {pending_count} pending -> {is_complete}", - flush=True - ) - return is_complete - finally: - session.close() + def get_passing_count(self, feature_dicts: list[dict] | None = None) -> int: + """Get the number of passing features. - def get_passing_count(self) -> int: - """Get the number of passing features.""" - session = self.get_session() - try: - session.expire_all() - return session.query(Feature).filter(Feature.passes == True).count() - finally: - session.close() + Args: + feature_dicts: Pre-fetched list of feature dicts. If None, queries the database. + """ + if feature_dicts is None: + session = self.get_session() + try: + session.expire_all() + count: int = session.query(Feature).filter(Feature.passes == True).count() + return count + finally: + session.close() + return sum(1 for fd in feature_dicts if fd.get("passes")) - def _maintain_testing_agents(self) -> None: + def _maintain_testing_agents(self, feature_dicts: list[dict] | None = None) -> None: """Maintain the desired count of testing agents independently. 
This runs every loop iteration and spawns testing agents as needed to maintain @@ -393,18 +647,21 @@ class ParallelOrchestrator: - YOLO mode is enabled - testing_agent_ratio is 0 - No passing features exist yet + + Args: + feature_dicts: Pre-fetched list of feature dicts. If None, queries the database. """ # Skip if testing is disabled if self.yolo_mode or self.testing_agent_ratio == 0: return # No testing until there are passing features - passing_count = self.get_passing_count() + passing_count = self.get_passing_count(feature_dicts) if passing_count == 0: return # Don't spawn testing agents if all features are already complete - if self.get_all_complete(): + if self.get_all_complete(feature_dicts): return # Spawn testing agents one at a time, re-checking limits each time @@ -430,7 +687,7 @@ class ParallelOrchestrator: passing_count=passing_count) # Spawn outside lock (I/O bound operation) - print(f"[DEBUG] Spawning testing agent ({spawn_index}/{desired})", flush=True) + logger.debug("Spawning testing agent (%d/%d)", spawn_index, desired) success, msg = self._spawn_testing_agent() if not success: debug_log.log("TESTING", f"Spawn failed, stopping: {msg}") @@ -488,6 +745,75 @@ class ParallelOrchestrator: return True, f"Started feature {feature_id}" + def start_feature_batch(self, feature_ids: list[int], resume: bool = False) -> tuple[bool, str]: + """Start a coding agent for a batch of features. + + Args: + feature_ids: List of feature IDs to implement in batch + resume: If True, resume features already in_progress + + Returns: + Tuple of (success, message) + """ + if not feature_ids: + return False, "No features to start" + + # Single feature falls back to start_feature + if len(feature_ids) == 1: + return self.start_feature(feature_ids[0], resume=resume) + + with self._lock: + # Check if any feature in batch is already running + for fid in feature_ids: + if fid in self.running_coding_agents or fid in self._feature_to_primary: + return False, f"Feature {fid} already running" + if len(self.running_coding_agents) >= self.max_concurrency: + return False, "At max concurrency" + total_agents = len(self.running_coding_agents) + len(self.running_testing_agents) + if total_agents >= MAX_TOTAL_AGENTS: + return False, f"At max total agents ({total_agents}/{MAX_TOTAL_AGENTS})" + + # Mark all features as in_progress in a single transaction + session = self.get_session() + try: + features_to_mark = [] + for fid in feature_ids: + feature = session.query(Feature).filter(Feature.id == fid).first() + if not feature: + return False, f"Feature {fid} not found" + if feature.passes: + return False, f"Feature {fid} already complete" + if not resume: + if feature.in_progress: + return False, f"Feature {fid} already in progress" + features_to_mark.append(feature) + else: + if not feature.in_progress: + return False, f"Feature {fid} not in progress, cannot resume" + + for feature in features_to_mark: + feature.in_progress = True + session.commit() + finally: + session.close() + + # Spawn batch coding agent + success, message = self._spawn_coding_agent_batch(feature_ids) + if not success: + # Clear in_progress on failure + session = self.get_session() + try: + for fid in feature_ids: + feature = session.query(Feature).filter(Feature.id == fid).first() + if feature and not resume: + feature.in_progress = False + session.commit() + finally: + session.close() + return False, message + + return True, f"Started batch [{', '.join(str(fid) for fid in feature_ids)}]" + def _spawn_coding_agent(self, feature_id: int) -> 
tuple[bool, str]: """Spawn a coding agent subprocess for a specific feature.""" # Create abort event @@ -511,11 +837,14 @@ class ParallelOrchestrator: try: # CREATE_NO_WINDOW on Windows prevents console window pop-ups # stdin=DEVNULL prevents blocking on stdin reads - popen_kwargs = { + # encoding="utf-8" and errors="replace" fix Windows CP1252 issues + popen_kwargs: dict[str, Any] = { "stdin": subprocess.DEVNULL, "stdout": subprocess.PIPE, "stderr": subprocess.STDOUT, "text": True, + "encoding": "utf-8", + "errors": "replace", "cwd": str(AUTOCODER_ROOT), # Run from autocoder root for proper imports "env": {**os.environ, "PYTHONUNBUFFERED": "1"}, } @@ -546,18 +875,90 @@ class ParallelOrchestrator: daemon=True ).start() - if self.on_status: + if self.on_status is not None: self.on_status(feature_id, "running") print(f"Started coding agent for feature #{feature_id}", flush=True) return True, f"Started feature {feature_id}" - def _spawn_testing_agent(self) -> tuple[bool, str]: - """Spawn a testing agent subprocess for regression testing. + def _spawn_coding_agent_batch(self, feature_ids: list[int]) -> tuple[bool, str]: + """Spawn a coding agent subprocess for a batch of features.""" + primary_id = feature_ids[0] + abort_event = threading.Event() - Picks a random passing feature to test. Multiple testing agents can test - the same feature concurrently - this is intentional and simplifies the - architecture by removing claim coordination. + cmd = [ + sys.executable, + "-u", + str(AUTOCODER_ROOT / "autonomous_agent_demo.py"), + "--project-dir", str(self.project_dir), + "--max-iterations", "1", + "--agent-type", "coding", + "--feature-ids", ",".join(str(fid) for fid in feature_ids), + ] + if self.model: + cmd.extend(["--model", self.model]) + if self.yolo_mode: + cmd.append("--yolo") + + try: + popen_kwargs: dict[str, Any] = { + "stdin": subprocess.DEVNULL, + "stdout": subprocess.PIPE, + "stderr": subprocess.STDOUT, + "text": True, + "encoding": "utf-8", + "errors": "replace", + "cwd": str(AUTOCODER_ROOT), + "env": {**os.environ, "PYTHONUNBUFFERED": "1"}, + } + if sys.platform == "win32": + popen_kwargs["creationflags"] = subprocess.CREATE_NO_WINDOW + + proc = subprocess.Popen(cmd, **popen_kwargs) + except Exception as e: + # Reset in_progress on failure + session = self.get_session() + try: + for fid in feature_ids: + feature = session.query(Feature).filter(Feature.id == fid).first() + if feature: + feature.in_progress = False + session.commit() + finally: + session.close() + return False, f"Failed to start batch agent: {e}" + + with self._lock: + self.running_coding_agents[primary_id] = proc + self.abort_events[primary_id] = abort_event + self._batch_features[primary_id] = list(feature_ids) + for fid in feature_ids: + self._feature_to_primary[fid] = primary_id + + # Start output reader thread + threading.Thread( + target=self._read_output, + args=(primary_id, proc, abort_event, "coding"), + daemon=True + ).start() + + if self.on_status is not None: + for fid in feature_ids: + self.on_status(fid, "running") + + ids_str = ", ".join(f"#{fid}" for fid in feature_ids) + print(f"Started coding agent for features {ids_str}", flush=True) + return True, f"Started batch [{ids_str}]" + + def _spawn_testing_agent(self) -> tuple[bool, str]: + """Spawn a testing agent subprocess for batch regression testing. + + Selects a prioritized batch of passing features using weighted scoring + (via _get_test_batch) and passes them as --testing-feature-ids to the + subprocess. 
Falls back to single --testing-feature-id for batches of one. + + Multiple testing agents can test the same feature concurrently - this is + intentional and simplifies the architecture by removing claim coordination. """ # Check limits first (under lock) with self._lock: @@ -570,13 +971,16 @@ class ParallelOrchestrator: debug_log.log("TESTING", f"Skipped spawn - at max total agents ({total_agents}/{MAX_TOTAL_AGENTS})") return False, f"At max total agents ({total_agents})" - # Pick a random passing feature (no claim needed - concurrent testing is fine) - feature_id = self._get_random_passing_feature() - if feature_id is None: + # Select a weighted batch of passing features for regression testing + batch = self._get_test_batch(self.testing_batch_size) + if not batch: debug_log.log("TESTING", "No features available for testing") return False, "No features available for testing" - debug_log.log("TESTING", f"Selected feature #{feature_id} for testing") + # Use the first feature ID as the representative for logging/tracking + primary_feature_id = batch[0] + batch_str = ",".join(str(fid) for fid in batch) + debug_log.log("TESTING", f"Selected batch for testing: [{batch_str}]") # Spawn the testing agent with self._lock: @@ -592,7 +996,7 @@ class ParallelOrchestrator: "--project-dir", str(self.project_dir), "--max-iterations", "1", "--agent-type", "testing", - "--testing-feature-id", str(feature_id), + "--testing-feature-ids", batch_str, ] if self.model: cmd.extend(["--model", self.model]) @@ -600,11 +1004,14 @@ class ParallelOrchestrator: try: # CREATE_NO_WINDOW on Windows prevents console window pop-ups # stdin=DEVNULL prevents blocking on stdin reads - popen_kwargs = { + # encoding="utf-8" and errors="replace" fix Windows CP1252 issues + popen_kwargs: dict[str, Any] = { "stdin": subprocess.DEVNULL, "stdout": subprocess.PIPE, "stderr": subprocess.STDOUT, "text": True, + "encoding": "utf-8", + "errors": "replace", "cwd": str(AUTOCODER_ROOT), "env": {**os.environ, "PYTHONUNBUFFERED": "1"}, } @@ -618,22 +1025,22 @@ class ParallelOrchestrator: # Register process by PID (not feature_id) to avoid overwrites # when multiple agents test the same feature - self.running_testing_agents[proc.pid] = (feature_id, proc) + self.running_testing_agents[proc.pid] = (primary_feature_id, proc) testing_count = len(self.running_testing_agents) - # Start output reader thread with feature ID (same as coding agents) + # Start output reader thread with primary feature ID for log attribution threading.Thread( target=self._read_output, - args=(feature_id, proc, threading.Event(), "testing"), + args=(primary_feature_id, proc, threading.Event(), "testing"), daemon=True ).start() - print(f"Started testing agent for feature #{feature_id} (PID {proc.pid})", flush=True) - debug_log.log("TESTING", f"Successfully spawned testing agent for feature #{feature_id}", + print(f"Started testing agent for features [{batch_str}] (PID {proc.pid})", flush=True) + debug_log.log("TESTING", f"Successfully spawned testing agent for batch [{batch_str}]", pid=proc.pid, - feature_id=feature_id, + feature_ids=batch, total_testing_agents=testing_count) - return True, f"Started testing agent for feature #{feature_id}" + return True, f"Started testing agent for features [{batch_str}]" async def _run_initializer(self) -> bool: """Run initializer agent as blocking subprocess. 
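For reference, the batch passed via `--testing-feature-ids` comes from `_get_test_batch`, which is defined earlier in this diff and not visible in this hunk. Below is a minimal sketch of weighted selection without replacement, assuming a hypothetical `last_tested_age` field as the weight input; the real scoring inside `_get_test_batch` may differ.

import random

def pick_weighted_test_batch(passing_features: list[dict], batch_size: int) -> list[int]:
    # Illustrative only: bias selection toward features that have gone longest
    # without a regression pass, sampling without replacement up to batch_size.
    if not passing_features or batch_size <= 0:
        return []
    pool = list(passing_features)
    chosen: list[int] = []
    while pool and len(chosen) < batch_size:
        weights = [1.0 + f.get("last_tested_age", 0) for f in pool]  # hypothetical field
        picked = random.choices(pool, weights=weights, k=1)[0]
        chosen.append(picked["id"])
        pool.remove(picked)  # no duplicate IDs within one testing batch
    return chosen

# e.g. a batch of 3 drawn from 5 passing features
features = [{"id": i, "last_tested_age": age} for i, age in enumerate([0, 4, 1, 9, 2], start=1)]
print(pick_weighted_test_batch(features, 3))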
@@ -658,11 +1065,14 @@ class ParallelOrchestrator: # CREATE_NO_WINDOW on Windows prevents console window pop-ups # stdin=DEVNULL prevents blocking on stdin reads - popen_kwargs = { + # encoding="utf-8" and errors="replace" fix Windows CP1252 issues + popen_kwargs: dict[str, Any] = { "stdin": subprocess.DEVNULL, "stdout": subprocess.PIPE, "stderr": subprocess.STDOUT, "text": True, + "encoding": "utf-8", + "errors": "replace", "cwd": str(AUTOCODER_ROOT), "env": {**os.environ, "PYTHONUNBUFFERED": "1"}, } @@ -682,7 +1092,7 @@ class ParallelOrchestrator: if not line: break print(line.rstrip(), flush=True) - if self.on_output: + if self.on_output is not None: self.on_output(0, line.rstrip()) # Use 0 as feature_id for initializer proc.wait() @@ -707,6 +1117,11 @@ class ParallelOrchestrator: return True + # Pattern to detect when a batch agent claims a new feature + _CLAIM_FEATURE_PATTERN = re.compile( + r"feature_claim_and_get\b.*?['\"]?feature_id['\"]?\s*[:=]\s*(\d+)" + ) + def _read_output( self, feature_id: int | None, @@ -715,16 +1130,26 @@ class ParallelOrchestrator: agent_type: Literal["coding", "testing"] = "coding", ): """Read output from subprocess and emit events.""" + current_feature_id = feature_id try: + if proc.stdout is None: + proc.wait() + return for line in proc.stdout: if abort.is_set(): break line = line.rstrip() - if self.on_output: - self.on_output(feature_id or 0, line) + # Detect when a batch agent claims a new feature + claim_match = self._CLAIM_FEATURE_PATTERN.search(line) + if claim_match: + claimed_id = int(claim_match.group(1)) + if claimed_id != current_feature_id: + current_feature_id = claimed_id + if self.on_output is not None: + self.on_output(current_feature_id or 0, line) else: # Both coding and testing agents now use [Feature #X] format - print(f"[Feature #{feature_id}] {line}", flush=True) + print(f"[Feature #{current_feature_id}] {line}", flush=True) proc.wait() finally: # CRITICAL: Kill the process tree to clean up any child processes (e.g., Claude CLI) @@ -814,73 +1239,87 @@ class ParallelOrchestrator: self._signal_agent_completed() return - # Coding agent completion - debug_log.log("COMPLETE", f"Coding agent for feature #{feature_id} finished", - return_code=return_code, - status="success" if return_code == 0 else "failed") + # feature_id is required for coding agents (always passed from start_feature) + assert feature_id is not None, "feature_id must not be None for coding agents" + # Coding agent completion - handle both single and batch features + batch_ids = None with self._lock: + batch_ids = self._batch_features.pop(feature_id, None) + if batch_ids: + # Clean up reverse mapping + for fid in batch_ids: + self._feature_to_primary.pop(fid, None) self.running_coding_agents.pop(feature_id, None) self.abort_events.pop(feature_id, None) + all_feature_ids = batch_ids or [feature_id] + + debug_log.log("COMPLETE", f"Coding agent for feature(s) {all_feature_ids} finished", + return_code=return_code, + status="success" if return_code == 0 else "failed", + batch_size=len(all_feature_ids)) + # Refresh session cache to see subprocess commits - # The coding agent runs as a subprocess and commits changes (e.g., passes=True). - # Using session.expire_all() is lighter weight than engine.dispose() for SQLite WAL mode - # and is sufficient to invalidate cached data and force fresh reads. - # engine.dispose() is only called on orchestrator shutdown, not on every agent completion. 
session = self.get_session() try: session.expire_all() - feature = session.query(Feature).filter(Feature.id == feature_id).first() - feature_passes = feature.passes if feature else None - feature_in_progress = feature.in_progress if feature else None - debug_log.log("DB", f"Feature #{feature_id} state after session.expire_all()", - passes=feature_passes, - in_progress=feature_in_progress) - if feature and feature.in_progress and not feature.passes: - feature.in_progress = False - session.commit() - debug_log.log("DB", f"Cleared in_progress for feature #{feature_id} (agent failed)") + for fid in all_feature_ids: + feature = session.query(Feature).filter(Feature.id == fid).first() + feature_passes = feature.passes if feature else None + feature_in_progress = feature.in_progress if feature else None + debug_log.log("DB", f"Feature #{fid} state after session.expire_all()", + passes=feature_passes, + in_progress=feature_in_progress) + if feature and feature.in_progress and not feature.passes: + feature.in_progress = False + session.commit() + debug_log.log("DB", f"Cleared in_progress for feature #{fid} (agent failed)") finally: session.close() - # Track failures to prevent infinite retry loops + # Track failures for features still in_progress at exit if return_code != 0: with self._lock: - self._failure_counts[feature_id] = self._failure_counts.get(feature_id, 0) + 1 - failure_count = self._failure_counts[feature_id] - if failure_count >= MAX_FEATURE_RETRIES: - print(f"Feature #{feature_id} has failed {failure_count} times, will not retry", flush=True) - debug_log.log("COMPLETE", f"Feature #{feature_id} exceeded max retries", - failure_count=failure_count) + for fid in all_feature_ids: + self._failure_counts[fid] = self._failure_counts.get(fid, 0) + 1 + failure_count = self._failure_counts[fid] + if failure_count >= MAX_FEATURE_RETRIES: + print(f"Feature #{fid} has failed {failure_count} times, will not retry", flush=True) + debug_log.log("COMPLETE", f"Feature #{fid} exceeded max retries", + failure_count=failure_count) status = "completed" if return_code == 0 else "failed" - if self.on_status: - self.on_status(feature_id, status) - # CRITICAL: This print triggers the WebSocket to emit agent_update with state='error' or 'success' - print(f"Feature #{feature_id} {status}", flush=True) + if self.on_status is not None: + for fid in all_feature_ids: + self.on_status(fid, status) + + # CRITICAL: Print triggers WebSocket to emit agent_update + if batch_ids and len(batch_ids) > 1: + ids_str = ", ".join(f"#{fid}" for fid in batch_ids) + print(f"Features {ids_str} {status}", flush=True) + else: + print(f"Feature #{feature_id} {status}", flush=True) # Signal main loop that an agent slot is available self._signal_agent_completed() - # NOTE: Testing agents are now spawned in start_feature() when coding agents START, - # not here when they complete. This ensures 1:1 ratio and proper termination. 
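To make the reattribution behaviour in `_read_output` concrete, the claim-detection regex can be exercised on its own. The pattern below is the same `_CLAIM_FEATURE_PATTERN` added above; the sample log lines are invented for illustration and need not match the agent's exact wording.

import re

CLAIM_FEATURE_PATTERN = re.compile(
    r"feature_claim_and_get\b.*?['\"]?feature_id['\"]?\s*[:=]\s*(\d+)"
)

lines = [
    'Tool call: feature_claim_and_get {"feature_id": 7}',
    "Implementing login form ...",
    "Tool call: feature_claim_and_get feature_id=12",
]

current_feature_id = 3  # primary ID the batch agent was started with
for line in lines:
    match = CLAIM_FEATURE_PATTERN.search(line)
    if match:
        current_feature_id = int(match.group(1))  # switch log attribution to the claimed feature
    print(f"[Feature #{current_feature_id}] {line}")
# Lines are tagged #7, #7, then #12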
- def stop_feature(self, feature_id: int) -> tuple[bool, str]: """Stop a running coding agent and all its child processes.""" with self._lock: - if feature_id not in self.running_coding_agents: + # Check if this feature is part of a batch + primary_id = self._feature_to_primary.get(feature_id, feature_id) + if primary_id not in self.running_coding_agents: return False, "Feature not running" - abort = self.abort_events.get(feature_id) - proc = self.running_coding_agents.get(feature_id) + abort = self.abort_events.get(primary_id) + proc = self.running_coding_agents.get(primary_id) if abort: abort.set() if proc: - # Kill entire process tree to avoid orphaned children (e.g., browser instances) result = kill_process_tree(proc, timeout=5.0) - debug_log.log("STOP", f"Killed feature {feature_id} process tree", + debug_log.log("STOP", f"Killed feature {feature_id} (primary {primary_id}) process tree", status=result.status, children_found=result.children_found, children_terminated=result.children_terminated, children_killed=result.children_killed) @@ -945,6 +1384,7 @@ class ParallelOrchestrator: print(f"Max concurrency: {self.max_concurrency} coding agents", flush=True) print(f"YOLO mode: {self.yolo_mode}", flush=True) print(f"Regression agents: {self.testing_agent_ratio} (maintained independently)", flush=True) + print(f"Batch size: {self.batch_size} features per agent", flush=True) print("=" * 70, flush=True) print(flush=True) @@ -976,16 +1416,15 @@ class ParallelOrchestrator: # newly created features. debug_log.section("INITIALIZATION COMPLETE") debug_log.log("INIT", "Disposing old database engine and creating fresh connection") - print("[DEBUG] Recreating database connection after initialization...", flush=True) + logger.debug("Recreating database connection after initialization") if self._engine is not None: self._engine.dispose() self._engine, self._session_maker = create_database(self.project_dir) # Debug: Show state immediately after initialization - print("[DEBUG] Post-initialization state check:", flush=True) - print(f"[DEBUG] max_concurrency={self.max_concurrency}", flush=True) - print(f"[DEBUG] yolo_mode={self.yolo_mode}", flush=True) - print(f"[DEBUG] testing_agent_ratio={self.testing_agent_ratio}", flush=True) + logger.debug("Post-initialization state check") + logger.debug("Post-initialization state: max_concurrency=%d, yolo_mode=%s, testing_agent_ratio=%d", + self.max_concurrency, self.yolo_mode, self.testing_agent_ratio) # Verify features were created and are visible session = self.get_session() @@ -993,7 +1432,7 @@ class ParallelOrchestrator: feature_count = session.query(Feature).count() all_features = session.query(Feature).all() feature_names = [f"{f.id}: {f.name}" for f in all_features[:10]] - print(f"[DEBUG] features in database={feature_count}", flush=True) + logger.debug("Features in database: %d", feature_count) debug_log.log("INIT", "Post-initialization database state", max_concurrency=self.max_concurrency, yolo_mode=self.yolo_mode, @@ -1014,10 +1453,21 @@ class ParallelOrchestrator: debug_log.section("FEATURE LOOP STARTING") loop_iteration = 0 - while self.is_running: + while self.is_running and not self._shutdown_requested: loop_iteration += 1 if loop_iteration <= 3: - print(f"[DEBUG] === Loop iteration {loop_iteration} ===", flush=True) + logger.debug("=== Loop iteration %d ===", loop_iteration) + + # Query all features ONCE per iteration and build reusable snapshot. + # Every sub-method receives this snapshot instead of re-querying the DB. 
+ session = self.get_session() + session.expire_all() + all_features = session.query(Feature).all() + feature_dicts = [f.to_dict() for f in all_features] + session.close() + + # Pre-compute scheduling scores once (BFS + reverse topo sort) + scheduling_scores = compute_scheduling_scores(feature_dicts) # Log every iteration to debug file (first 10, then every 5th) if loop_iteration <= 10 or loop_iteration % 5 == 0: @@ -1031,20 +1481,16 @@ class ParallelOrchestrator: # Full database dump every 5 iterations if loop_iteration == 1 or loop_iteration % 5 == 0: - session = self.get_session() - try: - _dump_database_state(session, f"(iteration {loop_iteration})") - finally: - session.close() + _dump_database_state(feature_dicts, f"(iteration {loop_iteration})") try: # Check if all complete - if self.get_all_complete(): + if self.get_all_complete(feature_dicts): print("\nAll features complete!", flush=True) break # Maintain testing agents independently (runs every iteration) - self._maintain_testing_agents() + self._maintain_testing_agents(feature_dicts) # Check capacity with self._lock: @@ -1065,17 +1511,17 @@ class ParallelOrchestrator: continue # Priority 1: Resume features from previous session - resumable = self.get_resumable_features() + resumable = self.get_resumable_features(feature_dicts, scheduling_scores) if resumable: slots = self.max_concurrency - current for feature in resumable[:slots]: print(f"Resuming feature #{feature['id']}: {feature['name']}", flush=True) self.start_feature(feature["id"], resume=True) - await asyncio.sleep(2) + await asyncio.sleep(0.5) # Brief delay for subprocess to claim feature before re-querying continue # Priority 2: Start new ready features - ready = self.get_ready_features() + ready = self.get_ready_features(feature_dicts, scheduling_scores) if not ready: # Wait for running features to complete if current > 0: @@ -1088,11 +1534,12 @@ class ParallelOrchestrator: session = self.get_session() try: session.expire_all() + fresh_dicts = [f.to_dict() for f in session.query(Feature).all()] finally: session.close() # Recheck if all features are now complete - if self.get_all_complete(): + if self.get_all_complete(fresh_dicts): print("\nAll features complete!", flush=True) break @@ -1101,36 +1548,39 @@ class ParallelOrchestrator: await self._wait_for_agent_completion(timeout=POLL_INTERVAL * 2) continue - # Start features up to capacity + # Build dependency-aware batches from ready features slots = self.max_concurrency - current - print(f"[DEBUG] Spawning loop: {len(ready)} ready, {slots} slots available, max_concurrency={self.max_concurrency}", flush=True) - print(f"[DEBUG] Will attempt to start {min(len(ready), slots)} features", flush=True) - features_to_start = ready[:slots] - print(f"[DEBUG] Features to start: {[f['id'] for f in features_to_start]}", flush=True) + batches = self.build_feature_batches(ready, feature_dicts, scheduling_scores) - debug_log.log("SPAWN", "Starting features batch", + logger.debug("Spawning loop: %d ready, %d slots available, %d batches built", + len(ready), slots, len(batches)) + + debug_log.log("SPAWN", "Starting feature batches", ready_count=len(ready), slots_available=slots, - features_to_start=[f['id'] for f in features_to_start]) + batch_count=len(batches), + batches=[[f['id'] for f in b] for b in batches[:slots]]) - for i, feature in enumerate(features_to_start): - print(f"[DEBUG] Starting feature {i+1}/{len(features_to_start)}: #{feature['id']} - {feature['name']}", flush=True) - success, msg = self.start_feature(feature["id"]) 
+ for batch in batches[:slots]: + batch_ids = [f["id"] for f in batch] + batch_names = [f"{f['id']}:{f['name']}" for f in batch] + logger.debug("Starting batch: %s", batch_ids) + success, msg = self.start_feature_batch(batch_ids) if not success: - print(f"[DEBUG] Failed to start feature #{feature['id']}: {msg}", flush=True) - debug_log.log("SPAWN", f"FAILED to start feature #{feature['id']}", - feature_name=feature['name'], + logger.debug("Failed to start batch %s: %s", batch_ids, msg) + debug_log.log("SPAWN", f"FAILED to start batch {batch_ids}", + batch_names=batch_names, error=msg) else: - print(f"[DEBUG] Successfully started feature #{feature['id']}", flush=True) + logger.debug("Successfully started batch %s", batch_ids) with self._lock: running_count = len(self.running_coding_agents) - print(f"[DEBUG] Running coding agents after start: {running_count}", flush=True) - debug_log.log("SPAWN", f"Successfully started feature #{feature['id']}", - feature_name=feature['name'], + logger.debug("Running coding agents after start: %d", running_count) + debug_log.log("SPAWN", f"Successfully started batch {batch_ids}", + batch_names=batch_names, running_coding_agents=running_count) - await asyncio.sleep(2) # Brief pause between starts + await asyncio.sleep(0.5) except Exception as e: print(f"Orchestrator error: {e}", flush=True) @@ -1163,13 +1613,44 @@ class ParallelOrchestrator: "yolo_mode": self.yolo_mode, } + def cleanup(self) -> None: + """Clean up database resources. Safe to call multiple times. + + Forces WAL checkpoint to flush pending writes to main database file, + then disposes engine to close all connections. Prevents stale cache + issues when the orchestrator restarts. + """ + # Atomically grab and clear the engine reference to prevent re-entry + engine = self._engine + self._engine = None + + if engine is None: + return # Already cleaned up + + try: + debug_log.log("CLEANUP", "Forcing WAL checkpoint before dispose") + with engine.connect() as conn: + conn.execute(text("PRAGMA wal_checkpoint(FULL)")) + conn.commit() + debug_log.log("CLEANUP", "WAL checkpoint completed, disposing engine") + except Exception as e: + debug_log.log("CLEANUP", f"WAL checkpoint failed (non-fatal): {e}") + + try: + engine.dispose() + debug_log.log("CLEANUP", "Engine disposed successfully") + except Exception as e: + debug_log.log("CLEANUP", f"Engine dispose failed: {e}") + async def run_parallel_orchestrator( project_dir: Path, max_concurrency: int = DEFAULT_CONCURRENCY, - model: str = None, + model: str | None = None, yolo_mode: bool = False, testing_agent_ratio: int = 1, + testing_batch_size: int = DEFAULT_TESTING_BATCH_SIZE, + batch_size: int = 3, ) -> None: """Run the unified orchestrator. 
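`build_feature_batches` itself is defined earlier in this diff and not shown in this hunk. Purely as a sketch of what dependency-aware grouping can look like (the greedy strategy and the `dependencies` field name below are assumptions, not the repository's actual implementation): seed each batch with a ready feature, then pull in pending features that become unblocked once earlier members of the batch are done.

def build_batches_sketch(ready: list[dict], all_features: list[dict],
                         scores: dict[int, float], batch_size: int = 3) -> list[list[dict]]:
    # Illustrative greedy grouping, capped at batch_size features per agent.
    by_id = {f["id"]: f for f in all_features}
    passing = {f["id"] for f in all_features if f.get("passes")}
    assigned: set[int] = set()
    batches: list[list[dict]] = []
    for seed in sorted(ready, key=lambda f: -scores.get(f["id"], 0)):
        if seed["id"] in assigned:
            continue
        batch = [seed]
        assigned.add(seed["id"])
        done = passing | {seed["id"]}
        for fd in by_id.values():
            if len(batch) >= batch_size:
                break
            if fd["id"] in assigned or fd.get("passes") or fd.get("in_progress"):
                continue
            deps = set(fd.get("dependencies") or [])   # field name assumed for the sketch
            if deps and deps <= done:                  # unlocked by earlier batch members
                batch.append(fd)
                assigned.add(fd["id"])
                done.add(fd["id"])
        batches.append(batch)
    return batches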
@@ -1179,6 +1660,8 @@ async def run_parallel_orchestrator( model: Claude model to use yolo_mode: Whether to run in YOLO mode (skip testing agents) testing_agent_ratio: Number of regression agents to maintain (0-3) + testing_batch_size: Number of features per testing batch (1-5) + batch_size: Max features per coding agent batch (1-3) """ print(f"[ORCHESTRATOR] run_parallel_orchestrator called with max_concurrency={max_concurrency}", flush=True) orchestrator = ParallelOrchestrator( @@ -1187,13 +1670,41 @@ async def run_parallel_orchestrator( model=model, yolo_mode=yolo_mode, testing_agent_ratio=testing_agent_ratio, + testing_batch_size=testing_batch_size, + batch_size=batch_size, ) + # Set up cleanup to run on exit (handles normal exit, exceptions) + def cleanup_handler(): + debug_log.log("CLEANUP", "atexit cleanup handler invoked") + orchestrator.cleanup() + + atexit.register(cleanup_handler) + + # Set up async-safe signal handler for graceful shutdown + # Only sets flags - everything else is unsafe in signal context + def signal_handler(signum, frame): + orchestrator._shutdown_requested = True + orchestrator.is_running = False + + # Register SIGTERM handler for process termination signals + # Note: On Windows, SIGTERM handlers only fire from os.kill() calls within Python. + # External termination (Task Manager, taskkill, Popen.terminate()) uses + # TerminateProcess() which bypasses signal handlers entirely. + signal.signal(signal.SIGTERM, signal_handler) + + # Note: We intentionally do NOT register SIGINT handler + # Let Python raise KeyboardInterrupt naturally so the except block works + try: await orchestrator.run_loop() except KeyboardInterrupt: print("\n\nInterrupted by user. Stopping agents...", flush=True) orchestrator.stop_all() + finally: + # CRITICAL: Always clean up database resources on exit + # This forces WAL checkpoint and disposes connections + orchestrator.cleanup() def main(): @@ -1240,6 +1751,18 @@ def main(): default=1, help="Number of regression testing agents (0-3, default: 1). 
Set to 0 to disable testing agents.", ) + parser.add_argument( + "--testing-batch-size", + type=int, + default=DEFAULT_TESTING_BATCH_SIZE, + help=f"Number of features per testing batch (1-5, default: {DEFAULT_TESTING_BATCH_SIZE})", + ) + parser.add_argument( + "--batch-size", + type=int, + default=3, + help="Max features per coding agent batch (1-5, default: 3)", + ) args = parser.parse_args() @@ -1266,6 +1789,8 @@ def main(): model=args.model, yolo_mode=args.yolo, testing_agent_ratio=args.testing_agent_ratio, + testing_batch_size=args.testing_batch_size, + batch_size=args.batch_size, )) except KeyboardInterrupt: print("\n\nInterrupted by user", flush=True) diff --git a/progress.py b/progress.py index 0821c90..f0795b6 100644 --- a/progress.py +++ b/progress.py @@ -10,12 +10,21 @@ import json import os import sqlite3 import urllib.request +from contextlib import closing from datetime import datetime, timezone from pathlib import Path WEBHOOK_URL = os.environ.get("PROGRESS_N8N_WEBHOOK_URL") PROGRESS_CACHE_FILE = ".progress_cache" +# SQLite connection settings for parallel mode safety +SQLITE_TIMEOUT = 30 # seconds to wait for locks + + +def _get_connection(db_file: Path) -> sqlite3.Connection: + """Get a SQLite connection with proper timeout settings for parallel mode.""" + return sqlite3.connect(db_file, timeout=SQLITE_TIMEOUT) + def has_features(project_dir: Path) -> bool: """ @@ -31,25 +40,23 @@ def has_features(project_dir: Path) -> bool: Returns False if no features exist (initializer needs to run). """ - import sqlite3 - # Check legacy JSON file first json_file = project_dir / "feature_list.json" if json_file.exists(): return True # Check SQLite database - db_file = project_dir / "features.db" + from autocoder_paths import get_features_db_path + db_file = get_features_db_path(project_dir) if not db_file.exists(): return False try: - conn = sqlite3.connect(db_file) - cursor = conn.cursor() - cursor.execute("SELECT COUNT(*) FROM features") - count = cursor.fetchone()[0] - conn.close() - return count > 0 + with closing(_get_connection(db_file)) as conn: + cursor = conn.cursor() + cursor.execute("SELECT COUNT(*) FROM features") + count: int = cursor.fetchone()[0] + return bool(count > 0) except Exception: # Database exists but can't be read or has no features table return False @@ -65,41 +72,41 @@ def count_passing_tests(project_dir: Path) -> tuple[int, int, int]: Returns: (passing_count, in_progress_count, total_count) """ - db_file = project_dir / "features.db" + from autocoder_paths import get_features_db_path + db_file = get_features_db_path(project_dir) if not db_file.exists(): return 0, 0, 0 try: - conn = sqlite3.connect(db_file) - cursor = conn.cursor() - # Single aggregate query instead of 3 separate COUNT queries - # Handle case where in_progress column doesn't exist yet (legacy DBs) - try: - cursor.execute(""" - SELECT - COUNT(*) as total, - SUM(CASE WHEN passes = 1 THEN 1 ELSE 0 END) as passing, - SUM(CASE WHEN in_progress = 1 THEN 1 ELSE 0 END) as in_progress - FROM features - """) - row = cursor.fetchone() - total = row[0] or 0 - passing = row[1] or 0 - in_progress = row[2] or 0 - except sqlite3.OperationalError: - # Fallback for databases without in_progress column - cursor.execute(""" - SELECT - COUNT(*) as total, - SUM(CASE WHEN passes = 1 THEN 1 ELSE 0 END) as passing - FROM features - """) - row = cursor.fetchone() - total = row[0] or 0 - passing = row[1] or 0 - in_progress = 0 - conn.close() - return passing, in_progress, total + with closing(_get_connection(db_file)) as 
conn: + cursor = conn.cursor() + # Single aggregate query instead of 3 separate COUNT queries + # Handle case where in_progress column doesn't exist yet (legacy DBs) + try: + cursor.execute(""" + SELECT + COUNT(*) as total, + SUM(CASE WHEN passes = 1 THEN 1 ELSE 0 END) as passing, + SUM(CASE WHEN in_progress = 1 THEN 1 ELSE 0 END) as in_progress + FROM features + """) + row = cursor.fetchone() + total = row[0] or 0 + passing = row[1] or 0 + in_progress = row[2] or 0 + except sqlite3.OperationalError: + # Fallback for databases without in_progress column + cursor.execute(""" + SELECT + COUNT(*) as total, + SUM(CASE WHEN passes = 1 THEN 1 ELSE 0 END) as passing + FROM features + """) + row = cursor.fetchone() + total = row[0] or 0 + passing = row[1] or 0 + in_progress = 0 + return passing, in_progress, total except Exception as e: print(f"[Database error in count_passing_tests: {e}]") return 0, 0, 0 @@ -115,22 +122,22 @@ def get_all_passing_features(project_dir: Path) -> list[dict]: Returns: List of dicts with id, category, name for each passing feature """ - db_file = project_dir / "features.db" + from autocoder_paths import get_features_db_path + db_file = get_features_db_path(project_dir) if not db_file.exists(): return [] try: - conn = sqlite3.connect(db_file) - cursor = conn.cursor() - cursor.execute( - "SELECT id, category, name FROM features WHERE passes = 1 ORDER BY priority ASC" - ) - features = [ - {"id": row[0], "category": row[1], "name": row[2]} - for row in cursor.fetchall() - ] - conn.close() - return features + with closing(_get_connection(db_file)) as conn: + cursor = conn.cursor() + cursor.execute( + "SELECT id, category, name FROM features WHERE passes = 1 ORDER BY priority ASC" + ) + features = [ + {"id": row[0], "category": row[1], "name": row[2]} + for row in cursor.fetchall() + ] + return features except Exception: return [] @@ -140,7 +147,8 @@ def send_progress_webhook(passing: int, total: int, project_dir: Path) -> None: if not WEBHOOK_URL: return # Webhook not configured - cache_file = project_dir / PROGRESS_CACHE_FILE + from autocoder_paths import get_progress_cache_path + cache_file = get_progress_cache_path(project_dir) previous = 0 previous_passing_ids = set() diff --git a/prompts.py b/prompts.py index 137928c..5d83faa 100644 --- a/prompts.py +++ b/prompts.py @@ -9,6 +9,7 @@ Fallback chain: 2. Base template: .claude/templates/{name}.template.md """ +import re import shutil from pathlib import Path @@ -18,7 +19,8 @@ TEMPLATES_DIR = Path(__file__).parent / ".claude" / "templates" def get_project_prompts_dir(project_dir: Path) -> Path: """Get the prompts directory for a specific project.""" - return project_dir / "prompts" + from autocoder_paths import get_prompts_dir + return get_prompts_dir(project_dir) def load_prompt(name: str, project_dir: Path | None = None) -> str: @@ -69,42 +71,119 @@ def get_initializer_prompt(project_dir: Path | None = None) -> str: return load_prompt("initializer_prompt", project_dir) -def get_coding_prompt(project_dir: Path | None = None) -> str: - """Load the coding agent prompt (project-specific if available).""" - return load_prompt("coding_prompt", project_dir) +def _strip_browser_testing_sections(prompt: str) -> str: + """Strip browser automation and Playwright testing instructions from prompt. + + Used in YOLO mode where browser testing is skipped entirely. Replaces + browser-related sections with a brief YOLO-mode note while preserving + all non-testing instructions (implementation, git, progress notes, etc.). 
+ + Args: + prompt: The full coding prompt text. + + Returns: + The prompt with browser testing sections replaced by YOLO guidance. + """ + original_prompt = prompt + + # Replace STEP 5 (browser automation verification) with YOLO note + prompt = re.sub( + r"### STEP 5: VERIFY WITH BROWSER AUTOMATION.*?(?=### STEP 5\.5:)", + "### STEP 5: VERIFY FEATURE (YOLO MODE)\n\n" + "**YOLO mode is active.** Skip browser automation testing. " + "Instead, verify your feature works by ensuring:\n" + "- Code compiles without errors (lint and type-check pass)\n" + "- Server starts without errors after your changes\n" + "- No obvious runtime errors in server logs\n\n", + prompt, + flags=re.DOTALL, + ) + + # Replace the screenshots-only marking rule with YOLO-appropriate wording + prompt = prompt.replace( + "**ONLY MARK A FEATURE AS PASSING AFTER VERIFICATION WITH SCREENSHOTS.**", + "**YOLO mode: Mark a feature as passing after lint/type-check succeeds and server starts cleanly.**", + ) + + # Replace the BROWSER AUTOMATION reference section + prompt = re.sub( + r"## BROWSER AUTOMATION\n\n.*?(?=---)", + "## VERIFICATION (YOLO MODE)\n\n" + "Browser automation is disabled in YOLO mode. " + "Verify features by running lint, type-check, and confirming the dev server starts without errors.\n\n", + prompt, + flags=re.DOTALL, + ) + + # In STEP 4, replace browser automation reference with YOLO guidance + prompt = prompt.replace( + "2. Test manually using browser automation (see Step 5)", + "2. Verify code compiles (lint and type-check pass)", + ) + + if prompt == original_prompt: + print("[YOLO] Warning: No browser testing sections found to strip. " + "Project-specific prompt may need manual YOLO adaptation.") + + return prompt -def get_testing_prompt(project_dir: Path | None = None, testing_feature_id: int | None = None) -> str: - """Load the testing agent prompt (project-specific if available). +def get_coding_prompt(project_dir: Path | None = None, yolo_mode: bool = False) -> str: + """Load the coding agent prompt (project-specific if available). Args: project_dir: Optional project directory for project-specific prompts - testing_feature_id: If provided, the pre-assigned feature ID to test. - The orchestrator claims the feature before spawning the agent. + yolo_mode: If True, strip browser automation / Playwright testing + instructions and replace with YOLO-mode guidance. This reduces + prompt tokens since YOLO mode skips all browser testing anyway. Returns: - The testing prompt, with pre-assigned feature instructions if applicable. + The coding prompt, optionally stripped of testing instructions. + """ + prompt = load_prompt("coding_prompt", project_dir) + + if yolo_mode: + prompt = _strip_browser_testing_sections(prompt) + + return prompt + + +def get_testing_prompt( + project_dir: Path | None = None, + testing_feature_id: int | None = None, + testing_feature_ids: list[int] | None = None, +) -> str: + """Load the testing agent prompt (project-specific if available). + + Supports both single-feature and multi-feature testing modes. When + testing_feature_ids is provided, the template's {{TESTING_FEATURE_IDS}} + placeholder is replaced with the comma-separated list. Falls back to + the legacy single-feature header when only testing_feature_id is given. + + Args: + project_dir: Optional project directory for project-specific prompts + testing_feature_id: If provided, the pre-assigned feature ID to test (legacy single mode). + testing_feature_ids: If provided, a list of feature IDs to test (batch mode). 
+ Takes precedence over testing_feature_id when both are set. + + Returns: + The testing prompt, with feature assignment instructions populated. """ base_prompt = load_prompt("testing_prompt", project_dir) + # Batch mode: replace the {{TESTING_FEATURE_IDS}} placeholder in the template + if testing_feature_ids is not None and len(testing_feature_ids) > 0: + ids_str = ", ".join(str(fid) for fid in testing_feature_ids) + return base_prompt.replace("{{TESTING_FEATURE_IDS}}", ids_str) + + # Legacy single-feature mode: prepend header and replace placeholder if testing_feature_id is not None: - # Prepend pre-assigned feature instructions - pre_assigned_header = f"""## ASSIGNED FEATURE + # Replace the placeholder with the single ID for template consistency + base_prompt = base_prompt.replace("{{TESTING_FEATURE_IDS}}", str(testing_feature_id)) + return base_prompt -**You are assigned to regression test Feature #{testing_feature_id}.** - -### Your workflow: -1. Call `feature_get_by_id` with ID {testing_feature_id} to get the feature details -2. Verify the feature through the UI using browser automation -3. If regression found, call `feature_mark_failing` with feature_id={testing_feature_id} -4. Exit when done (no cleanup needed) - ---- - -""" - return pre_assigned_header + base_prompt - - return base_prompt + # No feature assignment -- return template with placeholder cleared + return base_prompt.replace("{{TESTING_FEATURE_IDS}}", "(none assigned)") def get_single_feature_prompt(feature_id: int, project_dir: Path | None = None, yolo_mode: bool = False) -> str: @@ -117,13 +196,13 @@ def get_single_feature_prompt(feature_id: int, project_dir: Path | None = None, Args: feature_id: The specific feature ID to work on project_dir: Optional project directory for project-specific prompts - yolo_mode: Ignored (kept for backward compatibility). Testing is now - handled by separate testing agents, not YOLO prompts. + yolo_mode: If True, strip browser testing instructions from the base + coding prompt for reduced token usage in YOLO mode. Returns: The prompt with single-feature header prepended """ - base_prompt = get_coding_prompt(project_dir) + base_prompt = get_coding_prompt(project_dir, yolo_mode=yolo_mode) # Minimal header - the base prompt already contains the full workflow single_feature_header = f"""## ASSIGNED FEATURE: #{feature_id} @@ -138,6 +217,52 @@ If blocked, use `feature_skip` and document the blocker. return single_feature_header + base_prompt +def get_batch_feature_prompt( + feature_ids: list[int], + project_dir: Path | None = None, + yolo_mode: bool = False, +) -> str: + """Prepend batch-feature assignment header to base coding prompt. + + Used in parallel mode to assign multiple features to an agent. + Features should be implemented sequentially in the given order. + + Args: + feature_ids: List of feature IDs to implement in order + project_dir: Optional project directory for project-specific prompts + yolo_mode: If True, strip browser testing instructions from the base prompt + + Returns: + The prompt with batch-feature header prepended + """ + base_prompt = get_coding_prompt(project_dir, yolo_mode=yolo_mode) + ids_str = ", ".join(f"#{fid}" for fid in feature_ids) + + batch_header = f"""## ASSIGNED FEATURES (BATCH): {ids_str} + +You have been assigned {len(feature_ids)} features to implement sequentially. +Process them IN ORDER: {ids_str} + +### Workflow for each feature: +1. Call `feature_claim_and_get` with the feature ID to get its details +2. Implement the feature fully +3. 
Verify it works (browser testing if applicable) +4. Call `feature_mark_passing` to mark it complete +5. Git commit the changes +6. Move to the next feature + +### Important: +- Complete each feature fully before starting the next +- Mark each feature passing individually as you go +- If blocked on a feature, use `feature_skip` and move to the next one +- Other agents are handling other features - focus only on yours + +--- + +""" + return batch_header + base_prompt + + def get_app_spec(project_dir: Path) -> str: """ Load the app spec from the project. @@ -190,9 +315,9 @@ def scaffold_project_prompts(project_dir: Path) -> Path: project_prompts = get_project_prompts_dir(project_dir) project_prompts.mkdir(parents=True, exist_ok=True) - # Create .autocoder directory for configuration files - autocoder_dir = project_dir / ".autocoder" - autocoder_dir.mkdir(parents=True, exist_ok=True) + # Create .autocoder directory with .gitignore for runtime files + from autocoder_paths import ensure_autocoder_dir + autocoder_dir = ensure_autocoder_dir(project_dir) # Define template mappings: (source_template, destination_name) templates = [ diff --git a/rate_limit_utils.py b/rate_limit_utils.py new file mode 100644 index 0000000..7fe77ea --- /dev/null +++ b/rate_limit_utils.py @@ -0,0 +1,132 @@ +""" +Rate Limit Utilities +==================== + +Shared utilities for detecting and handling API rate limits. +Used by both agent.py (production) and test_rate_limit_utils.py (tests). +""" + +import random +import re +from typing import Optional + +# Regex patterns for rate limit detection (used in both exception messages and response text) +# These patterns use word boundaries to avoid false positives like "PR #429" or "please wait while I..." +RATE_LIMIT_REGEX_PATTERNS = [ + r"\brate[_\s]?limit", # "rate limit", "rate_limit", "ratelimit" + r"\btoo\s+many\s+requests", # "too many requests" + r"\bhttp\s*429\b", # "http 429", "http429" + r"\bstatus\s*429\b", # "status 429", "status429" + r"\berror\s*429\b", # "error 429", "error429" + r"\b429\s+too\s+many", # "429 too many" + r"\b(?:server|api|system)\s+(?:is\s+)?overloaded\b", # "server is overloaded", "api overloaded" + r"\bquota\s*exceeded\b", # "quota exceeded" +] + +# Compiled regex for efficient matching +_RATE_LIMIT_REGEX = re.compile( + "|".join(RATE_LIMIT_REGEX_PATTERNS), + re.IGNORECASE +) + + +def parse_retry_after(error_message: str) -> Optional[int]: + """ + Extract retry-after seconds from various error message formats. + + Handles common formats: + - "Retry-After: 60" + - "retry after 60 seconds" + - "try again in 5 seconds" + - "30 seconds remaining" + + Args: + error_message: The error message to parse + + Returns: + Seconds to wait, or None if not parseable. 
+ """ + # Patterns require explicit "seconds" or "s" unit, OR no unit at all (end of string/sentence) + # This prevents matching "30 minutes" or "1 hour" since those have non-seconds units + patterns = [ + r"retry.?after[:\s]+(\d+)\s*(?:seconds?|s\b)", # Requires seconds unit + r"retry.?after[:\s]+(\d+)(?:\s*$|\s*[,.])", # Or end of string/sentence + r"try again in\s+(\d+)\s*(?:seconds?|s\b)", # Requires seconds unit + r"try again in\s+(\d+)(?:\s*$|\s*[,.])", # Or end of string/sentence + r"(\d+)\s*seconds?\s*(?:remaining|left|until)", + ] + + for pattern in patterns: + match = re.search(pattern, error_message, re.IGNORECASE) + if match: + return int(match.group(1)) + + return None + + +def is_rate_limit_error(error_message: str) -> bool: + """ + Detect if an error message indicates a rate limit. + + Uses regex patterns with word boundaries to avoid false positives + like "PR #429", "please wait while I...", or "Node v14.29.0". + + Args: + error_message: The error message to check + + Returns: + True if the message indicates a rate limit, False otherwise. + """ + return bool(_RATE_LIMIT_REGEX.search(error_message)) + + +def calculate_rate_limit_backoff(retries: int) -> int: + """ + Calculate exponential backoff with jitter for rate limits. + + Base formula: min(15 * 2^retries, 3600) + Jitter: adds 0-30% random jitter to prevent thundering herd. + Base sequence: ~15-20s, ~30-40s, ~60-78s, ~120-156s, ... + + The lower starting delay (15s vs 60s) allows faster recovery from + transient rate limits, while jitter prevents synchronized retries + when multiple agents hit limits simultaneously. + + Args: + retries: Number of consecutive rate limit retries (0-indexed) + + Returns: + Delay in seconds (clamped to 1-3600 range, with jitter) + """ + base = int(min(max(15 * (2 ** retries), 1), 3600)) + jitter = random.uniform(0, base * 0.3) + return int(base + jitter) + + +def calculate_error_backoff(retries: int) -> int: + """ + Calculate linear backoff for non-rate-limit errors. + + Formula: min(30 * retries, 300) - caps at 5 minutes + Sequence: 30s, 60s, 90s, 120s, ... 300s + + Args: + retries: Number of consecutive error retries (1-indexed) + + Returns: + Delay in seconds (clamped to 1-300 range) + """ + return min(max(30 * retries, 1), 300) + + +def clamp_retry_delay(delay_seconds: int) -> int: + """ + Clamp a retry delay to a safe range (1-3600 seconds). 
+ + Args: + delay_seconds: The raw delay value + + Returns: + Delay clamped to 1-3600 seconds + """ + return min(max(delay_seconds, 1), 3600) diff --git a/registry.py b/registry.py index f84803e..685d968 100644 --- a/registry.py +++ b/registry.py @@ -17,8 +17,7 @@ from pathlib import Path from typing import Any from sqlalchemy import Column, DateTime, Integer, String, create_engine, text -from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy.orm import sessionmaker +from sqlalchemy.orm import DeclarativeBase, sessionmaker # Module logger logger = logging.getLogger(__name__) @@ -39,7 +38,17 @@ AVAILABLE_MODELS = [ VALID_MODELS = [m["id"] for m in AVAILABLE_MODELS] # Default model and settings -DEFAULT_MODEL = "claude-opus-4-5-20251101" +# Respect ANTHROPIC_DEFAULT_OPUS_MODEL env var for Foundry/custom deployments +# Guard against empty/whitespace values by trimming and falling back when blank +_env_default_model = os.getenv("ANTHROPIC_DEFAULT_OPUS_MODEL") +if _env_default_model is not None: + _env_default_model = _env_default_model.strip() +DEFAULT_MODEL = _env_default_model or "claude-opus-4-5-20251101" + +# Ensure env-provided DEFAULT_MODEL is in VALID_MODELS for validation consistency +# (idempotent: only adds if missing, doesn't alter AVAILABLE_MODELS semantics) +if DEFAULT_MODEL and DEFAULT_MODEL not in VALID_MODELS: + VALID_MODELS.append(DEFAULT_MODEL) DEFAULT_YOLO_MODE = False # SQLite connection settings @@ -75,7 +84,9 @@ class RegistryPermissionDenied(RegistryError): # SQLAlchemy Model # ============================================================================= -Base = declarative_base() +class Base(DeclarativeBase): + """SQLAlchemy 2.0 style declarative base.""" + pass class Project(Base): diff --git a/requirements.txt b/requirements.txt index 9cf420e..5d57a39 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,3 +15,4 @@ pyyaml>=6.0.0 ruff>=0.8.0 mypy>=1.13.0 pytest>=8.0.0 +types-PyYAML>=6.0.0 diff --git a/security.py b/security.py index 024ad04..1e7455f 100644 --- a/security.py +++ b/security.py @@ -97,6 +97,31 @@ BLOCKED_COMMANDS = { "ufw", } +# Sensitive directories (relative to home) that should never be exposed. +# Used by both the EXTRA_READ_PATHS validator (client.py) and the filesystem +# browser API (server/routers/filesystem.py) to block credential/key directories. +# This is the single source of truth -- import from here in both places. +# +# SENSITIVE_DIRECTORIES is the union of the previous filesystem browser blocklist +# (filesystem.py) and the previous EXTRA_READ_PATHS blocklist (client.py). +# Some entries are new to each consumer -- this is intentional for defense-in-depth. +SENSITIVE_DIRECTORIES = { + ".ssh", + ".aws", + ".azure", + ".kube", + ".gnupg", + ".gpg", + ".password-store", + ".docker", + ".config/gcloud", + ".config/gh", + ".npmrc", + ".pypirc", + ".netrc", + ".terraform", +} + # Commands that trigger emphatic warnings but CAN be approved (Phase 3) # For now, these are blocked like BLOCKED_COMMANDS until Phase 3 implements approval DANGEROUS_COMMANDS = { @@ -413,24 +438,6 @@ def validate_init_script(command_string: str) -> tuple[bool, str]: return False, f"Only ./init.sh is allowed, got: {script}" -def get_command_for_validation(cmd: str, segments: list[str]) -> str: - """ - Find the specific command segment that contains the given command. 
- - Args: - cmd: The command name to find - segments: List of command segments - - Returns: - The segment containing the command, or empty string if not found - """ - for segment in segments: - segment_commands = extract_commands(segment) - if cmd in segment_commands: - return segment - return "" - - def matches_pattern(command: str, pattern: str) -> bool: """ Check if a command matches a pattern. @@ -472,6 +479,75 @@ def matches_pattern(command: str, pattern: str) -> bool: return False +def _validate_command_list(commands: list, config_path: Path, field_name: str) -> bool: + """ + Validate a list of command entries from a YAML config. + + Each entry must be a dict with a non-empty string 'name' field. + Used by both load_org_config() and load_project_commands() to avoid + duplicating the same validation logic. + + Args: + commands: List of command entries to validate + config_path: Path to the config file (for log messages) + field_name: Name of the YAML field being validated (e.g., 'allowed_commands', 'commands') + + Returns: + True if all entries are valid, False otherwise + """ + if not isinstance(commands, list): + logger.warning(f"Config at {config_path}: '{field_name}' must be a list") + return False + for i, cmd in enumerate(commands): + if not isinstance(cmd, dict): + logger.warning(f"Config at {config_path}: {field_name}[{i}] must be a dict") + return False + if "name" not in cmd: + logger.warning(f"Config at {config_path}: {field_name}[{i}] missing 'name'") + return False + if not isinstance(cmd["name"], str) or cmd["name"].strip() == "": + logger.warning(f"Config at {config_path}: {field_name}[{i}] has invalid 'name'") + return False + return True + + +def _validate_pkill_processes(config: dict, config_path: Path) -> Optional[list[str]]: + """ + Validate and normalize pkill_processes from a YAML config. + + Each entry must be a non-empty string matching VALID_PROCESS_NAME_PATTERN + (alphanumeric, dots, underscores, hyphens only -- no regex metacharacters). + Used by both load_org_config() and load_project_commands(). + + Args: + config: Parsed YAML config dict that may contain 'pkill_processes' + config_path: Path to the config file (for log messages) + + Returns: + Normalized list of process names, or None if validation fails. + Returns an empty list if 'pkill_processes' is not present. + """ + if "pkill_processes" not in config: + return [] + + processes = config["pkill_processes"] + if not isinstance(processes, list): + logger.warning(f"Config at {config_path}: 'pkill_processes' must be a list") + return None + + normalized = [] + for i, proc in enumerate(processes): + if not isinstance(proc, str): + logger.warning(f"Config at {config_path}: pkill_processes[{i}] must be a string") + return None + proc = proc.strip() + if not proc or not VALID_PROCESS_NAME_PATTERN.fullmatch(proc): + logger.warning(f"Config at {config_path}: pkill_processes[{i}] has invalid value '{proc}'") + return None + normalized.append(proc) + return normalized + + def get_org_config_path() -> Path: """ Get the organization-level config file path. 
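The two validation helpers added above (`_validate_command_list` and `_validate_pkill_processes`) are easiest to understand from their accepted and rejected inputs. A small doctest-style illustration, assuming the module is importable as `security`; the helpers are private, so this is behaviour reference rather than public API usage.

from pathlib import Path

from security import _validate_command_list, _validate_pkill_processes

cfg_path = Path("/tmp/commands.yaml")  # only used in warning messages

# Command entries must be dicts with a non-empty string "name".
assert _validate_command_list([{"name": "npm"}, {"name": "pytest"}], cfg_path, "commands") is True
assert _validate_command_list([{"name": ""}], cfg_path, "commands") is False
assert _validate_command_list(["npm"], cfg_path, "commands") is False

# pkill process names are trimmed and must not contain regex metacharacters.
assert _validate_pkill_processes({"pkill_processes": [" node ", "vite"]}, cfg_path) == ["node", "vite"]
assert _validate_pkill_processes({"pkill_processes": ["node.*"]}, cfg_path) is None
assert _validate_pkill_processes({}, cfg_path) == []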
@@ -513,21 +589,8 @@ def load_org_config() -> Optional[dict]: # Validate allowed_commands if present if "allowed_commands" in config: - allowed = config["allowed_commands"] - if not isinstance(allowed, list): - logger.warning(f"Org config at {config_path}: 'allowed_commands' must be a list") + if not _validate_command_list(config["allowed_commands"], config_path, "allowed_commands"): return None - for i, cmd in enumerate(allowed): - if not isinstance(cmd, dict): - logger.warning(f"Org config at {config_path}: allowed_commands[{i}] must be a dict") - return None - if "name" not in cmd: - logger.warning(f"Org config at {config_path}: allowed_commands[{i}] missing 'name'") - return None - # Validate that name is a non-empty string - if not isinstance(cmd["name"], str) or cmd["name"].strip() == "": - logger.warning(f"Org config at {config_path}: allowed_commands[{i}] has invalid 'name'") - return None # Validate blocked_commands if present if "blocked_commands" in config: @@ -541,23 +604,10 @@ def load_org_config() -> Optional[dict]: return None # Validate pkill_processes if present - if "pkill_processes" in config: - processes = config["pkill_processes"] - if not isinstance(processes, list): - logger.warning(f"Org config at {config_path}: 'pkill_processes' must be a list") - return None - # Normalize and validate each process name against safe pattern - normalized = [] - for i, proc in enumerate(processes): - if not isinstance(proc, str): - logger.warning(f"Org config at {config_path}: pkill_processes[{i}] must be a string") - return None - proc = proc.strip() - # Block empty strings and regex metacharacters - if not proc or not VALID_PROCESS_NAME_PATTERN.fullmatch(proc): - logger.warning(f"Org config at {config_path}: pkill_processes[{i}] has invalid value '{proc}'") - return None - normalized.append(proc) + normalized = _validate_pkill_processes(config, config_path) + if normalized is None: + return None + if normalized: config["pkill_processes"] = normalized return config @@ -603,46 +653,21 @@ def load_project_commands(project_dir: Path) -> Optional[dict]: return None commands = config.get("commands", []) - if not isinstance(commands, list): - logger.warning(f"Project config at {config_path}: 'commands' must be a list") - return None # Enforce 100 command limit - if len(commands) > 100: + if isinstance(commands, list) and len(commands) > 100: logger.warning(f"Project config at {config_path} exceeds 100 command limit ({len(commands)} commands)") return None - # Validate each command entry - for i, cmd in enumerate(commands): - if not isinstance(cmd, dict): - logger.warning(f"Project config at {config_path}: commands[{i}] must be a dict") - return None - if "name" not in cmd: - logger.warning(f"Project config at {config_path}: commands[{i}] missing 'name'") - return None - # Validate name is a non-empty string - if not isinstance(cmd["name"], str) or cmd["name"].strip() == "": - logger.warning(f"Project config at {config_path}: commands[{i}] has invalid 'name'") - return None + # Validate each command entry using shared helper + if not _validate_command_list(commands, config_path, "commands"): + return None # Validate pkill_processes if present - if "pkill_processes" in config: - processes = config["pkill_processes"] - if not isinstance(processes, list): - logger.warning(f"Project config at {config_path}: 'pkill_processes' must be a list") - return None - # Normalize and validate each process name against safe pattern - normalized = [] - for i, proc in enumerate(processes): - if not 
isinstance(proc, str): - logger.warning(f"Project config at {config_path}: pkill_processes[{i}] must be a string") - return None - proc = proc.strip() - # Block empty strings and regex metacharacters - if not proc or not VALID_PROCESS_NAME_PATTERN.fullmatch(proc): - logger.warning(f"Project config at {config_path}: pkill_processes[{i}] has invalid value '{proc}'") - return None - normalized.append(proc) + normalized = _validate_pkill_processes(config, config_path) + if normalized is None: + return None + if normalized: config["pkill_processes"] = normalized return config @@ -659,8 +684,12 @@ def validate_project_command(cmd_config: dict) -> tuple[bool, str]: """ Validate a single command entry from project config. + Checks that the command has a valid name and is not in any blocklist. + Called during hierarchy resolution to gate each project command before + it is added to the effective allowed set. + Args: - cmd_config: Dict with command configuration (name, description, args) + cmd_config: Dict with command configuration (name, description) Returns: Tuple of (is_valid, error_message) @@ -690,15 +719,6 @@ def validate_project_command(cmd_config: dict) -> tuple[bool, str]: if "description" in cmd_config and not isinstance(cmd_config["description"], str): return False, "Description must be a string" - # Args validation (Phase 1 - just check structure) - if "args" in cmd_config: - args = cmd_config["args"] - if not isinstance(args, list): - return False, "Args must be a list" - for arg in args: - if not isinstance(arg, str): - return False, "Each arg must be a string" - return True, "" @@ -899,8 +919,13 @@ async def bash_security_hook(input_data, tool_use_id=None, context=None): # Additional validation for sensitive commands if cmd in COMMANDS_NEEDING_EXTRA_VALIDATION: - # Find the specific segment containing this command - cmd_segment = get_command_for_validation(cmd, segments) + # Find the specific segment containing this command by searching + # each segment's extracted commands for a match + cmd_segment = "" + for segment in segments: + if cmd in extract_commands(segment): + cmd_segment = segment + break if not cmd_segment: cmd_segment = command # Fallback to full command diff --git a/server/main.py b/server/main.py index 1b01f79..687bf87 100644 --- a/server/main.py +++ b/server/main.py @@ -7,6 +7,7 @@ Provides REST API, WebSocket, and static file serving. """ import asyncio +import logging import os import shutil import sys @@ -42,6 +43,7 @@ from .routers import ( ) from .schemas import SetupStatus from .services.assistant_chat_session import cleanup_all_sessions as cleanup_assistant_sessions +from .services.chat_constants import ROOT_DIR from .services.dev_server_manager import ( cleanup_all_devservers, cleanup_orphaned_devserver_locks, @@ -53,7 +55,6 @@ from .services.terminal_manager import cleanup_all_terminals from .websocket import project_websocket # Paths -ROOT_DIR = Path(__file__).parent.parent UI_DIST_DIR = ROOT_DIR / "ui" / "dist" @@ -88,10 +89,19 @@ app = FastAPI( lifespan=lifespan, ) +# Module logger +logger = logging.getLogger(__name__) + # Check if remote access is enabled via environment variable # Set by start_ui.py when --host is not 127.0.0.1 ALLOW_REMOTE = os.environ.get("AUTOCODER_ALLOW_REMOTE", "").lower() in ("1", "true", "yes") +if ALLOW_REMOTE: + logger.warning( + "ALLOW_REMOTE is enabled. Terminal WebSocket is exposed without sandboxing. " + "Only use this in trusted network environments." 
+ ) + # CORS - allow all origins when remote access is enabled, otherwise localhost only if ALLOW_REMOTE: app.add_middleware( @@ -222,7 +232,14 @@ if UI_DIST_DIR.exists(): raise HTTPException(status_code=404) # Try to serve the file directly - file_path = UI_DIST_DIR / path + file_path = (UI_DIST_DIR / path).resolve() + + # Ensure resolved path is within UI_DIST_DIR (prevent path traversal) + try: + file_path.relative_to(UI_DIST_DIR.resolve()) + except ValueError: + raise HTTPException(status_code=404) + if file_path.exists() and file_path.is_file(): return FileResponse(file_path) diff --git a/server/routers/agent.py b/server/routers/agent.py index 422f86b..9288745 100644 --- a/server/routers/agent.py +++ b/server/routers/agent.py @@ -6,31 +6,22 @@ API endpoints for agent control (start/stop/pause/resume). Uses project registry for path lookups. """ -import re from pathlib import Path from fastapi import APIRouter, HTTPException from ..schemas import AgentActionResponse, AgentStartRequest, AgentStatus +from ..services.chat_constants import ROOT_DIR from ..services.process_manager import get_manager +from ..utils.project_helpers import get_project_path as _get_project_path +from ..utils.validation import validate_project_name -def _get_project_path(project_name: str) -> Path: - """Get project path from registry.""" - import sys - root = Path(__file__).parent.parent.parent - if str(root) not in sys.path: - sys.path.insert(0, str(root)) - - from registry import get_project_path - return get_project_path(project_name) - - -def _get_settings_defaults() -> tuple[bool, str, int]: +def _get_settings_defaults() -> tuple[bool, str, int, bool, int]: """Get defaults from global settings. Returns: - Tuple of (yolo_mode, model, testing_agent_ratio) + Tuple of (yolo_mode, model, testing_agent_ratio, playwright_headless, batch_size) """ import sys root = Path(__file__).parent.parent.parent @@ -49,24 +40,18 @@ def _get_settings_defaults() -> tuple[bool, str, int]: except (ValueError, TypeError): testing_agent_ratio = 1 - return yolo_mode, model, testing_agent_ratio + playwright_headless = (settings.get("playwright_headless") or "true").lower() == "true" + + try: + batch_size = int(settings.get("batch_size", "3")) + except (ValueError, TypeError): + batch_size = 3 + + return yolo_mode, model, testing_agent_ratio, playwright_headless, batch_size router = APIRouter(prefix="/api/projects/{project_name}/agent", tags=["agent"]) -# Root directory for process manager -ROOT_DIR = Path(__file__).parent.parent.parent - - -def validate_project_name(name: str) -> str: - """Validate and sanitize project name to prevent path traversal.""" - if not re.match(r'^[a-zA-Z0-9_-]{1,50}$', name): - raise HTTPException( - status_code=400, - detail="Invalid project name" - ) - return name - def get_project_manager(project_name: str): """Get the process manager for a project.""" @@ -111,18 +96,22 @@ async def start_agent( manager = get_project_manager(project_name) # Get defaults from global settings if not provided in request - default_yolo, default_model, default_testing_ratio = _get_settings_defaults() + default_yolo, default_model, default_testing_ratio, playwright_headless, default_batch_size = _get_settings_defaults() yolo_mode = request.yolo_mode if request.yolo_mode is not None else default_yolo model = request.model if request.model else default_model max_concurrency = request.max_concurrency or 1 testing_agent_ratio = request.testing_agent_ratio if request.testing_agent_ratio is not None else default_testing_ratio + 
batch_size = default_batch_size + success, message = await manager.start( yolo_mode=yolo_mode, model=model, max_concurrency=max_concurrency, testing_agent_ratio=testing_agent_ratio, + playwright_headless=playwright_headless, + batch_size=batch_size, ) # Notify scheduler of manual start (to prevent auto-stop during scheduled window) diff --git a/server/routers/assistant_chat.py b/server/routers/assistant_chat.py index 32ba6f4..ceae8bd 100644 --- a/server/routers/assistant_chat.py +++ b/server/routers/assistant_chat.py @@ -7,8 +7,6 @@ WebSocket and REST endpoints for the read-only project assistant. import json import logging -import re -from pathlib import Path from typing import Optional from fastapi import APIRouter, HTTPException, WebSocket, WebSocketDisconnect @@ -27,30 +25,13 @@ from ..services.assistant_database import ( get_conversation, get_conversations, ) +from ..utils.project_helpers import get_project_path as _get_project_path +from ..utils.validation import is_valid_project_name as validate_project_name logger = logging.getLogger(__name__) router = APIRouter(prefix="/api/assistant", tags=["assistant-chat"]) -# Root directory -ROOT_DIR = Path(__file__).parent.parent.parent - - -def _get_project_path(project_name: str) -> Optional[Path]: - """Get project path from registry.""" - import sys - root = Path(__file__).parent.parent.parent - if str(root) not in sys.path: - sys.path.insert(0, str(root)) - - from registry import get_project_path - return get_project_path(project_name) - - -def validate_project_name(name: str) -> bool: - """Validate project name to prevent path traversal.""" - return bool(re.match(r'^[a-zA-Z0-9_-]{1,50}$', name)) - # ============================================================================ # Pydantic Models @@ -145,9 +126,9 @@ async def create_project_conversation(project_name: str): conversation = create_conversation(project_dir, project_name) return ConversationSummary( - id=conversation.id, - project_name=conversation.project_name, - title=conversation.title, + id=int(conversation.id), + project_name=str(conversation.project_name), + title=str(conversation.title) if conversation.title else None, created_at=conversation.created_at.isoformat() if conversation.created_at else None, updated_at=conversation.updated_at.isoformat() if conversation.updated_at else None, message_count=0, diff --git a/server/routers/expand_project.py b/server/routers/expand_project.py index 50bf196..3de2f44 100644 --- a/server/routers/expand_project.py +++ b/server/routers/expand_project.py @@ -8,7 +8,6 @@ Allows adding multiple features to existing projects via natural language. 
import json import logging -from pathlib import Path from typing import Optional from fastapi import APIRouter, HTTPException, WebSocket, WebSocketDisconnect @@ -22,27 +21,13 @@ from ..services.expand_chat_session import ( list_expand_sessions, remove_expand_session, ) +from ..utils.project_helpers import get_project_path as _get_project_path from ..utils.validation import validate_project_name logger = logging.getLogger(__name__) router = APIRouter(prefix="/api/expand", tags=["expand-project"]) -# Root directory -ROOT_DIR = Path(__file__).parent.parent.parent - - -def _get_project_path(project_name: str) -> Path: - """Get project path from registry.""" - import sys - root = Path(__file__).parent.parent.parent - if str(root) not in sys.path: - sys.path.insert(0, str(root)) - - from registry import get_project_path - return get_project_path(project_name) - - # ============================================================================ @@ -136,7 +121,8 @@ async def expand_project_websocket(websocket: WebSocket, project_name: str): return # Verify project has app_spec.txt - spec_path = project_dir / "prompts" / "app_spec.txt" + from autocoder_paths import get_prompts_dir + spec_path = get_prompts_dir(project_dir) / "app_spec.txt" if not spec_path.exists(): await websocket.close(code=4004, reason="Project has no spec. Create spec first.") return diff --git a/server/routers/features.py b/server/routers/features.py index c4c9c27..0c8c77d 100644 --- a/server/routers/features.py +++ b/server/routers/features.py @@ -8,10 +8,12 @@ API endpoints for feature/test case management. import logging from contextlib import contextmanager from pathlib import Path +from typing import Literal from fastapi import APIRouter, HTTPException from ..schemas import ( + DependencyGraphEdge, DependencyGraphNode, DependencyGraphResponse, DependencyUpdate, @@ -22,6 +24,7 @@ from ..schemas import ( FeatureResponse, FeatureUpdate, ) +from ..utils.project_helpers import get_project_path as _get_project_path from ..utils.validation import validate_project_name # Lazy imports to avoid circular dependencies @@ -31,17 +34,6 @@ _Feature = None logger = logging.getLogger(__name__) -def _get_project_path(project_name: str) -> Path: - """Get project path from registry.""" - import sys - root = Path(__file__).parent.parent.parent - if str(root) not in sys.path: - sys.path.insert(0, str(root)) - - from registry import get_project_path - return get_project_path(project_name) - - def _get_db_classes(): """Lazy import of database classes.""" global _create_database, _Feature @@ -71,6 +63,9 @@ def get_db_session(project_dir: Path): session = SessionLocal() try: yield session + except Exception: + session.rollback() + raise finally: session.close() @@ -131,7 +126,8 @@ async def list_features(project_name: str): if not project_dir.exists(): raise HTTPException(status_code=404, detail="Project directory not found") - db_file = project_dir / "features.db" + from autocoder_paths import get_features_db_path + db_file = get_features_db_path(project_dir) if not db_file.exists(): return FeatureListResponse(pending=[], in_progress=[], done=[]) @@ -326,7 +322,8 @@ async def get_dependency_graph(project_name: str): if not project_dir.exists(): raise HTTPException(status_code=404, detail="Project directory not found") - db_file = project_dir / "features.db" + from autocoder_paths import get_features_db_path + db_file = get_features_db_path(project_dir) if not db_file.exists(): return DependencyGraphResponse(nodes=[], edges=[]) @@ -344,6 +341,7 @@ 
async def get_dependency_graph(project_name: str): deps = f.dependencies or [] blocking = [d for d in deps if d not in passing_ids] + status: Literal["pending", "in_progress", "done", "blocked"] if f.passes: status = "done" elif blocking: @@ -363,7 +361,7 @@ async def get_dependency_graph(project_name: str): )) for dep_id in deps: - edges.append({"source": dep_id, "target": f.id}) + edges.append(DependencyGraphEdge(source=dep_id, target=f.id)) return DependencyGraphResponse(nodes=nodes, edges=edges) except HTTPException: @@ -390,7 +388,8 @@ async def get_feature(project_name: str, feature_id: int): if not project_dir.exists(): raise HTTPException(status_code=404, detail="Project directory not found") - db_file = project_dir / "features.db" + from autocoder_paths import get_features_db_path + db_file = get_features_db_path(project_dir) if not db_file.exists(): raise HTTPException(status_code=404, detail="No features database found") diff --git a/server/routers/filesystem.py b/server/routers/filesystem.py index eb6293b..cdf9bc5 100644 --- a/server/routers/filesystem.py +++ b/server/routers/filesystem.py @@ -6,6 +6,7 @@ API endpoints for browsing the filesystem for project folder selection. Provides cross-platform support for Windows, macOS, and Linux. """ +import functools import logging import os import re @@ -14,6 +15,8 @@ from pathlib import Path from fastapi import APIRouter, HTTPException, Query +from security import SENSITIVE_DIRECTORIES + # Module logger logger = logging.getLogger(__name__) @@ -77,17 +80,10 @@ LINUX_BLOCKED = { "/opt", } -# Universal blocked paths (relative to home directory) -UNIVERSAL_BLOCKED_RELATIVE = { - ".ssh", - ".aws", - ".gnupg", - ".config/gh", - ".netrc", - ".docker", - ".kube", - ".terraform", -} +# Universal blocked paths (relative to home directory). +# Delegates to the canonical SENSITIVE_DIRECTORIES set in security.py so that +# the filesystem browser and the EXTRA_READ_PATHS validator share one source of truth. +UNIVERSAL_BLOCKED_RELATIVE = SENSITIVE_DIRECTORIES # Patterns for files that should not be shown HIDDEN_PATTERNS = [ @@ -99,8 +95,14 @@ HIDDEN_PATTERNS = [ ] -def get_blocked_paths() -> set[Path]: - """Get the set of blocked paths for the current platform.""" +@functools.lru_cache(maxsize=1) +def get_blocked_paths() -> frozenset[Path]: + """ + Get the set of blocked paths for the current platform. + + Cached because the platform and home directory do not change at runtime, + and this function is called once per directory entry in list_directory(). + """ home = Path.home() blocked = set() @@ -119,7 +121,7 @@ def get_blocked_paths() -> set[Path]: for rel in UNIVERSAL_BLOCKED_RELATIVE: blocked.add((home / rel).resolve()) - return blocked + return frozenset(blocked) def is_path_blocked(path: Path) -> bool: diff --git a/server/routers/projects.py b/server/routers/projects.py index 0f76ff9..bfa5b9c 100644 --- a/server/routers/projects.py +++ b/server/routers/projects.py @@ -10,6 +10,7 @@ import re import shutil import sys from pathlib import Path +from typing import Any, Callable from fastapi import APIRouter, HTTPException @@ -24,11 +25,12 @@ from ..schemas import ( ) # Lazy imports to avoid circular dependencies +# These are initialized by _init_imports() before first use. 
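+# Each holds a Callable once initialized; the `assert ... is not None` guards
+# added in the route handlers below narrow the Optional type for mypy after
+# _init_imports() has run.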
_imports_initialized = False -_check_spec_exists = None -_scaffold_project_prompts = None -_get_project_prompts_dir = None -_count_passing_tests = None +_check_spec_exists: Callable[..., Any] | None = None +_scaffold_project_prompts: Callable[..., Any] | None = None +_get_project_prompts_dir: Callable[..., Any] | None = None +_count_passing_tests: Callable[..., Any] | None = None def _init_imports(): @@ -99,6 +101,7 @@ def validate_project_name(name: str) -> str: def get_project_stats(project_dir: Path) -> ProjectStats: """Get statistics for a project.""" _init_imports() + assert _count_passing_tests is not None # guaranteed by _init_imports() passing, in_progress, total = _count_passing_tests(project_dir) percentage = (passing / total * 100) if total > 0 else 0.0 return ProjectStats( @@ -113,6 +116,7 @@ def get_project_stats(project_dir: Path) -> ProjectStats: async def list_projects(): """List all registered projects.""" _init_imports() + assert _check_spec_exists is not None # guaranteed by _init_imports() (_, _, _, list_registered_projects, validate_project_path, get_project_concurrency, _) = _get_registry_functions() @@ -145,6 +149,7 @@ async def list_projects(): async def create_project(project: ProjectCreate): """Create a new project at the specified path.""" _init_imports() + assert _scaffold_project_prompts is not None # guaranteed by _init_imports() (register_project, _, get_project_path, list_registered_projects, _, _, _) = _get_registry_functions() @@ -225,6 +230,8 @@ async def create_project(project: ProjectCreate): async def get_project(name: str): """Get detailed information about a project.""" _init_imports() + assert _check_spec_exists is not None # guaranteed by _init_imports() + assert _get_project_prompts_dir is not None # guaranteed by _init_imports() (_, _, get_project_path, _, _, get_project_concurrency, _) = _get_registry_functions() name = validate_project_name(name) @@ -269,8 +276,8 @@ async def delete_project(name: str, delete_files: bool = False): raise HTTPException(status_code=404, detail=f"Project '{name}' not found") # Check if agent is running - lock_file = project_dir / ".agent.lock" - if lock_file.exists(): + from autocoder_paths import has_agent_running + if has_agent_running(project_dir): raise HTTPException( status_code=409, detail="Cannot delete project while agent is running. Stop the agent first." 
@@ -296,6 +303,7 @@ async def delete_project(name: str, delete_files: bool = False): async def get_project_prompts(name: str): """Get the content of project prompt files.""" _init_imports() + assert _get_project_prompts_dir is not None # guaranteed by _init_imports() (_, _, get_project_path, _, _, _, _) = _get_registry_functions() name = validate_project_name(name) @@ -307,7 +315,7 @@ async def get_project_prompts(name: str): if not project_dir.exists(): raise HTTPException(status_code=404, detail="Project directory not found") - prompts_dir = _get_project_prompts_dir(project_dir) + prompts_dir: Path = _get_project_prompts_dir(project_dir) def read_file(filename: str) -> str: filepath = prompts_dir / filename @@ -329,6 +337,7 @@ async def get_project_prompts(name: str): async def update_project_prompts(name: str, prompts: ProjectPromptsUpdate): """Update project prompt files.""" _init_imports() + assert _get_project_prompts_dir is not None # guaranteed by _init_imports() (_, _, get_project_path, _, _, _, _) = _get_registry_functions() name = validate_project_name(name) @@ -398,8 +407,8 @@ async def reset_project(name: str, full_reset: bool = False): raise HTTPException(status_code=404, detail="Project directory not found") # Check if agent is running - lock_file = project_dir / ".agent.lock" - if lock_file.exists(): + from autocoder_paths import has_agent_running + if has_agent_running(project_dir): raise HTTPException( status_code=409, detail="Cannot reset project while agent is running. Stop the agent first." @@ -415,36 +424,58 @@ async def reset_project(name: str, full_reset: bool = False): deleted_files: list[str] = [] - # Files to delete in quick reset - quick_reset_files = [ - "features.db", - "features.db-wal", # WAL mode journal file - "features.db-shm", # WAL mode shared memory file - "assistant.db", - "assistant.db-wal", - "assistant.db-shm", - ".claude_settings.json", - ".claude_assistant_settings.json", + from autocoder_paths import ( + get_assistant_db_path, + get_claude_assistant_settings_path, + get_claude_settings_path, + get_features_db_path, + ) + + # Build list of files to delete using path helpers (finds files at current location) + # Plus explicit old-location fallbacks for backward compatibility + db_path = get_features_db_path(project_dir) + asst_path = get_assistant_db_path(project_dir) + reset_files: list[Path] = [ + db_path, + db_path.with_suffix(".db-wal"), + db_path.with_suffix(".db-shm"), + asst_path, + asst_path.with_suffix(".db-wal"), + asst_path.with_suffix(".db-shm"), + get_claude_settings_path(project_dir), + get_claude_assistant_settings_path(project_dir), + # Also clean old root-level locations if they exist + project_dir / "features.db", + project_dir / "features.db-wal", + project_dir / "features.db-shm", + project_dir / "assistant.db", + project_dir / "assistant.db-wal", + project_dir / "assistant.db-shm", + project_dir / ".claude_settings.json", + project_dir / ".claude_assistant_settings.json", ] - for filename in quick_reset_files: - file_path = project_dir / filename + for file_path in reset_files: if file_path.exists(): try: + relative = file_path.relative_to(project_dir) file_path.unlink() - deleted_files.append(filename) + deleted_files.append(str(relative)) except Exception as e: - raise HTTPException(status_code=500, detail=f"Failed to delete {filename}: {e}") + raise HTTPException(status_code=500, detail=f"Failed to delete {file_path.name}: {e}") # Full reset: also delete prompts directory if full_reset: - prompts_dir = project_dir / 
"prompts" - if prompts_dir.exists(): - try: - shutil.rmtree(prompts_dir) - deleted_files.append("prompts/") - except Exception as e: - raise HTTPException(status_code=500, detail=f"Failed to delete prompts/: {e}") + from autocoder_paths import get_prompts_dir + # Delete prompts from both possible locations + for prompts_dir in [get_prompts_dir(project_dir), project_dir / "prompts"]: + if prompts_dir.exists(): + try: + relative = prompts_dir.relative_to(project_dir) + shutil.rmtree(prompts_dir) + deleted_files.append(f"{relative}/") + except Exception as e: + raise HTTPException(status_code=500, detail=f"Failed to delete prompts: {e}") return { "success": True, @@ -458,6 +489,8 @@ async def reset_project(name: str, full_reset: bool = False): async def update_project_settings(name: str, settings: ProjectSettingsUpdate): """Update project-level settings (concurrency, etc.).""" _init_imports() + assert _check_spec_exists is not None # guaranteed by _init_imports() + assert _get_project_prompts_dir is not None # guaranteed by _init_imports() (_, _, get_project_path, _, _, get_project_concurrency, set_project_concurrency) = _get_registry_functions() diff --git a/server/routers/schedules.py b/server/routers/schedules.py index 2a11ba3..1758f62 100644 --- a/server/routers/schedules.py +++ b/server/routers/schedules.py @@ -6,12 +6,10 @@ API endpoints for managing agent schedules. Provides CRUD operations for time-based schedule configuration. """ -import re -import sys from contextlib import contextmanager from datetime import datetime, timedelta, timezone from pathlib import Path -from typing import Generator, Tuple +from typing import TYPE_CHECKING, Generator, Tuple from fastapi import APIRouter, HTTPException from sqlalchemy.orm import Session @@ -26,17 +24,21 @@ from ..schemas import ( ScheduleResponse, ScheduleUpdate, ) +from ..utils.project_helpers import get_project_path as _get_project_path +from ..utils.validation import validate_project_name + +if TYPE_CHECKING: + from api.database import Schedule as ScheduleModel -def _get_project_path(project_name: str) -> Path: - """Get project path from registry.""" - root = Path(__file__).parent.parent.parent - if str(root) not in sys.path: - sys.path.insert(0, str(root)) - - from registry import get_project_path - return get_project_path(project_name) +def _schedule_to_response(schedule: "ScheduleModel") -> ScheduleResponse: + """Convert a Schedule ORM object to a ScheduleResponse Pydantic model. + SQLAlchemy Column descriptors resolve to Python types at instance access time, + but mypy sees the Column[T] descriptor type. Using model_validate with + from_attributes handles this conversion correctly. + """ + return ScheduleResponse.model_validate(schedule, from_attributes=True) router = APIRouter( prefix="/api/projects/{project_name}/schedules", @@ -44,16 +46,6 @@ router = APIRouter( ) -def validate_project_name(name: str) -> str: - """Validate and sanitize project name to prevent path traversal.""" - if not re.match(r'^[a-zA-Z0-9_-]{1,50}$', name): - raise HTTPException( - status_code=400, - detail="Invalid project name" - ) - return name - - @contextmanager def _get_db_session(project_name: str) -> Generator[Tuple[Session, Path], None, None]: """Get database session for a project as a context manager. 
@@ -84,6 +76,9 @@ def _get_db_session(project_name: str) -> Generator[Tuple[Session, Path], None, db = SessionLocal() try: yield db, project_path + except Exception: + db.rollback() + raise finally: db.close() @@ -99,21 +94,7 @@ async def list_schedules(project_name: str): ).order_by(Schedule.start_time).all() return ScheduleListResponse( - schedules=[ - ScheduleResponse( - id=s.id, - project_name=s.project_name, - start_time=s.start_time, - duration_minutes=s.duration_minutes, - days_of_week=s.days_of_week, - enabled=s.enabled, - yolo_mode=s.yolo_mode, - model=s.model, - crash_count=s.crash_count, - created_at=s.created_at, - ) - for s in schedules - ] + schedules=[_schedule_to_response(s) for s in schedules] ) @@ -187,18 +168,7 @@ async def create_schedule(project_name: str, data: ScheduleCreate): except Exception as e: logger.error(f"Failed to start agent for schedule {schedule.id}: {e}", exc_info=True) - return ScheduleResponse( - id=schedule.id, - project_name=schedule.project_name, - start_time=schedule.start_time, - duration_minutes=schedule.duration_minutes, - days_of_week=schedule.days_of_week, - enabled=schedule.enabled, - yolo_mode=schedule.yolo_mode, - model=schedule.model, - crash_count=schedule.crash_count, - created_at=schedule.created_at, - ) + return _schedule_to_response(schedule) @router.get("/next", response_model=NextRunResponse) @@ -256,8 +226,8 @@ async def get_next_scheduled_run(project_name: str): return NextRunResponse( has_schedules=True, - next_start=next_start.isoformat() if (active_count == 0 and next_start) else None, - next_end=latest_end.isoformat() if latest_end else None, + next_start=next_start if active_count == 0 else None, + next_end=latest_end, is_currently_running=active_count > 0, active_schedule_count=active_count, ) @@ -277,18 +247,7 @@ async def get_schedule(project_name: str, schedule_id: int): if not schedule: raise HTTPException(status_code=404, detail="Schedule not found") - return ScheduleResponse( - id=schedule.id, - project_name=schedule.project_name, - start_time=schedule.start_time, - duration_minutes=schedule.duration_minutes, - days_of_week=schedule.days_of_week, - enabled=schedule.enabled, - yolo_mode=schedule.yolo_mode, - model=schedule.model, - crash_count=schedule.crash_count, - created_at=schedule.created_at, - ) + return _schedule_to_response(schedule) @router.patch("/{schedule_id}", response_model=ScheduleResponse) @@ -331,18 +290,7 @@ async def update_schedule( # Was enabled, now disabled - remove jobs scheduler.remove_schedule(schedule_id) - return ScheduleResponse( - id=schedule.id, - project_name=schedule.project_name, - start_time=schedule.start_time, - duration_minutes=schedule.duration_minutes, - days_of_week=schedule.days_of_week, - enabled=schedule.enabled, - yolo_mode=schedule.yolo_mode, - model=schedule.model, - crash_count=schedule.crash_count, - created_at=schedule.created_at, - ) + return _schedule_to_response(schedule) @router.delete("/{schedule_id}", status_code=204) diff --git a/server/routers/settings.py b/server/routers/settings.py index 8f3f906..77b4a4d 100644 --- a/server/routers/settings.py +++ b/server/routers/settings.py @@ -9,17 +9,16 @@ Settings are stored in the registry database and shared across all projects. 
import mimetypes import os import sys -from pathlib import Path from fastapi import APIRouter from ..schemas import ModelInfo, ModelsResponse, SettingsResponse, SettingsUpdate +from ..services.chat_constants import ROOT_DIR # Mimetype fix for Windows - must run before StaticFiles is mounted mimetypes.add_type("text/javascript", ".js", True) -# Add root to path for registry import -ROOT_DIR = Path(__file__).parent.parent.parent +# Ensure root is on sys.path for registry import if str(ROOT_DIR) not in sys.path: sys.path.insert(0, str(ROOT_DIR)) @@ -92,6 +91,8 @@ async def get_settings(): glm_mode=_is_glm_mode(), ollama_mode=_is_ollama_mode(), testing_agent_ratio=_parse_int(all_settings.get("testing_agent_ratio"), 1), + playwright_headless=_parse_bool(all_settings.get("playwright_headless"), default=True), + batch_size=_parse_int(all_settings.get("batch_size"), 3), ) @@ -107,6 +108,12 @@ async def update_settings(update: SettingsUpdate): if update.testing_agent_ratio is not None: set_setting("testing_agent_ratio", str(update.testing_agent_ratio)) + if update.playwright_headless is not None: + set_setting("playwright_headless", "true" if update.playwright_headless else "false") + + if update.batch_size is not None: + set_setting("batch_size", str(update.batch_size)) + # Return updated settings all_settings = get_all_settings() return SettingsResponse( @@ -115,4 +122,6 @@ async def update_settings(update: SettingsUpdate): glm_mode=_is_glm_mode(), ollama_mode=_is_ollama_mode(), testing_agent_ratio=_parse_int(all_settings.get("testing_agent_ratio"), 1), + playwright_headless=_parse_bool(all_settings.get("playwright_headless"), default=True), + batch_size=_parse_int(all_settings.get("batch_size"), 3), ) diff --git a/server/routers/spec_creation.py b/server/routers/spec_creation.py index 87f79a6..e6e917a 100644 --- a/server/routers/spec_creation.py +++ b/server/routers/spec_creation.py @@ -7,8 +7,6 @@ WebSocket and REST endpoints for interactive spec creation with Claude. 
import json import logging -import re -from pathlib import Path from typing import Optional from fastapi import APIRouter, HTTPException, WebSocket, WebSocketDisconnect @@ -22,30 +20,13 @@ from ..services.spec_chat_session import ( list_sessions, remove_session, ) +from ..utils.project_helpers import get_project_path as _get_project_path +from ..utils.validation import is_valid_project_name as validate_project_name logger = logging.getLogger(__name__) router = APIRouter(prefix="/api/spec", tags=["spec-creation"]) -# Root directory -ROOT_DIR = Path(__file__).parent.parent.parent - - -def _get_project_path(project_name: str) -> Path: - """Get project path from registry.""" - import sys - root = Path(__file__).parent.parent.parent - if str(root) not in sys.path: - sys.path.insert(0, str(root)) - - from registry import get_project_path - return get_project_path(project_name) - - -def validate_project_name(name: str) -> bool: - """Validate project name to prevent path traversal.""" - return bool(re.match(r'^[a-zA-Z0-9_-]{1,50}$', name)) - # ============================================================================ # REST Endpoints @@ -124,7 +105,8 @@ async def get_spec_file_status(project_name: str): if not project_dir.exists(): raise HTTPException(status_code=404, detail="Project directory not found") - status_file = project_dir / "prompts" / ".spec_status.json" + from autocoder_paths import get_prompts_dir + status_file = get_prompts_dir(project_dir) / ".spec_status.json" if not status_file.exists(): return SpecFileStatus( diff --git a/server/routers/terminal.py b/server/routers/terminal.py index 2183369..a53b9ab 100644 --- a/server/routers/terminal.py +++ b/server/routers/terminal.py @@ -12,8 +12,6 @@ import base64 import json import logging import re -import sys -from pathlib import Path from fastapi import APIRouter, HTTPException, WebSocket, WebSocketDisconnect from pydantic import BaseModel @@ -27,13 +25,8 @@ from ..services.terminal_manager import ( rename_terminal, stop_terminal_session, ) - -# Add project root to path for registry import -_root = Path(__file__).parent.parent.parent -if str(_root) not in sys.path: - sys.path.insert(0, str(_root)) - -from registry import get_project_path as registry_get_project_path +from ..utils.project_helpers import get_project_path as _get_project_path +from ..utils.validation import is_valid_project_name as validate_project_name logger = logging.getLogger(__name__) @@ -48,27 +41,6 @@ class TerminalCloseCode: FAILED_TO_START = 4500 -def _get_project_path(project_name: str) -> Path | None: - """Get project path from registry.""" - return registry_get_project_path(project_name) - - -def validate_project_name(name: str) -> bool: - """ - Validate project name to prevent path traversal attacks. - - Allows only alphanumeric characters, underscores, and hyphens. - Maximum length of 50 characters. - - Args: - name: The project name to validate - - Returns: - True if valid, False otherwise - """ - return bool(re.match(r"^[a-zA-Z0-9_-]{1,50}$", name)) - - def validate_terminal_id(terminal_id: str) -> bool: """ Validate terminal ID format. 
diff --git a/server/schemas.py b/server/schemas.py index 03e73ef..e15f1b3 100644 --- a/server/schemas.py +++ b/server/schemas.py @@ -398,6 +398,8 @@ class SettingsResponse(BaseModel): glm_mode: bool = False # True if GLM API is configured via .env ollama_mode: bool = False # True if Ollama API is configured via .env testing_agent_ratio: int = 1 # Regression testing agents (0-3) + playwright_headless: bool = True + batch_size: int = 3 # Features per coding agent batch (1-3) class ModelsResponse(BaseModel): @@ -411,6 +413,8 @@ class SettingsUpdate(BaseModel): yolo_mode: bool | None = None model: str | None = None testing_agent_ratio: int | None = None # 0-3 + playwright_headless: bool | None = None + batch_size: int | None = None # Features per agent batch (1-3) @field_validator('model') @classmethod @@ -426,6 +430,13 @@ class SettingsUpdate(BaseModel): raise ValueError("testing_agent_ratio must be between 0 and 3") return v + @field_validator('batch_size') + @classmethod + def validate_batch_size(cls, v: int | None) -> int | None: + if v is not None and (v < 1 or v > 3): + raise ValueError("batch_size must be between 1 and 3") + return v + # ============================================================================ # Dev Server Schemas diff --git a/server/services/assistant_chat_session.py b/server/services/assistant_chat_session.py index f15eee8..182232c 100755 --- a/server/services/assistant_chat_session.py +++ b/server/services/assistant_chat_session.py @@ -25,25 +25,13 @@ from .assistant_database import ( create_conversation, get_messages, ) +from .chat_constants import API_ENV_VARS, ROOT_DIR # Load environment variables from .env file if present load_dotenv() logger = logging.getLogger(__name__) -# Root directory of the project -ROOT_DIR = Path(__file__).parent.parent.parent - -# Environment variables to pass through to Claude CLI for API configuration -API_ENV_VARS = [ - "ANTHROPIC_BASE_URL", - "ANTHROPIC_AUTH_TOKEN", - "API_TIMEOUT_MS", - "ANTHROPIC_DEFAULT_SONNET_MODEL", - "ANTHROPIC_DEFAULT_OPUS_MODEL", - "ANTHROPIC_DEFAULT_HAIKU_MODEL", -] - # Read-only feature MCP tools READONLY_FEATURE_MCP_TOOLS = [ "mcp__features__feature_get_stats", @@ -76,7 +64,8 @@ def get_system_prompt(project_name: str, project_dir: Path) -> str: """Generate the system prompt for the assistant with project context.""" # Try to load app_spec.txt for context app_spec_content = "" - app_spec_path = project_dir / "prompts" / "app_spec.txt" + from autocoder_paths import get_prompts_dir + app_spec_path = get_prompts_dir(project_dir) / "app_spec.txt" if app_spec_path.exists(): try: app_spec_content = app_spec_path.read_text(encoding="utf-8") @@ -90,6 +79,8 @@ def get_system_prompt(project_name: str, project_dir: Path) -> str: Your role is to help users understand the codebase, answer questions about features, and manage the project backlog. You can READ files and CREATE/MANAGE features, but you cannot modify source code. +You have MCP tools available for feature management. Use them directly by calling the tool -- do not suggest CLI commands, bash commands, or curl commands to the user. You can create features yourself using the feature_create and feature_create_bulk tools. + ## What You CAN Do **Codebase Analysis (Read-Only):** @@ -134,17 +125,21 @@ If the user asks you to modify code, explain that you're a project assistant and ## Creating Features -When a user asks to add a feature, gather the following information: -1. **Category**: A grouping like "Authentication", "API", "UI", "Database" -2. 
**Name**: A concise, descriptive name -3. **Description**: What the feature should do -4. **Steps**: How to verify/implement the feature (as a list) +When a user asks to add a feature, use the `feature_create` or `feature_create_bulk` MCP tools directly: + +For a **single feature**, call `feature_create` with: +- category: A grouping like "Authentication", "API", "UI", "Database" +- name: A concise, descriptive name +- description: What the feature should do +- steps: List of verification/implementation steps + +For **multiple features**, call `feature_create_bulk` with an array of feature objects. You can ask clarifying questions if the user's request is vague, or make reasonable assumptions for simple requests. **Example interaction:** User: "Add a feature for S3 sync" -You: I'll create that feature. Let me add it to the backlog... +You: I'll create that feature now. [calls feature_create with appropriate parameters] You: Done! I've added "S3 Sync Integration" to your backlog. It's now visible on the kanban board. @@ -208,7 +203,7 @@ class AssistantChatSession: # Create a new conversation if we don't have one if is_new_conversation: conv = create_conversation(self.project_dir, self.project_name) - self.conversation_id = conv.id + self.conversation_id = int(conv.id) # type coercion: Column[int] -> int yield {"type": "conversation_created", "conversation_id": self.conversation_id} # Build permissions list for assistant access (read + feature management) @@ -229,7 +224,9 @@ class AssistantChatSession: "allow": permissions_list, }, } - settings_file = self.project_dir / ".claude_assistant_settings.json" + from autocoder_paths import get_claude_assistant_settings_path + settings_file = get_claude_assistant_settings_path(self.project_dir) + settings_file.parent.mkdir(parents=True, exist_ok=True) with open(settings_file, "w") as f: json.dump(security_settings, f, indent=2) @@ -261,7 +258,11 @@ class AssistantChatSession: system_cli = shutil.which("claude") # Build environment overrides for API configuration - sdk_env = {var: os.getenv(var) for var in API_ENV_VARS if os.getenv(var)} + sdk_env: dict[str, str] = {} + for var in API_ENV_VARS: + value = os.getenv(var) + if value: + sdk_env[var] = value # Determine model from environment or use default # This allows using alternative APIs (e.g., GLM via z.ai) that may not support Claude model names @@ -277,7 +278,7 @@ class AssistantChatSession: # This avoids Windows command line length limit (~8191 chars) setting_sources=["project"], allowed_tools=[*READONLY_BUILTIN_TOOLS, *ASSISTANT_FEATURE_TOOLS], - mcp_servers=mcp_servers, + mcp_servers=mcp_servers, # type: ignore[arg-type] # SDK accepts dict config at runtime permission_mode="bypassPermissions", max_turns=100, cwd=str(self.project_dir.resolve()), @@ -303,6 +304,8 @@ class AssistantChatSession: greeting = f"Hello! I'm your project assistant for **{self.project_name}**. I can help you understand the codebase, explain features, and answer questions about the project. What would you like to know?" 
# Store the greeting in the database + # conversation_id is guaranteed non-None here (set on line 206 above) + assert self.conversation_id is not None add_message(self.project_dir, self.conversation_id, "assistant", greeting) yield {"type": "text", "content": greeting} diff --git a/server/services/assistant_database.py b/server/services/assistant_database.py index f2ade75..1d0e9a6 100644 --- a/server/services/assistant_database.py +++ b/server/services/assistant_database.py @@ -7,20 +7,28 @@ Each project has its own assistant.db file in the project directory. """ import logging +import threading from datetime import datetime, timezone from pathlib import Path from typing import Optional from sqlalchemy import Column, DateTime, ForeignKey, Integer, String, Text, create_engine, func -from sqlalchemy.orm import declarative_base, relationship, sessionmaker +from sqlalchemy.engine import Engine +from sqlalchemy.orm import DeclarativeBase, relationship, sessionmaker logger = logging.getLogger(__name__) -Base = declarative_base() +class Base(DeclarativeBase): + """SQLAlchemy 2.0 style declarative base.""" + pass # Engine cache to avoid creating new engines for each request # Key: project directory path (as posix string), Value: SQLAlchemy engine -_engine_cache: dict[str, object] = {} +_engine_cache: dict[str, Engine] = {} + +# Lock for thread-safe access to the engine cache +# Prevents race conditions when multiple threads create engines simultaneously +_cache_lock = threading.Lock() def _utc_now() -> datetime: @@ -56,7 +64,8 @@ class ConversationMessage(Base): def get_db_path(project_dir: Path) -> Path: """Get the path to the assistant database for a project.""" - return project_dir / "assistant.db" + from autocoder_paths import get_assistant_db_path + return get_assistant_db_path(project_dir) def get_engine(project_dir: Path): @@ -64,17 +73,33 @@ def get_engine(project_dir: Path): Uses a cache to avoid creating new engines for each request, which improves performance by reusing database connections. + + Thread-safe: Uses a lock to prevent race conditions when multiple threads + try to create engines simultaneously for the same project. 
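+
+    Example (illustrative; the path is a placeholder project directory):
+        engine = get_engine(Path("/path/to/project"))
+        SessionLocal = sessionmaker(bind=engine)
+        session = SessionLocal()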
""" cache_key = project_dir.as_posix() - if cache_key not in _engine_cache: - db_path = get_db_path(project_dir) - # Use as_posix() for cross-platform compatibility with SQLite connection strings - db_url = f"sqlite:///{db_path.as_posix()}" - engine = create_engine(db_url, echo=False) - Base.metadata.create_all(engine) - _engine_cache[cache_key] = engine - logger.debug(f"Created new database engine for {cache_key}") + # Double-checked locking for thread safety and performance + if cache_key in _engine_cache: + return _engine_cache[cache_key] + + with _cache_lock: + # Check again inside the lock in case another thread created it + if cache_key not in _engine_cache: + db_path = get_db_path(project_dir) + # Use as_posix() for cross-platform compatibility with SQLite connection strings + db_url = f"sqlite:///{db_path.as_posix()}" + engine = create_engine( + db_url, + echo=False, + connect_args={ + "check_same_thread": False, + "timeout": 30, # Wait up to 30s for locks + } + ) + Base.metadata.create_all(engine) + _engine_cache[cache_key] = engine + logger.debug(f"Created new database engine for {cache_key}") return _engine_cache[cache_key] diff --git a/server/services/chat_constants.py b/server/services/chat_constants.py new file mode 100644 index 0000000..6af3c1b --- /dev/null +++ b/server/services/chat_constants.py @@ -0,0 +1,57 @@ +""" +Chat Session Constants +====================== + +Shared constants for all chat session types (assistant, spec, expand). + +The canonical ``API_ENV_VARS`` list lives in ``env_constants.py`` at the +project root and is re-exported here for convenience so that existing +imports (``from .chat_constants import API_ENV_VARS``) continue to work. +""" + +import sys +from pathlib import Path +from typing import AsyncGenerator + +# ------------------------------------------------------------------- +# Root directory of the autocoder project (repository root). +# Used throughout the server package whenever the repo root is needed. +# ------------------------------------------------------------------- +ROOT_DIR = Path(__file__).parent.parent.parent + +# Ensure the project root is on sys.path so we can import env_constants +# from the root-level module without requiring a package install. +_root_str = str(ROOT_DIR) +if _root_str not in sys.path: + sys.path.insert(0, _root_str) + +# ------------------------------------------------------------------- +# Environment variables forwarded to Claude CLI subprocesses. +# Single source of truth lives in env_constants.py at the project root. +# Re-exported here so existing ``from .chat_constants import API_ENV_VARS`` +# imports continue to work unchanged. +# ------------------------------------------------------------------- +from env_constants import API_ENV_VARS # noqa: E402, F401 + + +async def make_multimodal_message(content_blocks: list[dict]) -> AsyncGenerator[dict, None]: + """Yield a single multimodal user message in Claude Agent SDK format. + + The Claude Agent SDK's ``query()`` method accepts either a plain string + or an ``AsyncIterable[dict]`` for custom message formats. This helper + wraps a list of content blocks (text and/or images) in the expected + envelope. + + Args: + content_blocks: List of content-block dicts, e.g. + ``[{"type": "text", "text": "..."}, {"type": "image", ...}]``. + + Yields: + A single dict representing the user message. 
+ """ + yield { + "type": "user", + "message": {"role": "user", "content": content_blocks}, + "parent_tool_use_id": None, + "session_id": "default", + } diff --git a/server/services/expand_chat_session.py b/server/services/expand_chat_session.py index 58dd50d..4fd0978 100644 --- a/server/services/expand_chat_session.py +++ b/server/services/expand_chat_session.py @@ -16,28 +16,19 @@ import threading import uuid from datetime import datetime from pathlib import Path -from typing import AsyncGenerator, Optional +from typing import Any, AsyncGenerator, Optional from claude_agent_sdk import ClaudeAgentOptions, ClaudeSDKClient from dotenv import load_dotenv from ..schemas import ImageAttachment +from .chat_constants import API_ENV_VARS, ROOT_DIR, make_multimodal_message # Load environment variables from .env file if present load_dotenv() logger = logging.getLogger(__name__) -# Environment variables to pass through to Claude CLI for API configuration -API_ENV_VARS = [ - "ANTHROPIC_BASE_URL", - "ANTHROPIC_AUTH_TOKEN", - "API_TIMEOUT_MS", - "ANTHROPIC_DEFAULT_SONNET_MODEL", - "ANTHROPIC_DEFAULT_OPUS_MODEL", - "ANTHROPIC_DEFAULT_HAIKU_MODEL", -] - # Feature MCP tools needed for expand session EXPAND_FEATURE_TOOLS = [ "mcp__features__feature_create", @@ -46,22 +37,6 @@ EXPAND_FEATURE_TOOLS = [ ] -async def _make_multimodal_message(content_blocks: list[dict]) -> AsyncGenerator[dict, None]: - """ - Create an async generator that yields a properly formatted multimodal message. - """ - yield { - "type": "user", - "message": {"role": "user", "content": content_blocks}, - "parent_tool_use_id": None, - "session_id": "default", - } - - -# Root directory of the project -ROOT_DIR = Path(__file__).parent.parent.parent - - class ExpandChatSession: """ Manages a project expansion conversation. 
@@ -128,7 +103,8 @@ class ExpandChatSession: return # Verify project has existing spec - spec_path = self.project_dir / "prompts" / "app_spec.txt" + from autocoder_paths import get_prompts_dir + spec_path = get_prompts_dir(self.project_dir) / "app_spec.txt" if not spec_path.exists(): yield { "type": "error", @@ -162,10 +138,13 @@ class ExpandChatSession: "allow": [ "Read(./**)", "Glob(./**)", + *EXPAND_FEATURE_TOOLS, ], }, } - settings_file = self.project_dir / f".claude_settings.expand.{uuid.uuid4().hex}.json" + from autocoder_paths import get_expand_settings_path + settings_file = get_expand_settings_path(self.project_dir, uuid.uuid4().hex) + settings_file.parent.mkdir(parents=True, exist_ok=True) self._settings_file = settings_file with open(settings_file, "w", encoding="utf-8") as f: json.dump(security_settings, f, indent=2) @@ -175,7 +154,12 @@ class ExpandChatSession: system_prompt = skill_content.replace("$ARGUMENTS", project_path) # Build environment overrides for API configuration - sdk_env = {var: os.getenv(var) for var in API_ENV_VARS if os.getenv(var)} + # Filter to only include vars that are actually set (non-None) + sdk_env: dict[str, str] = {} + for var in API_ENV_VARS: + value = os.getenv(var) + if value: + sdk_env[var] = value # Determine model from environment or use default # This allows using alternative APIs (e.g., GLM via z.ai) that may not support Claude model names @@ -203,9 +187,12 @@ class ExpandChatSession: allowed_tools=[ "Read", "Glob", + "Grep", + "WebFetch", + "WebSearch", *EXPAND_FEATURE_TOOLS, ], - mcp_servers=mcp_servers, + mcp_servers=mcp_servers, # type: ignore[arg-type] # SDK accepts dict config at runtime permission_mode="bypassPermissions", max_turns=100, cwd=str(self.project_dir.resolve()), @@ -299,7 +286,7 @@ class ExpandChatSession: # Build the message content if attachments and len(attachments) > 0: - content_blocks = [] + content_blocks: list[dict[str, Any]] = [] if message: content_blocks.append({"type": "text", "text": message}) for att in attachments: @@ -311,7 +298,7 @@ class ExpandChatSession: "data": att.base64Data, } }) - await self.client.query(_make_multimodal_message(content_blocks)) + await self.client.query(make_multimodal_message(content_blocks)) logger.info(f"Sent multimodal message with {len(attachments)} image(s)") else: await self.client.query(message) diff --git a/server/services/process_manager.py b/server/services/process_manager.py index fd1a192..3340cd1 100644 --- a/server/services/process_manager.py +++ b/server/services/process_manager.py @@ -15,7 +15,7 @@ import sys import threading from datetime import datetime from pathlib import Path -from typing import Awaitable, Callable, Literal, Set +from typing import Any, Awaitable, Callable, Literal, Set import psutil @@ -92,7 +92,8 @@ class AgentProcessManager: self._callbacks_lock = threading.Lock() # Lock file to prevent multiple instances (stored in project directory) - self.lock_file = self.project_dir / ".agent.lock" + from autocoder_paths import get_agent_lock_path + self.lock_file = get_agent_lock_path(self.project_dir) @property def status(self) -> Literal["stopped", "running", "paused", "crashed"]: @@ -296,6 +297,8 @@ class AgentProcessManager: parallel_mode: bool = False, max_concurrency: int | None = None, testing_agent_ratio: int = 1, + playwright_headless: bool = True, + batch_size: int = 3, ) -> tuple[bool, str]: """ Start the agent as a subprocess. 
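+
+        Example (illustrative; the values mirror the defaults resolved by the
+        agent router before calling this method):
+            ok, msg = await manager.start(
+                yolo_mode=False,
+                model="claude-opus-4-5-20251101",
+                max_concurrency=1,
+                testing_agent_ratio=1,
+                playwright_headless=True,
+                batch_size=3,
+            )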
@@ -306,6 +309,7 @@ class AgentProcessManager: parallel_mode: DEPRECATED - ignored, always uses unified orchestrator max_concurrency: Max concurrent coding agents (1-5, default 1) testing_agent_ratio: Number of regression testing agents (0-3, default 1) + playwright_headless: If True, run browser in headless mode Returns: Tuple of (success, message) @@ -346,18 +350,21 @@ class AgentProcessManager: # Add testing agent configuration cmd.extend(["--testing-ratio", str(testing_agent_ratio)]) + # Add --batch-size flag for multi-feature batching + cmd.extend(["--batch-size", str(batch_size)]) + try: # Start subprocess with piped stdout/stderr # Use project_dir as cwd so Claude SDK sandbox allows access to project files # stdin=DEVNULL prevents blocking if Claude CLI or child process tries to read stdin # CREATE_NO_WINDOW on Windows prevents console window pop-ups # PYTHONUNBUFFERED ensures output isn't delayed - popen_kwargs = { + popen_kwargs: dict[str, Any] = { "stdin": subprocess.DEVNULL, "stdout": subprocess.PIPE, "stderr": subprocess.STDOUT, "cwd": str(self.project_dir), - "env": {**os.environ, "PYTHONUNBUFFERED": "1"}, + "env": {**os.environ, "PYTHONUNBUFFERED": "1", "PLAYWRIGHT_HEADLESS": "true" if playwright_headless else "false"}, } if sys.platform == "win32": popen_kwargs["creationflags"] = subprocess.CREATE_NO_WINDOW @@ -579,8 +586,18 @@ def cleanup_orphaned_locks() -> int: if not project_path.exists(): continue - lock_file = project_path / ".agent.lock" - if not lock_file.exists(): + # Check both legacy and new locations for lock files + from autocoder_paths import get_autocoder_dir + lock_locations = [ + project_path / ".agent.lock", + get_autocoder_dir(project_path) / ".agent.lock", + ] + lock_file = None + for candidate in lock_locations: + if candidate.exists(): + lock_file = candidate + break + if lock_file is None: continue try: diff --git a/server/services/scheduler_service.py b/server/services/scheduler_service.py index eb22a3a..578aed2 100644 --- a/server/services/scheduler_service.py +++ b/server/services/scheduler_service.py @@ -92,8 +92,9 @@ class SchedulerService: async def _load_project_schedules(self, project_name: str, project_dir: Path) -> int: """Load schedules for a single project. 
Returns count of schedules loaded.""" from api.database import Schedule, create_database + from autocoder_paths import get_features_db_path - db_path = project_dir / "features.db" + db_path = get_features_db_path(project_dir) if not db_path.exists(): return 0 @@ -567,8 +568,9 @@ class SchedulerService: ): """Check if a project should be started on server startup.""" from api.database import Schedule, ScheduleOverride, create_database + from autocoder_paths import get_features_db_path - db_path = project_dir / "features.db" + db_path = get_features_db_path(project_dir) if not db_path.exists(): return diff --git a/server/services/spec_chat_session.py b/server/services/spec_chat_session.py index c86bda2..b352cb5 100644 --- a/server/services/spec_chat_session.py +++ b/server/services/spec_chat_session.py @@ -13,49 +13,19 @@ import shutil import threading from datetime import datetime from pathlib import Path -from typing import AsyncGenerator, Optional +from typing import Any, AsyncGenerator, Optional from claude_agent_sdk import ClaudeAgentOptions, ClaudeSDKClient from dotenv import load_dotenv from ..schemas import ImageAttachment +from .chat_constants import API_ENV_VARS, ROOT_DIR, make_multimodal_message # Load environment variables from .env file if present load_dotenv() logger = logging.getLogger(__name__) -# Environment variables to pass through to Claude CLI for API configuration -API_ENV_VARS = [ - "ANTHROPIC_BASE_URL", - "ANTHROPIC_AUTH_TOKEN", - "API_TIMEOUT_MS", - "ANTHROPIC_DEFAULT_SONNET_MODEL", - "ANTHROPIC_DEFAULT_OPUS_MODEL", - "ANTHROPIC_DEFAULT_HAIKU_MODEL", -] - - -async def _make_multimodal_message(content_blocks: list[dict]) -> AsyncGenerator[dict, None]: - """ - Create an async generator that yields a properly formatted multimodal message. - - The Claude Agent SDK's query() method accepts either: - - A string (simple text) - - An AsyncIterable[dict] (for custom message formats) - - This function wraps content blocks in the expected message format. 
- """ - yield { - "type": "user", - "message": {"role": "user", "content": content_blocks}, - "parent_tool_use_id": None, - "session_id": "default", - } - -# Root directory of the project -ROOT_DIR = Path(__file__).parent.parent.parent - class SpecChatSession: """ @@ -125,7 +95,8 @@ class SpecChatSession: # Delete app_spec.txt so Claude can create it fresh # The SDK requires reading existing files before writing, but app_spec.txt is created new # Note: We keep initializer_prompt.md so Claude can read and update the template - prompts_dir = self.project_dir / "prompts" + from autocoder_paths import get_prompts_dir + prompts_dir = get_prompts_dir(self.project_dir) app_spec_path = prompts_dir / "app_spec.txt" if app_spec_path.exists(): app_spec_path.unlink() @@ -145,7 +116,9 @@ class SpecChatSession: ], }, } - settings_file = self.project_dir / ".claude_settings.json" + from autocoder_paths import get_claude_settings_path + settings_file = get_claude_settings_path(self.project_dir) + settings_file.parent.mkdir(parents=True, exist_ok=True) with open(settings_file, "w") as f: json.dump(security_settings, f, indent=2) @@ -167,7 +140,12 @@ class SpecChatSession: system_cli = shutil.which("claude") # Build environment overrides for API configuration - sdk_env = {var: os.getenv(var) for var in API_ENV_VARS if os.getenv(var)} + # Filter to only include vars that are actually set (non-None) + sdk_env: dict[str, str] = {} + for var in API_ENV_VARS: + value = os.getenv(var) + if value: + sdk_env[var] = value # Determine model from environment or use default # This allows using alternative APIs (e.g., GLM via z.ai) that may not support Claude model names @@ -289,7 +267,7 @@ class SpecChatSession: # Build the message content if attachments and len(attachments) > 0: # Multimodal message: build content blocks array - content_blocks = [] + content_blocks: list[dict[str, Any]] = [] # Add text block if there's text if message: @@ -308,7 +286,7 @@ class SpecChatSession: # Send multimodal content to Claude using async generator format # The SDK's query() accepts AsyncIterable[dict] for custom message formats - await self.client.query(_make_multimodal_message(content_blocks)) + await self.client.query(make_multimodal_message(content_blocks)) logger.info(f"Sent multimodal message with {len(attachments)} image(s)") else: # Text-only message: use string format @@ -317,7 +295,7 @@ class SpecChatSession: current_text = "" # Track pending writes for BOTH required files - pending_writes = { + pending_writes: dict[str, dict[str, Any] | None] = { "app_spec": None, # {"tool_id": ..., "path": ...} "initializer": None, # {"tool_id": ..., "path": ...} } @@ -392,7 +370,8 @@ class SpecChatSession: logger.warning(f"Tool error: {content}") # Clear any pending writes that failed for key in pending_writes: - if pending_writes[key] and tool_use_id == pending_writes[key].get("tool_id"): + pending_write = pending_writes[key] + if pending_write is not None and tool_use_id == pending_write.get("tool_id"): logger.error(f"{key} write failed: {content}") pending_writes[key] = None else: diff --git a/server/services/terminal_manager.py b/server/services/terminal_manager.py index 09abfa2..852c635 100644 --- a/server/services/terminal_manager.py +++ b/server/services/terminal_manager.py @@ -371,7 +371,7 @@ class TerminalSession: # Reap zombie if not already reaped if self._child_pid is not None: try: - os.waitpid(self._child_pid, os.WNOHANG) + os.waitpid(self._child_pid, os.WNOHANG) # type: ignore[attr-defined] # Unix-only method, guarded by 
runtime platform selection except ChildProcessError: pass except Exception: @@ -736,7 +736,7 @@ async def cleanup_all_terminals() -> None: Called on server shutdown to ensure all PTY processes are terminated. """ with _sessions_lock: - all_sessions = [] + all_sessions: list[TerminalSession] = [] for project_sessions in _sessions.values(): all_sessions.extend(project_sessions.values()) diff --git a/server/utils/project_helpers.py b/server/utils/project_helpers.py new file mode 100644 index 0000000..020b4a1 --- /dev/null +++ b/server/utils/project_helpers.py @@ -0,0 +1,32 @@ +""" +Project Helper Utilities +======================== + +Shared project path lookup used across all server routers and websocket handlers. +Consolidates the previously duplicated _get_project_path() function. +""" + +import sys +from pathlib import Path + +# Ensure the project root is on sys.path so `registry` can be imported. +# This is necessary because `registry.py` lives at the repository root, +# outside the `server` package. +_root = Path(__file__).parent.parent.parent +if str(_root) not in sys.path: + sys.path.insert(0, str(_root)) + +from registry import get_project_path as _registry_get_project_path + + +def get_project_path(project_name: str) -> Path | None: + """Look up a project's filesystem path from the global registry. + + Args: + project_name: The registered name of the project. + + Returns: + The resolved ``Path`` to the project directory, or ``None`` if the + project is not found in the registry. + """ + return _registry_get_project_path(project_name) diff --git a/server/utils/validation.py b/server/utils/validation.py index 9f1bf11..ea20cf3 100644 --- a/server/utils/validation.py +++ b/server/utils/validation.py @@ -1,26 +1,52 @@ """ -Shared validation utilities for the server. +Shared Validation Utilities +============================ + +Project name validation used across REST endpoints and WebSocket handlers. +Two variants are provided: + +* ``is_valid_project_name`` -- returns ``bool``, suitable for WebSocket + handlers where raising an HTTPException is not appropriate. +* ``validate_project_name`` -- raises ``HTTPException(400)`` on failure, + suitable for REST endpoint handlers. """ import re from fastapi import HTTPException +# Compiled once; reused by both variants. +_PROJECT_NAME_RE = re.compile(r'^[a-zA-Z0-9_-]{1,50}$') + + +def is_valid_project_name(name: str) -> bool: + """Check whether *name* is a valid project name. + + Allows only ASCII letters, digits, hyphens, and underscores (1-50 chars). + Returns ``True`` if valid, ``False`` otherwise. + + Use this in WebSocket handlers where you need to close the socket + yourself rather than raise an HTTP error. + """ + return bool(_PROJECT_NAME_RE.match(name)) + def validate_project_name(name: str) -> str: - """ - Validate and sanitize project name to prevent path traversal. + """Validate and return *name*, or raise ``HTTPException(400)``. + + Suitable for REST endpoint handlers where FastAPI will convert the + exception into an HTTP 400 response automatically. Args: - name: Project name to validate + name: Project name to validate. Returns: - The validated project name + The validated project name (unchanged). Raises: - HTTPException: If name is invalid + HTTPException: If *name* is invalid. """ - if not re.match(r'^[a-zA-Z0-9_-]{1,50}$', name): + if not _PROJECT_NAME_RE.match(name): raise HTTPException( status_code=400, detail="Invalid project name. Use only letters, numbers, hyphens, and underscores (1-50 chars)." 
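
Because `server/utils/validation.py` now exposes two variants, a short usage sketch may help reviewers see how each is intended to be called: `validate_project_name` in REST handlers, `is_valid_project_name` in WebSocket handlers. This is an illustrative aside, not part of the diff — the route paths, handler names, and close code are hypothetical; only the two validation functions come from this PR.

    # Illustrative only — not part of this PR. Route paths and handler names
    # are hypothetical; the validators are from server/utils/validation.py.
    from fastapi import FastAPI, WebSocket

    from server.utils.validation import is_valid_project_name, validate_project_name

    app = FastAPI()


    @app.get("/api/projects/{project_name}/features")
    async def list_features(project_name: str) -> dict:
        # REST handler: validate_project_name() raises HTTPException(400) on a
        # bad name, which FastAPI converts into a 400 response automatically.
        name = validate_project_name(project_name)
        return {"project": name, "features": []}


    @app.websocket("/ws/projects/{project_name}")
    async def project_ws(websocket: WebSocket, project_name: str) -> None:
        # WebSocket handler: use the bool variant and reject the connection
        # ourselves, rather than raising an HTTP error.
        if not is_valid_project_name(project_name):
            await websocket.close(code=1008)  # 1008 = policy violation
            return
        await websocket.accept()
        await websocket.send_json({"type": "connected", "project": project_name})

The split keeps HTTP error semantics in REST endpoints while letting WebSocket handlers choose their own rejection behavior (for example, a specific close code) before accepting the connection.
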
diff --git a/server/websocket.py b/server/websocket.py index 4b86456..dfb4dee 100644 --- a/server/websocket.py +++ b/server/websocket.py @@ -16,8 +16,11 @@ from typing import Set from fastapi import WebSocket, WebSocketDisconnect from .schemas import AGENT_MASCOTS +from .services.chat_constants import ROOT_DIR from .services.dev_server_manager import get_devserver_manager from .services.process_manager import get_manager +from .utils.project_helpers import get_project_path as _get_project_path +from .utils.validation import is_valid_project_name as validate_project_name # Lazy imports _count_passing_tests = None @@ -36,6 +39,14 @@ TESTING_AGENT_START_PATTERN = re.compile(r'Started testing agent for feature #(\ # Matches: "Feature #123 testing completed" or "Feature #123 testing failed" TESTING_AGENT_COMPLETE_PATTERN = re.compile(r'Feature #(\d+) testing (completed|failed)') +# Pattern to detect batch coding agent start message +# Matches: "Started coding agent for features #5, #8, #12" +BATCH_CODING_AGENT_START_PATTERN = re.compile(r'Started coding agent for features (#\d+(?:,\s*#\d+)*)') + +# Pattern to detect batch completion +# Matches: "Features #5, #8, #12 completed" or "Features #5, #8, #12 failed" +BATCH_FEATURES_COMPLETE_PATTERN = re.compile(r'Features (#\d+(?:,\s*#\d+)*)\s+(completed|failed)') + # Patterns for detecting agent activity and thoughts THOUGHT_PATTERNS = [ # Claude's tool usage patterns (actual format: [Tool: name]) @@ -61,9 +72,9 @@ ORCHESTRATOR_PATTERNS = { 'capacity_check': re.compile(r'\[DEBUG\] Spawning loop: (\d+) ready, (\d+) slots'), 'at_capacity': re.compile(r'At max capacity|at max testing agents|At max total agents'), 'feature_start': re.compile(r'Starting feature \d+/\d+: #(\d+) - (.+)'), - 'coding_spawn': re.compile(r'Started coding agent for feature #(\d+)'), + 'coding_spawn': re.compile(r'Started coding agent for features? #(\d+)'), 'testing_spawn': re.compile(r'Started testing agent for feature #(\d+)'), - 'coding_complete': re.compile(r'Feature #(\d+) (completed|failed)'), + 'coding_complete': re.compile(r'Features? 
#(\d+)(?:,\s*#\d+)* (completed|failed)'), 'testing_complete': re.compile(r'Feature #(\d+) testing (completed|failed)'), 'all_complete': re.compile(r'All features complete'), 'blocked_features': re.compile(r'(\d+) blocked by dependencies'), @@ -93,14 +104,26 @@ class AgentTracker: # Check for orchestrator status messages first # These don't have [Feature #X] prefix - # Coding agent start: "Started coding agent for feature #X" - if line.startswith("Started coding agent for feature #"): + # Batch coding agent start: "Started coding agent for features #5, #8, #12" + batch_start_match = BATCH_CODING_AGENT_START_PATTERN.match(line) + if batch_start_match: try: - feature_id = int(re.search(r'#(\d+)', line).group(1)) - return await self._handle_agent_start(feature_id, line, agent_type="coding") - except (AttributeError, ValueError): + feature_ids = [int(x.strip().lstrip('#')) for x in batch_start_match.group(1).split(',')] + if feature_ids: + return await self._handle_batch_agent_start(feature_ids, "coding") + except ValueError: pass + # Single coding agent start: "Started coding agent for feature #X" + if line.startswith("Started coding agent for feature #"): + m = re.search(r'#(\d+)', line) + if m: + try: + feature_id = int(m.group(1)) + return await self._handle_agent_start(feature_id, line, agent_type="coding") + except ValueError: + pass + # Testing agent start: "Started testing agent for feature #X (PID xxx)" testing_start_match = TESTING_AGENT_START_PATTERN.match(line) if testing_start_match: @@ -114,14 +137,27 @@ class AgentTracker: is_success = testing_complete_match.group(2) == "completed" return await self._handle_agent_complete(feature_id, is_success, agent_type="testing") + # Batch features complete: "Features #5, #8, #12 completed/failed" + batch_complete_match = BATCH_FEATURES_COMPLETE_PATTERN.match(line) + if batch_complete_match: + try: + feature_ids = [int(x.strip().lstrip('#')) for x in batch_complete_match.group(1).split(',')] + is_success = batch_complete_match.group(2) == "completed" + if feature_ids: + return await self._handle_batch_agent_complete(feature_ids, is_success, "coding") + except ValueError: + pass + # Coding agent complete: "Feature #X completed/failed" (without "testing" keyword) if line.startswith("Feature #") and ("completed" in line or "failed" in line) and "testing" not in line: - try: - feature_id = int(re.search(r'#(\d+)', line).group(1)) - is_success = "completed" in line - return await self._handle_agent_complete(feature_id, is_success, agent_type="coding") - except (AttributeError, ValueError): - pass + m = re.search(r'#(\d+)', line) + if m: + try: + feature_id = int(m.group(1)) + is_success = "completed" in line + return await self._handle_agent_complete(feature_id, is_success, agent_type="coding") + except ValueError: + pass # Check for feature-specific output lines: [Feature #X] content # Both coding and testing agents use this format now @@ -151,6 +187,7 @@ class AgentTracker: 'name': AGENT_MASCOTS[agent_index % len(AGENT_MASCOTS)], 'agent_index': agent_index, 'agent_type': 'coding', + 'feature_ids': [feature_id], 'state': 'thinking', 'feature_name': f'Feature #{feature_id}', 'last_thought': None, @@ -158,6 +195,10 @@ class AgentTracker: agent = self.active_agents[key] + # Update current_feature_id for batch agents when output comes from a different feature + if 'current_feature_id' in agent and feature_id in agent.get('feature_ids', []): + agent['current_feature_id'] = feature_id + # Detect state and thought from content state = 'working' thought 
= None @@ -181,6 +222,7 @@ class AgentTracker: 'agentName': agent['name'], 'agentType': agent['agent_type'], 'featureId': feature_id, + 'featureIds': agent.get('feature_ids', [feature_id]), 'featureName': agent['feature_name'], 'state': state, 'thought': thought, @@ -237,6 +279,7 @@ class AgentTracker: 'name': AGENT_MASCOTS[agent_index % len(AGENT_MASCOTS)], 'agent_index': agent_index, 'agent_type': agent_type, + 'feature_ids': [feature_id], 'state': 'thinking', 'feature_name': feature_name, 'last_thought': 'Starting work...', @@ -248,12 +291,55 @@ class AgentTracker: 'agentName': AGENT_MASCOTS[agent_index % len(AGENT_MASCOTS)], 'agentType': agent_type, 'featureId': feature_id, + 'featureIds': [feature_id], 'featureName': feature_name, 'state': 'thinking', 'thought': 'Starting work...', 'timestamp': datetime.now().isoformat(), } + async def _handle_batch_agent_start(self, feature_ids: list[int], agent_type: str = "coding") -> dict | None: + """Handle batch agent start message from orchestrator.""" + if not feature_ids: + return None + primary_id = feature_ids[0] + async with self._lock: + key = (primary_id, agent_type) + agent_index = self._next_agent_index + self._next_agent_index += 1 + + feature_name = f'Features {", ".join(f"#{fid}" for fid in feature_ids)}' + + self.active_agents[key] = { + 'name': AGENT_MASCOTS[agent_index % len(AGENT_MASCOTS)], + 'agent_index': agent_index, + 'agent_type': agent_type, + 'feature_ids': list(feature_ids), + 'current_feature_id': primary_id, + 'state': 'thinking', + 'feature_name': feature_name, + 'last_thought': 'Starting batch work...', + } + + # Register all feature IDs so output lines can find this agent + for fid in feature_ids: + secondary_key = (fid, agent_type) + if secondary_key != key: + self.active_agents[secondary_key] = self.active_agents[key] + + return { + 'type': 'agent_update', + 'agentIndex': agent_index, + 'agentName': AGENT_MASCOTS[agent_index % len(AGENT_MASCOTS)], + 'agentType': agent_type, + 'featureId': primary_id, + 'featureIds': list(feature_ids), + 'featureName': feature_name, + 'state': 'thinking', + 'thought': 'Starting batch work...', + 'timestamp': datetime.now().isoformat(), + } + async def _handle_agent_complete(self, feature_id: int, is_success: bool, agent_type: str = "coding") -> dict | None: """Handle agent completion - ALWAYS emits a message, even if agent wasn't tracked. @@ -275,6 +361,7 @@ class AgentTracker: 'agentName': agent['name'], 'agentType': agent.get('agent_type', agent_type), 'featureId': feature_id, + 'featureIds': agent.get('feature_ids', [feature_id]), 'featureName': agent['feature_name'], 'state': state, 'thought': 'Completed successfully!' if is_success else 'Failed to complete', @@ -291,6 +378,7 @@ class AgentTracker: 'agentName': 'Unknown', 'agentType': agent_type, 'featureId': feature_id, + 'featureIds': [feature_id], 'featureName': f'Feature #{feature_id}', 'state': state, 'thought': 'Completed successfully!' 
if is_success else 'Failed to complete', @@ -298,6 +386,49 @@ class AgentTracker: 'synthetic': True, } + async def _handle_batch_agent_complete(self, feature_ids: list[int], is_success: bool, agent_type: str = "coding") -> dict | None: + """Handle batch agent completion.""" + if not feature_ids: + return None + primary_id = feature_ids[0] + async with self._lock: + state = 'success' if is_success else 'error' + key = (primary_id, agent_type) + + if key in self.active_agents: + agent = self.active_agents[key] + result = { + 'type': 'agent_update', + 'agentIndex': agent['agent_index'], + 'agentName': agent['name'], + 'agentType': agent.get('agent_type', agent_type), + 'featureId': primary_id, + 'featureIds': agent.get('feature_ids', list(feature_ids)), + 'featureName': agent['feature_name'], + 'state': state, + 'thought': 'Batch completed successfully!' if is_success else 'Batch failed to complete', + 'timestamp': datetime.now().isoformat(), + } + # Clean up all keys for this batch + for fid in feature_ids: + self.active_agents.pop((fid, agent_type), None) + return result + else: + # Synthetic completion + return { + 'type': 'agent_update', + 'agentIndex': -1, + 'agentName': 'Unknown', + 'agentType': agent_type, + 'featureId': primary_id, + 'featureIds': list(feature_ids), + 'featureName': f'Features {", ".join(f"#{fid}" for fid in feature_ids)}', + 'state': state, + 'thought': 'Batch completed successfully!' if is_success else 'Batch failed to complete', + 'timestamp': datetime.now().isoformat(), + 'synthetic': True, + } + class OrchestratorTracker: """Tracks orchestrator state for Mission Control observability. @@ -444,7 +575,7 @@ class OrchestratorTracker: timestamp = datetime.now().isoformat() # Add to recent events (keep last 5) - event = { + event: dict[str, str | int] = { 'eventType': event_type, 'message': message, 'timestamp': timestamp, @@ -487,17 +618,6 @@ class OrchestratorTracker: self.recent_events.clear() -def _get_project_path(project_name: str) -> Path: - """Get project path from registry.""" - import sys - root = Path(__file__).parent.parent - if str(root) not in sys.path: - sys.path.insert(0, str(root)) - - from registry import get_project_path - return get_project_path(project_name) - - def _get_count_passing_tests(): """Lazy import of count_passing_tests.""" global _count_passing_tests @@ -564,15 +684,6 @@ class ConnectionManager: # Global connection manager manager = ConnectionManager() -# Root directory -ROOT_DIR = Path(__file__).parent.parent - - -def validate_project_name(name: str) -> bool: - """Validate project name to prevent path traversal.""" - return bool(re.match(r'^[a-zA-Z0-9_-]{1,50}$', name)) - - async def poll_progress(websocket: WebSocket, project_name: str, project_dir: Path): """Poll database for progress changes and send updates.""" count_passing_tests = _get_count_passing_tests() @@ -652,7 +763,7 @@ async def project_websocket(websocket: WebSocket, project_name: str): agent_index, _ = await agent_tracker.get_agent_info(feature_id) # Send the raw log line with optional feature/agent attribution - log_msg = { + log_msg: dict[str, str | int] = { "type": "log", "line": line, "timestamp": datetime.now().isoformat(), diff --git a/start_ui.py b/start_ui.py index 3e619c1..ad30112 100644 --- a/start_ui.py +++ b/start_ui.py @@ -202,7 +202,7 @@ def build_frontend() -> bool: trigger_file = "dist/ directory missing" elif src_dir.exists(): # Find the newest file in dist/ directory - newest_dist_mtime = 0 + newest_dist_mtime: float = 0 for dist_file in 
dist_dir.rglob("*"): try: if dist_file.is_file(): diff --git a/test_client.py b/test_client.py index 48f52c4..4597002 100644 --- a/test_client.py +++ b/test_client.py @@ -8,9 +8,17 @@ Run with: python test_client.py """ import os +import sys +import tempfile import unittest +from pathlib import Path -from client import convert_model_for_vertex +from client import ( + EXTRA_READ_PATHS_BLOCKLIST, + EXTRA_READ_PATHS_VAR, + convert_model_for_vertex, + get_extra_read_paths, +) class TestConvertModelForVertex(unittest.TestCase): @@ -101,5 +109,157 @@ class TestConvertModelForVertex(unittest.TestCase): self.assertEqual(convert_model_for_vertex(""), "") +class TestExtraReadPathsBlocklist(unittest.TestCase): + """Tests for EXTRA_READ_PATHS sensitive directory blocking in get_extra_read_paths().""" + + def setUp(self): + """Save original environment and home directory state.""" + self._orig_extra_read = os.environ.get(EXTRA_READ_PATHS_VAR) + self._orig_home = os.environ.get("HOME") + self._orig_userprofile = os.environ.get("USERPROFILE") + self._orig_homedrive = os.environ.get("HOMEDRIVE") + self._orig_homepath = os.environ.get("HOMEPATH") + + def tearDown(self): + """Restore original environment state.""" + restore_map = { + EXTRA_READ_PATHS_VAR: self._orig_extra_read, + "HOME": self._orig_home, + "USERPROFILE": self._orig_userprofile, + "HOMEDRIVE": self._orig_homedrive, + "HOMEPATH": self._orig_homepath, + } + for key, value in restore_map.items(): + if value is None: + os.environ.pop(key, None) + else: + os.environ[key] = value + + def _set_home(self, home_path: str): + """Set the home directory for both Unix and Windows.""" + os.environ["HOME"] = home_path + if sys.platform == "win32": + os.environ["USERPROFILE"] = home_path + drive, path = os.path.splitdrive(home_path) + if drive: + os.environ["HOMEDRIVE"] = drive + os.environ["HOMEPATH"] = path + + def test_sensitive_directory_is_blocked(self): + """Path that IS a sensitive directory (e.g., ~/.ssh) should be blocked.""" + with tempfile.TemporaryDirectory() as tmpdir: + self._set_home(tmpdir) + # Create the sensitive directory so it exists + ssh_dir = Path(tmpdir) / ".ssh" + ssh_dir.mkdir() + + os.environ[EXTRA_READ_PATHS_VAR] = str(ssh_dir) + result = get_extra_read_paths() + self.assertEqual(result, [], "Path that IS ~/.ssh should be blocked") + + def test_path_inside_sensitive_directory_is_blocked(self): + """Path INSIDE a sensitive directory (e.g., ~/.ssh/keys) should be blocked.""" + with tempfile.TemporaryDirectory() as tmpdir: + self._set_home(tmpdir) + ssh_dir = Path(tmpdir) / ".ssh" + keys_dir = ssh_dir / "keys" + keys_dir.mkdir(parents=True) + + os.environ[EXTRA_READ_PATHS_VAR] = str(keys_dir) + result = get_extra_read_paths() + self.assertEqual(result, [], "Path inside ~/.ssh should be blocked") + + def test_path_containing_sensitive_directory_is_blocked(self): + """Path that contains a sensitive directory inside it should be blocked. + + For example, if the extra read path is the user's home directory, and + ~/.ssh exists inside it, the path should be blocked because granting + read access to the parent would expose the sensitive subdirectory. 
+ """ + with tempfile.TemporaryDirectory() as tmpdir: + self._set_home(tmpdir) + # Create a sensitive dir inside the home so it triggers the + # "sensitive dir is inside the requested path" check + ssh_dir = Path(tmpdir) / ".ssh" + ssh_dir.mkdir() + + os.environ[EXTRA_READ_PATHS_VAR] = tmpdir + result = get_extra_read_paths() + self.assertEqual(result, [], "Home dir containing .ssh should be blocked") + + def test_valid_non_sensitive_path_is_allowed(self): + """A valid directory that is NOT sensitive should be allowed.""" + with tempfile.TemporaryDirectory() as tmpdir: + self._set_home(tmpdir) + # Create a non-sensitive directory under home + docs_dir = Path(tmpdir) / "Documents" / "myproject" + docs_dir.mkdir(parents=True) + + os.environ[EXTRA_READ_PATHS_VAR] = str(docs_dir) + result = get_extra_read_paths() + self.assertEqual(len(result), 1, "Non-sensitive path should be allowed") + self.assertEqual(result[0], docs_dir.resolve()) + + def test_all_blocklist_entries_are_checked(self): + """Every directory in EXTRA_READ_PATHS_BLOCKLIST should actually be blocked.""" + with tempfile.TemporaryDirectory() as tmpdir: + self._set_home(tmpdir) + + for sensitive_name in sorted(EXTRA_READ_PATHS_BLOCKLIST): + sensitive_dir = Path(tmpdir) / sensitive_name + sensitive_dir.mkdir(parents=True, exist_ok=True) + + os.environ[EXTRA_READ_PATHS_VAR] = str(sensitive_dir) + result = get_extra_read_paths() + self.assertEqual( + result, [], + f"Blocklist entry '{sensitive_name}' should be blocked" + ) + + def test_multiple_paths_mixed_sensitive_and_valid(self): + """When given multiple paths, only non-sensitive ones should pass.""" + with tempfile.TemporaryDirectory() as tmpdir: + self._set_home(tmpdir) + + # Create one sensitive and one valid directory + ssh_dir = Path(tmpdir) / ".ssh" + ssh_dir.mkdir() + valid_dir = Path(tmpdir) / "projects" + valid_dir.mkdir() + + os.environ[EXTRA_READ_PATHS_VAR] = f"{ssh_dir},{valid_dir}" + result = get_extra_read_paths() + self.assertEqual(len(result), 1, "Only the non-sensitive path should be returned") + self.assertEqual(result[0], valid_dir.resolve()) + + def test_empty_extra_read_paths_returns_empty(self): + """Empty EXTRA_READ_PATHS should return empty list.""" + os.environ[EXTRA_READ_PATHS_VAR] = "" + result = get_extra_read_paths() + self.assertEqual(result, []) + + def test_unset_extra_read_paths_returns_empty(self): + """Unset EXTRA_READ_PATHS should return empty list.""" + os.environ.pop(EXTRA_READ_PATHS_VAR, None) + result = get_extra_read_paths() + self.assertEqual(result, []) + + def test_nonexistent_path_is_skipped(self): + """A path that does not exist should be skipped.""" + with tempfile.TemporaryDirectory() as tmpdir: + self._set_home(tmpdir) + nonexistent = Path(tmpdir) / "does_not_exist" + + os.environ[EXTRA_READ_PATHS_VAR] = str(nonexistent) + result = get_extra_read_paths() + self.assertEqual(result, []) + + def test_relative_path_is_skipped(self): + """A relative path should be skipped.""" + os.environ[EXTRA_READ_PATHS_VAR] = "relative/path" + result = get_extra_read_paths() + self.assertEqual(result, []) + + if __name__ == "__main__": unittest.main() diff --git a/test_rate_limit_utils.py b/test_rate_limit_utils.py new file mode 100644 index 0000000..c22038f --- /dev/null +++ b/test_rate_limit_utils.py @@ -0,0 +1,205 @@ +""" +Unit tests for rate limit handling functions. + +Tests the parse_retry_after(), is_rate_limit_error(), and backoff calculation +functions from rate_limit_utils.py (shared module). 
+""" + +import unittest + +from rate_limit_utils import ( + calculate_error_backoff, + calculate_rate_limit_backoff, + clamp_retry_delay, + is_rate_limit_error, + parse_retry_after, +) + + +class TestParseRetryAfter(unittest.TestCase): + """Tests for parse_retry_after() function.""" + + def test_retry_after_colon_format(self): + """Test 'Retry-After: 60' format.""" + assert parse_retry_after("Retry-After: 60") == 60 + assert parse_retry_after("retry-after: 120") == 120 + assert parse_retry_after("retry after: 30 seconds") == 30 + + def test_retry_after_space_format(self): + """Test 'retry after 60 seconds' format.""" + assert parse_retry_after("retry after 60 seconds") == 60 + assert parse_retry_after("Please retry after 120 seconds") == 120 + assert parse_retry_after("Retry after 30") == 30 + + def test_try_again_in_format(self): + """Test 'try again in X seconds' format.""" + assert parse_retry_after("try again in 120 seconds") == 120 + assert parse_retry_after("Please try again in 60s") == 60 + assert parse_retry_after("Try again in 30 seconds") == 30 + + def test_seconds_remaining_format(self): + """Test 'X seconds remaining' format.""" + assert parse_retry_after("30 seconds remaining") == 30 + assert parse_retry_after("60 seconds left") == 60 + assert parse_retry_after("120 seconds until reset") == 120 + + def test_retry_after_zero(self): + """Test 'Retry-After: 0' returns 0 (not None).""" + assert parse_retry_after("Retry-After: 0") == 0 + assert parse_retry_after("retry after 0 seconds") == 0 + + def test_no_match(self): + """Test messages that don't contain retry-after info.""" + assert parse_retry_after("no match here") is None + assert parse_retry_after("Connection refused") is None + assert parse_retry_after("Internal server error") is None + assert parse_retry_after("") is None + + def test_minutes_not_supported(self): + """Test that minutes are not parsed (by design).""" + # We only support seconds to avoid complexity + # These patterns should NOT match when followed by minute/hour units + assert parse_retry_after("wait 5 minutes") is None + assert parse_retry_after("try again in 2 minutes") is None + assert parse_retry_after("retry after 5 minutes") is None + assert parse_retry_after("retry after 1 hour") is None + assert parse_retry_after("try again in 30 min") is None + + +class TestIsRateLimitError(unittest.TestCase): + """Tests for is_rate_limit_error() function.""" + + def test_rate_limit_patterns(self): + """Test various rate limit error messages.""" + assert is_rate_limit_error("Rate limit exceeded") is True + assert is_rate_limit_error("rate_limit_exceeded") is True + assert is_rate_limit_error("Too many requests") is True + assert is_rate_limit_error("HTTP 429 Too Many Requests") is True + assert is_rate_limit_error("API quota exceeded") is True + assert is_rate_limit_error("Server is overloaded") is True + + def test_specific_429_patterns(self): + """Test that 429 is detected with proper context.""" + assert is_rate_limit_error("http 429") is True + assert is_rate_limit_error("HTTP429") is True + assert is_rate_limit_error("status 429") is True + assert is_rate_limit_error("error 429") is True + assert is_rate_limit_error("429 too many requests") is True + + def test_case_insensitive(self): + """Test that detection is case-insensitive.""" + assert is_rate_limit_error("RATE LIMIT") is True + assert is_rate_limit_error("Rate Limit") is True + assert is_rate_limit_error("rate limit") is True + assert is_rate_limit_error("RaTe LiMiT") is True + + def 
test_non_rate_limit_errors(self): + """Test non-rate-limit error messages.""" + assert is_rate_limit_error("Connection refused") is False + assert is_rate_limit_error("Authentication failed") is False + assert is_rate_limit_error("Invalid API key") is False + assert is_rate_limit_error("Internal server error") is False + assert is_rate_limit_error("Network timeout") is False + assert is_rate_limit_error("") is False + + +class TestFalsePositives(unittest.TestCase): + """Verify non-rate-limit messages don't trigger detection.""" + + def test_version_numbers_with_429(self): + """Version numbers should not trigger.""" + assert is_rate_limit_error("Node v14.29.0") is False + assert is_rate_limit_error("Python 3.12.429") is False + assert is_rate_limit_error("Version 2.429 released") is False + + def test_issue_and_pr_numbers(self): + """Issue/PR numbers should not trigger.""" + assert is_rate_limit_error("See PR #429") is False + assert is_rate_limit_error("Fixed in issue 429") is False + assert is_rate_limit_error("Closes #429") is False + + def test_line_numbers(self): + """Line numbers in errors should not trigger.""" + assert is_rate_limit_error("Error at line 429") is False + assert is_rate_limit_error("See file.py:429") is False + + def test_port_numbers(self): + """Port numbers should not trigger.""" + assert is_rate_limit_error("port 4293") is False + assert is_rate_limit_error("localhost:4290") is False + + def test_legitimate_wait_messages(self): + """Legitimate wait instructions should not trigger.""" + # These would fail if "please wait" pattern still exists + assert is_rate_limit_error("Please wait for the build to complete") is False + assert is_rate_limit_error("Please wait while I analyze this") is False + + def test_retry_discussion_messages(self): + """Messages discussing retry logic should not trigger.""" + # These would fail if "try again later" pattern still exists + assert is_rate_limit_error("Try again later after maintenance") is False + assert is_rate_limit_error("The user should try again later") is False + + def test_limit_discussion_messages(self): + """Messages discussing limits should not trigger (removed pattern).""" + # These would fail if "limit reached" pattern still exists + assert is_rate_limit_error("File size limit reached") is False + assert is_rate_limit_error("Memory limit reached, consider optimization") is False + + def test_overloaded_in_programming_context(self): + """Method/operator overloading discussions should not trigger.""" + assert is_rate_limit_error("I will create an overloaded constructor") is False + assert is_rate_limit_error("The + operator is overloaded") is False + assert is_rate_limit_error("Here is the overloaded version of the function") is False + assert is_rate_limit_error("The method is overloaded to accept different types") is False + # But actual API overload messages should still match + assert is_rate_limit_error("Server is overloaded") is True + assert is_rate_limit_error("API overloaded") is True + assert is_rate_limit_error("system is overloaded") is True + + +class TestBackoffFunctions(unittest.TestCase): + """Test backoff calculation functions from rate_limit_utils.""" + + def test_rate_limit_backoff_sequence(self): + """Test that rate limit backoff follows expected exponential sequence with jitter. + + Base formula: 15 * 2^retries with 0-30% jitter. + Base values: 15, 30, 60, 120, 240, 480, 960, 1920, 3600, 3600 + With jitter the result should be in [base, base * 1.3]. 
+ """ + base_values = [15, 30, 60, 120, 240, 480, 960, 1920, 3600, 3600] + for retries, base in enumerate(base_values): + delay = calculate_rate_limit_backoff(retries) + # Delay must be at least the base value (jitter is non-negative) + assert delay >= base, f"Retry {retries}: {delay} < base {base}" + # Delay must not exceed base + 30% jitter (int truncation means <= base * 1.3) + max_with_jitter = int(base * 1.3) + assert delay <= max_with_jitter, f"Retry {retries}: {delay} > max {max_with_jitter}" + + def test_error_backoff_sequence(self): + """Test that error backoff follows expected linear sequence.""" + expected = [30, 60, 90, 120, 150, 180, 210, 240, 270, 300, 300] # Caps at 300 + for retries in range(1, len(expected) + 1): + delay = calculate_error_backoff(retries) + expected_delay = expected[retries - 1] + assert delay == expected_delay, f"Retry {retries}: expected {expected_delay}, got {delay}" + + def test_clamp_retry_delay(self): + """Test that retry delay is clamped to valid range.""" + # Values within range stay the same + assert clamp_retry_delay(60) == 60 + assert clamp_retry_delay(1800) == 1800 + assert clamp_retry_delay(3600) == 3600 + + # Values below minimum get clamped to 1 + assert clamp_retry_delay(0) == 1 + assert clamp_retry_delay(-10) == 1 + + # Values above maximum get clamped to 3600 + assert clamp_retry_delay(7200) == 3600 + assert clamp_retry_delay(86400) == 3600 + + +if __name__ == "__main__": + unittest.main() diff --git a/test_security.py b/test_security.py index d8cb256..40c1fa1 100644 --- a/test_security.py +++ b/test_security.py @@ -992,31 +992,26 @@ def main(): failed += pkill_failed # Commands that SHOULD be blocked + # Note: blocklisted commands (sudo, shutdown, dd, aws) are tested in + # test_blocklist_enforcement(). chmod validation is tested in + # test_validate_chmod(). init.sh validation is tested in + # test_validate_init_script(). pkill validation is tested in + # test_pkill_extensibility(). The entries below focus on scenarios + # NOT covered by those dedicated tests. print("\nCommands that should be BLOCKED:\n") dangerous = [ # Not in allowlist - dangerous system commands - "shutdown now", "reboot", - "dd if=/dev/zero of=/dev/sda", # Not in allowlist - common commands excluded from minimal set "wget https://example.com", "python app.py", "killall node", - # pkill with non-dev processes + # pkill with non-dev processes (pkill python tested in test_pkill_extensibility) "pkill bash", "pkill chrome", - "pkill python", # Shell injection attempts "$(echo pkill) node", 'eval "pkill node"', - # chmod with disallowed modes - "chmod 777 file.sh", - "chmod 755 file.sh", - "chmod +w file.sh", - "chmod -R +x dir/", - # Non-init.sh scripts - "./setup.sh", - "./malicious.sh", ] for cmd in dangerous: @@ -1026,6 +1021,10 @@ def main(): failed += 1 # Commands that SHOULD be allowed + # Note: chmod +x variants are tested in test_validate_chmod(). + # init.sh variants are tested in test_validate_init_script(). + # The combined "chmod +x init.sh && ./init.sh" below serves as the + # integration test verifying the hook routes to both validators correctly. 
print("\nCommands that should be ALLOWED:\n") safe = [ # File inspection @@ -1076,16 +1075,7 @@ def main(): "ls | grep test", # Full paths "/usr/local/bin/node app.js", - # chmod +x (allowed) - "chmod +x init.sh", - "chmod +x script.sh", - "chmod u+x init.sh", - "chmod a+x init.sh", - # init.sh execution (allowed) - "./init.sh", - "./init.sh --production", - "/path/to/init.sh", - # Combined chmod and init.sh + # Combined chmod and init.sh (integration test for both validators) "chmod +x init.sh && ./init.sh", ] diff --git a/ui/package-lock.json b/ui/package-lock.json index 2c33986..ae46a24 100644 --- a/ui/package-lock.json +++ b/ui/package-lock.json @@ -12,16 +12,9 @@ "@radix-ui/react-dialog": "^1.1.15", "@radix-ui/react-dropdown-menu": "^2.1.16", "@radix-ui/react-label": "^2.1.8", - "@radix-ui/react-popover": "^1.1.15", - "@radix-ui/react-radio-group": "^1.3.8", - "@radix-ui/react-scroll-area": "^1.2.10", - "@radix-ui/react-select": "^2.2.6", "@radix-ui/react-separator": "^1.1.8", "@radix-ui/react-slot": "^1.2.4", "@radix-ui/react-switch": "^1.2.6", - "@radix-ui/react-tabs": "^1.1.13", - "@radix-ui/react-toggle": "^1.1.10", - "@radix-ui/react-tooltip": "^1.2.8", "@tanstack/react-query": "^5.72.0", "@xterm/addon-fit": "^0.11.0", "@xterm/addon-web-links": "^0.12.0", @@ -1093,12 +1086,6 @@ "node": ">=18" } }, - "node_modules/@radix-ui/number": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/@radix-ui/number/-/number-1.1.1.tgz", - "integrity": "sha512-MkKCwxlXTgz6CFoJx3pCwn07GKp36+aZyu/u2Ln2VrA5DcdyCZkASEDBTd8x5whTQQL5CiYf4prXKLcgQdv29g==", - "license": "MIT" - }, "node_modules/@radix-ui/primitive": { "version": "1.1.3", "resolved": "https://registry.npmjs.org/@radix-ui/primitive/-/primitive-1.1.3.tgz", @@ -1519,61 +1506,6 @@ } } }, - "node_modules/@radix-ui/react-popover": { - "version": "1.1.15", - "resolved": "https://registry.npmjs.org/@radix-ui/react-popover/-/react-popover-1.1.15.tgz", - "integrity": "sha512-kr0X2+6Yy/vJzLYJUPCZEc8SfQcf+1COFoAqauJm74umQhta9M7lNJHP7QQS3vkvcGLQUbWpMzwrXYwrYztHKA==", - "license": "MIT", - "dependencies": { - "@radix-ui/primitive": "1.1.3", - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-context": "1.1.2", - "@radix-ui/react-dismissable-layer": "1.1.11", - "@radix-ui/react-focus-guards": "1.1.3", - "@radix-ui/react-focus-scope": "1.1.7", - "@radix-ui/react-id": "1.1.1", - "@radix-ui/react-popper": "1.2.8", - "@radix-ui/react-portal": "1.1.9", - "@radix-ui/react-presence": "1.1.5", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-slot": "1.2.3", - "@radix-ui/react-use-controllable-state": "1.2.2", - "aria-hidden": "^1.2.4", - "react-remove-scroll": "^2.6.3" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-popover/node_modules/@radix-ui/react-slot": { - "version": "1.2.3", - "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz", - "integrity": "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==", - "license": "MIT", - "dependencies": { - "@radix-ui/react-compose-refs": "1.1.2" - }, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - 
"@types/react": { - "optional": true - } - } - }, "node_modules/@radix-ui/react-popper": { "version": "1.2.8", "resolved": "https://registry.npmjs.org/@radix-ui/react-popper/-/react-popper-1.2.8.tgz", @@ -1695,38 +1627,6 @@ } } }, - "node_modules/@radix-ui/react-radio-group": { - "version": "1.3.8", - "resolved": "https://registry.npmjs.org/@radix-ui/react-radio-group/-/react-radio-group-1.3.8.tgz", - "integrity": "sha512-VBKYIYImA5zsxACdisNQ3BjCBfmbGH3kQlnFVqlWU4tXwjy7cGX8ta80BcrO+WJXIn5iBylEH3K6ZTlee//lgQ==", - "license": "MIT", - "dependencies": { - "@radix-ui/primitive": "1.1.3", - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-context": "1.1.2", - "@radix-ui/react-direction": "1.1.1", - "@radix-ui/react-presence": "1.1.5", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-roving-focus": "1.1.11", - "@radix-ui/react-use-controllable-state": "1.2.2", - "@radix-ui/react-use-previous": "1.1.1", - "@radix-ui/react-use-size": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, "node_modules/@radix-ui/react-roving-focus": { "version": "1.1.11", "resolved": "https://registry.npmjs.org/@radix-ui/react-roving-focus/-/react-roving-focus-1.1.11.tgz", @@ -1758,98 +1658,6 @@ } } }, - "node_modules/@radix-ui/react-scroll-area": { - "version": "1.2.10", - "resolved": "https://registry.npmjs.org/@radix-ui/react-scroll-area/-/react-scroll-area-1.2.10.tgz", - "integrity": "sha512-tAXIa1g3sM5CGpVT0uIbUx/U3Gs5N8T52IICuCtObaos1S8fzsrPXG5WObkQN3S6NVl6wKgPhAIiBGbWnvc97A==", - "license": "MIT", - "dependencies": { - "@radix-ui/number": "1.1.1", - "@radix-ui/primitive": "1.1.3", - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-context": "1.1.2", - "@radix-ui/react-direction": "1.1.1", - "@radix-ui/react-presence": "1.1.5", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-use-callback-ref": "1.1.1", - "@radix-ui/react-use-layout-effect": "1.1.1" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-select": { - "version": "2.2.6", - "resolved": "https://registry.npmjs.org/@radix-ui/react-select/-/react-select-2.2.6.tgz", - "integrity": "sha512-I30RydO+bnn2PQztvo25tswPH+wFBjehVGtmagkU78yMdwTwVf12wnAOF+AeP8S2N8xD+5UPbGhkUfPyvT+mwQ==", - "license": "MIT", - "dependencies": { - "@radix-ui/number": "1.1.1", - "@radix-ui/primitive": "1.1.3", - "@radix-ui/react-collection": "1.1.7", - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-context": "1.1.2", - "@radix-ui/react-direction": "1.1.1", - "@radix-ui/react-dismissable-layer": "1.1.11", - "@radix-ui/react-focus-guards": "1.1.3", - "@radix-ui/react-focus-scope": "1.1.7", - "@radix-ui/react-id": "1.1.1", - "@radix-ui/react-popper": "1.2.8", - "@radix-ui/react-portal": "1.1.9", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-slot": "1.2.3", - "@radix-ui/react-use-callback-ref": "1.1.1", - "@radix-ui/react-use-controllable-state": "1.2.2", - "@radix-ui/react-use-layout-effect": "1.1.1", - 
"@radix-ui/react-use-previous": "1.1.1", - "@radix-ui/react-visually-hidden": "1.2.3", - "aria-hidden": "^1.2.4", - "react-remove-scroll": "^2.6.3" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-select/node_modules/@radix-ui/react-slot": { - "version": "1.2.3", - "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz", - "integrity": "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==", - "license": "MIT", - "dependencies": { - "@radix-ui/react-compose-refs": "1.1.2" - }, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, "node_modules/@radix-ui/react-separator": { "version": "1.1.8", "resolved": "https://registry.npmjs.org/@radix-ui/react-separator/-/react-separator-1.1.8.tgz", @@ -1943,113 +1751,6 @@ } } }, - "node_modules/@radix-ui/react-tabs": { - "version": "1.1.13", - "resolved": "https://registry.npmjs.org/@radix-ui/react-tabs/-/react-tabs-1.1.13.tgz", - "integrity": "sha512-7xdcatg7/U+7+Udyoj2zodtI9H/IIopqo+YOIcZOq1nJwXWBZ9p8xiu5llXlekDbZkca79a/fozEYQXIA4sW6A==", - "license": "MIT", - "dependencies": { - "@radix-ui/primitive": "1.1.3", - "@radix-ui/react-context": "1.1.2", - "@radix-ui/react-direction": "1.1.1", - "@radix-ui/react-id": "1.1.1", - "@radix-ui/react-presence": "1.1.5", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-roving-focus": "1.1.11", - "@radix-ui/react-use-controllable-state": "1.2.2" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-toggle": { - "version": "1.1.10", - "resolved": "https://registry.npmjs.org/@radix-ui/react-toggle/-/react-toggle-1.1.10.tgz", - "integrity": "sha512-lS1odchhFTeZv3xwHH31YPObmJn8gOg7Lq12inrr0+BH/l3Tsq32VfjqH1oh80ARM3mlkfMic15n0kg4sD1poQ==", - "license": "MIT", - "dependencies": { - "@radix-ui/primitive": "1.1.3", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-use-controllable-state": "1.2.2" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-tooltip": { - "version": "1.2.8", - "resolved": "https://registry.npmjs.org/@radix-ui/react-tooltip/-/react-tooltip-1.2.8.tgz", - "integrity": "sha512-tY7sVt1yL9ozIxvmbtN5qtmH2krXcBCfjEiCgKGLqunJHvgvZG2Pcl2oQ3kbcZARb1BGEHdkLzcYGO8ynVlieg==", - "license": "MIT", - "dependencies": { - "@radix-ui/primitive": "1.1.3", - "@radix-ui/react-compose-refs": "1.1.2", - "@radix-ui/react-context": "1.1.2", - "@radix-ui/react-dismissable-layer": "1.1.11", - "@radix-ui/react-id": "1.1.1", - "@radix-ui/react-popper": "1.2.8", - 
"@radix-ui/react-portal": "1.1.9", - "@radix-ui/react-presence": "1.1.5", - "@radix-ui/react-primitive": "2.1.3", - "@radix-ui/react-slot": "1.2.3", - "@radix-ui/react-use-controllable-state": "1.2.2", - "@radix-ui/react-visually-hidden": "1.2.3" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, - "node_modules/@radix-ui/react-tooltip/node_modules/@radix-ui/react-slot": { - "version": "1.2.3", - "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.2.3.tgz", - "integrity": "sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==", - "license": "MIT", - "dependencies": { - "@radix-ui/react-compose-refs": "1.1.2" - }, - "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, "node_modules/@radix-ui/react-use-callback-ref": { "version": "1.1.1", "resolved": "https://registry.npmjs.org/@radix-ui/react-use-callback-ref/-/react-use-callback-ref-1.1.1.tgz", @@ -2186,29 +1887,6 @@ } } }, - "node_modules/@radix-ui/react-visually-hidden": { - "version": "1.2.3", - "resolved": "https://registry.npmjs.org/@radix-ui/react-visually-hidden/-/react-visually-hidden-1.2.3.tgz", - "integrity": "sha512-pzJq12tEaaIhqjbzpCuv/OypJY/BPavOofm+dbab+MHLajy277+1lLm6JFcGgF5eskJ6mquGirhXY2GD/8u8Ug==", - "license": "MIT", - "dependencies": { - "@radix-ui/react-primitive": "2.1.3" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", - "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } - } - }, "node_modules/@radix-ui/rect": { "version": "1.1.1", "resolved": "https://registry.npmjs.org/@radix-ui/rect/-/rect-1.1.1.tgz", diff --git a/ui/package.json b/ui/package.json index f70b9ca..71b5375 100644 --- a/ui/package.json +++ b/ui/package.json @@ -16,16 +16,9 @@ "@radix-ui/react-dialog": "^1.1.15", "@radix-ui/react-dropdown-menu": "^2.1.16", "@radix-ui/react-label": "^2.1.8", - "@radix-ui/react-popover": "^1.1.15", - "@radix-ui/react-radio-group": "^1.3.8", - "@radix-ui/react-scroll-area": "^1.2.10", - "@radix-ui/react-select": "^2.2.6", "@radix-ui/react-separator": "^1.1.8", "@radix-ui/react-slot": "^1.2.4", "@radix-ui/react-switch": "^1.2.6", - "@radix-ui/react-tabs": "^1.1.13", - "@radix-ui/react-toggle": "^1.1.10", - "@radix-ui/react-tooltip": "^1.2.8", "@tanstack/react-query": "^5.72.0", "@xterm/addon-fit": "^0.11.0", "@xterm/addon-web-links": "^0.12.0", diff --git a/ui/src/App.tsx b/ui/src/App.tsx index 6c8fa00..dcfe729 100644 --- a/ui/src/App.tsx +++ b/ui/src/App.tsx @@ -13,7 +13,6 @@ import { SetupWizard } from './components/SetupWizard' import { AddFeatureForm } from './components/AddFeatureForm' import { FeatureModal } from './components/FeatureModal' import { DebugLogViewer, type TabType } from './components/DebugLogViewer' -import { AgentThought } from './components/AgentThought' import { AgentMissionControl } from './components/AgentMissionControl' import { CelebrationOverlay } from './components/CelebrationOverlay' import 
{ AssistantFAB } from './components/AssistantFAB' @@ -28,8 +27,8 @@ import { KeyboardShortcutsHelp } from './components/KeyboardShortcutsHelp' import { ThemeSelector } from './components/ThemeSelector' import { ResetProjectModal } from './components/ResetProjectModal' import { ProjectSetupRequired } from './components/ProjectSetupRequired' -import { getDependencyGraph } from './lib/api' -import { Loader2, Settings, Moon, Sun, RotateCcw } from 'lucide-react' +import { getDependencyGraph, startAgent } from './lib/api' +import { Loader2, Settings, Moon, Sun, RotateCcw, BookOpen } from 'lucide-react' import type { Feature } from './lib/types' import { Button } from '@/components/ui/button' import { Card, CardContent } from '@/components/ui/card' @@ -41,6 +40,8 @@ const VIEW_MODE_KEY = 'autocoder-view-mode' // Bottom padding for main content when debug panel is collapsed (40px header + 8px margin) const COLLAPSED_DEBUG_PANEL_CLEARANCE = 48 +type InitializerStatus = 'idle' | 'starting' | 'error' + function App() { // Initialize selected project from localStorage const [selectedProject, setSelectedProject] = useState(() => { @@ -63,6 +64,8 @@ function App() { const [isSpecCreating, setIsSpecCreating] = useState(false) const [showResetModal, setShowResetModal] = useState(false) const [showSpecChat, setShowSpecChat] = useState(false) // For "Create Spec" button in empty kanban + const [specInitializerStatus, setSpecInitializerStatus] = useState('idle') + const [specInitializerError, setSpecInitializerError] = useState(null) const [viewMode, setViewMode] = useState(() => { try { const stored = localStorage.getItem(VIEW_MODE_KEY) @@ -332,6 +335,17 @@ function App() { )} + {/* Docs link */} + + {/* Theme selector */} {/* Agent Mission Control - shows orchestrator status and active agents in parallel mode */} @@ -396,13 +412,6 @@ function App() { getAgentLogs={wsState.getAgentLogs} /> - {/* Agent Thought - shows latest agent narrative (single agent mode) */} - {wsState.activeAgents.length === 0 && ( - - )} {/* Initializing Features State - show when agent is running but no features yet */} {features && @@ -495,14 +504,31 @@ function App() {
{ - setShowSpecChat(false) - // Refresh projects to update has_spec - queryClient.invalidateQueries({ queryKey: ['projects'] }) - queryClient.invalidateQueries({ queryKey: ['features', selectedProject] }) + onComplete={async (_specPath, yoloMode) => { + setSpecInitializerStatus('starting') + try { + await startAgent(selectedProject, { + yoloMode: yoloMode ?? false, + maxConcurrency: 3, + }) + // Success — close chat and refresh + setShowSpecChat(false) + setSpecInitializerStatus('idle') + queryClient.invalidateQueries({ queryKey: ['projects'] }) + queryClient.invalidateQueries({ queryKey: ['features', selectedProject] }) + } catch (err) { + setSpecInitializerStatus('error') + setSpecInitializerError(err instanceof Error ? err.message : 'Failed to start agent') + } + }} + onCancel={() => { setShowSpecChat(false); setSpecInitializerStatus('idle') }} + onExitToProject={() => { setShowSpecChat(false); setSpecInitializerStatus('idle') }} + initializerStatus={specInitializerStatus} + initializerError={specInitializerError} + onRetryInitializer={() => { + setSpecInitializerError(null) + setSpecInitializerStatus('idle') }} - onCancel={() => setShowSpecChat(false)} - onExitToProject={() => setShowSpecChat(false)} />
)} diff --git a/ui/src/components/AgentAvatar.tsx b/ui/src/components/AgentAvatar.tsx index edb36d6..3899cbf 100644 --- a/ui/src/components/AgentAvatar.tsx +++ b/ui/src/components/AgentAvatar.tsx @@ -1,4 +1,10 @@ import { type AgentMascot, type AgentState } from '../lib/types' +import { + AVATAR_COLORS, + UNKNOWN_COLORS, + MASCOT_SVGS, + UnknownMascotSVG, +} from './mascotData' interface AgentAvatarProps { name: AgentMascot | 'Unknown' @@ -7,515 +13,12 @@ interface AgentAvatarProps { showName?: boolean } -// Fallback colors for unknown agents (neutral gray) -const UNKNOWN_COLORS = { primary: '#6B7280', secondary: '#9CA3AF', accent: '#F3F4F6' } - -const AVATAR_COLORS: Record = { - // Original 5 - Spark: { primary: '#3B82F6', secondary: '#60A5FA', accent: '#DBEAFE' }, // Blue robot - Fizz: { primary: '#F97316', secondary: '#FB923C', accent: '#FFEDD5' }, // Orange fox - Octo: { primary: '#8B5CF6', secondary: '#A78BFA', accent: '#EDE9FE' }, // Purple octopus - Hoot: { primary: '#22C55E', secondary: '#4ADE80', accent: '#DCFCE7' }, // Green owl - Buzz: { primary: '#EAB308', secondary: '#FACC15', accent: '#FEF9C3' }, // Yellow bee - // Tech-inspired - Pixel: { primary: '#EC4899', secondary: '#F472B6', accent: '#FCE7F3' }, // Pink - Byte: { primary: '#06B6D4', secondary: '#22D3EE', accent: '#CFFAFE' }, // Cyan - Nova: { primary: '#F43F5E', secondary: '#FB7185', accent: '#FFE4E6' }, // Rose - Chip: { primary: '#84CC16', secondary: '#A3E635', accent: '#ECFCCB' }, // Lime - Bolt: { primary: '#FBBF24', secondary: '#FCD34D', accent: '#FEF3C7' }, // Amber - // Energetic - Dash: { primary: '#14B8A6', secondary: '#2DD4BF', accent: '#CCFBF1' }, // Teal - Zap: { primary: '#A855F7', secondary: '#C084FC', accent: '#F3E8FF' }, // Violet - Gizmo: { primary: '#64748B', secondary: '#94A3B8', accent: '#F1F5F9' }, // Slate - Turbo: { primary: '#EF4444', secondary: '#F87171', accent: '#FEE2E2' }, // Red - Blip: { primary: '#10B981', secondary: '#34D399', accent: '#D1FAE5' }, // Emerald - // Playful - Neon: { primary: '#D946EF', secondary: '#E879F9', accent: '#FAE8FF' }, // Fuchsia - Widget: { primary: '#6366F1', secondary: '#818CF8', accent: '#E0E7FF' }, // Indigo - Zippy: { primary: '#F59E0B', secondary: '#FBBF24', accent: '#FEF3C7' }, // Orange-yellow - Quirk: { primary: '#0EA5E9', secondary: '#38BDF8', accent: '#E0F2FE' }, // Sky - Flux: { primary: '#7C3AED', secondary: '#8B5CF6', accent: '#EDE9FE' }, // Purple -} - const SIZES = { sm: { svg: 32, font: 'text-xs' }, md: { svg: 48, font: 'text-sm' }, lg: { svg: 64, font: 'text-base' }, } -// SVG mascot definitions - simple cute characters -function SparkSVG({ colors, size }: { colors: typeof AVATAR_COLORS.Spark; size: number }) { - return ( - - {/* Robot body */} - - {/* Robot head */} - - {/* Antenna */} - - - {/* Eyes */} - - - - - {/* Mouth */} - - {/* Arms */} - - - - ) -} - -function FizzSVG({ colors, size }: { colors: typeof AVATAR_COLORS.Fizz; size: number }) { - return ( - - {/* Ears */} - - - - - {/* Head */} - - {/* Face */} - - {/* Eyes */} - - - - - {/* Nose */} - - {/* Whiskers */} - - - - - - ) -} - -function OctoSVG({ colors, size }: { colors: typeof AVATAR_COLORS.Octo; size: number }) { - return ( - - {/* Tentacles */} - - - - - - {/* Head */} - - {/* Eyes */} - - - - - {/* Smile */} - - - ) -} - -function HootSVG({ colors, size }: { colors: typeof AVATAR_COLORS.Hoot; size: number }) { - return ( - - {/* Ear tufts */} - - - {/* Body */} - - {/* Head */} - - {/* Eye circles */} - - - {/* Eyes */} - - - - - {/* Beak */} - - {/* Belly */} - - - ) -} - 
-function BuzzSVG({ colors, size }: { colors: typeof AVATAR_COLORS.Buzz; size: number }) { - return ( - - {/* Wings */} - - - {/* Body stripes */} - - - - {/* Head */} - - {/* Antennae */} - - - - - {/* Eyes */} - - - - - {/* Smile */} - - - ) -} - -// Pixel - cute pixel art style character -function PixelSVG({ colors, size }: { colors: typeof AVATAR_COLORS.Pixel; size: number }) { - return ( - - {/* Blocky body */} - - - - {/* Head */} - - {/* Eyes */} - - - - - {/* Mouth */} - - - ) -} - -// Byte - data cube character -function ByteSVG({ colors, size }: { colors: typeof AVATAR_COLORS.Byte; size: number }) { - return ( - - {/* 3D cube body */} - - - - {/* Face */} - - - - - - - ) -} - -// Nova - star character -function NovaSVG({ colors, size }: { colors: typeof AVATAR_COLORS.Nova; size: number }) { - return ( - - {/* Star points */} - - - {/* Face */} - - - - - - - ) -} - -// Chip - circuit board character -function ChipSVG({ colors, size }: { colors: typeof AVATAR_COLORS.Chip; size: number }) { - return ( - - {/* Chip body */} - - {/* Pins */} - - - - - - - {/* Face */} - - - - - - - ) -} - -// Bolt - lightning character -function BoltSVG({ colors, size }: { colors: typeof AVATAR_COLORS.Bolt; size: number }) { - return ( - - {/* Lightning bolt body */} - - - {/* Face */} - - - - - - ) -} - -// Dash - speedy character -function DashSVG({ colors, size }: { colors: typeof AVATAR_COLORS.Dash; size: number }) { - return ( - - {/* Speed lines */} - - - {/* Aerodynamic body */} - - - {/* Face */} - - - - - - - ) -} - -// Zap - electric orb -function ZapSVG({ colors, size }: { colors: typeof AVATAR_COLORS.Zap; size: number }) { - return ( - - {/* Electric sparks */} - - - {/* Orb */} - - - {/* Face */} - - - - - - - ) -} - -// Gizmo - gear character -function GizmoSVG({ colors, size }: { colors: typeof AVATAR_COLORS.Gizmo; size: number }) { - return ( - - {/* Gear teeth */} - - - - - {/* Gear body */} - - - {/* Face */} - - - - - - - ) -} - -// Turbo - rocket character -function TurboSVG({ colors, size }: { colors: typeof AVATAR_COLORS.Turbo; size: number }) { - return ( - - {/* Flames */} - - - {/* Rocket body */} - - {/* Nose cone */} - - {/* Fins */} - - - {/* Window/Face */} - - - - - - ) -} - -// Blip - radar dot character -function BlipSVG({ colors, size }: { colors: typeof AVATAR_COLORS.Blip; size: number }) { - return ( - - {/* Radar rings */} - - - {/* Main dot */} - - - {/* Face */} - - - - - - - ) -} - -// Neon - glowing character -function NeonSVG({ colors, size }: { colors: typeof AVATAR_COLORS.Neon; size: number }) { - return ( - - {/* Glow effect */} - - - {/* Body */} - - {/* Inner glow */} - - {/* Face */} - - - - - - - ) -} - -// Widget - UI component character -function WidgetSVG({ colors, size }: { colors: typeof AVATAR_COLORS.Widget; size: number }) { - return ( - - {/* Window frame */} - - {/* Title bar */} - - - - - {/* Content area / Face */} - - - - - - - - ) -} - -// Zippy - fast bunny-like character -function ZippySVG({ colors, size }: { colors: typeof AVATAR_COLORS.Zippy; size: number }) { - return ( - - {/* Ears */} - - - - - {/* Head */} - - {/* Face */} - - - - - {/* Nose and mouth */} - - - - ) -} - -// Quirk - question mark character -function QuirkSVG({ colors, size }: { colors: typeof AVATAR_COLORS.Quirk; size: number }) { - return ( - - {/* Question mark body */} - - - {/* Face on the dot */} - - - - - {/* Decorative swirl */} - - - ) -} - -// Flux - flowing wave character -function FluxSVG({ colors, size }: { colors: typeof AVATAR_COLORS.Flux; size: number 
}) { - return ( - - {/* Wave body */} - - - {/* Face */} - - - - - {/* Sparkles */} - - - - ) -} - -// Unknown agent fallback - simple question mark icon -function UnknownSVG({ colors, size }: { colors: typeof UNKNOWN_COLORS; size: number }) { - return ( - - {/* Circle background */} - - - {/* Question mark */} - ? - - ) -} - -const MASCOT_SVGS: Record = { - // Original 5 - Spark: SparkSVG, - Fizz: FizzSVG, - Octo: OctoSVG, - Hoot: HootSVG, - Buzz: BuzzSVG, - // Tech-inspired - Pixel: PixelSVG, - Byte: ByteSVG, - Nova: NovaSVG, - Chip: ChipSVG, - Bolt: BoltSVG, - // Energetic - Dash: DashSVG, - Zap: ZapSVG, - Gizmo: GizmoSVG, - Turbo: TurboSVG, - Blip: BlipSVG, - // Playful - Neon: NeonSVG, - Widget: WidgetSVG, - Zippy: ZippySVG, - Quirk: QuirkSVG, - Flux: FluxSVG, -} - // Animation classes based on state function getStateAnimation(state: AgentState): string { switch (state) { @@ -581,7 +84,7 @@ export function AgentAvatar({ name, state, size = 'md', showName = false }: Agen const isUnknown = name === 'Unknown' const colors = isUnknown ? UNKNOWN_COLORS : AVATAR_COLORS[name] const { svg: svgSize, font } = SIZES[size] - const SvgComponent = isUnknown ? UnknownSVG : MASCOT_SVGS[name] + const SvgComponent = isUnknown ? UnknownMascotSVG : MASCOT_SVGS[name] const stateDesc = getStateDescription(state) const ariaLabel = `Agent ${name} is ${stateDesc}` diff --git a/ui/src/components/AgentCard.tsx b/ui/src/components/AgentCard.tsx index 9fdff64..faa10db 100644 --- a/ui/src/components/AgentCard.tsx +++ b/ui/src/components/AgentCard.tsx @@ -112,12 +112,25 @@ export function AgentCard({ agent, onShowLogs }: AgentCardProps) { {/* Feature info */}
-
- Feature #{agent.featureId} -
-
- {agent.featureName} -
+ {agent.featureIds && agent.featureIds.length > 1 ? ( + <> +
+ Batch: {agent.featureIds.map(id => `#${id}`).join(', ')} +
+
+ Active: Feature #{agent.featureId} +
+ + ) : ( + <> +
+ Feature #{agent.featureId} +
+
+ {agent.featureName} +
+ + )}
{/* Thought bubble */} @@ -195,7 +208,10 @@ export function AgentLogModal({ agent, logs, onClose }: AgentLogModalProps) {

- Feature #{agent.featureId}: {agent.featureName} + {agent.featureIds && agent.featureIds.length > 1 + ? `Batch: ${agent.featureIds.map(id => `#${id}`).join(', ')}` + : `Feature #${agent.featureId}: ${agent.featureName}` + }
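The AgentCard change above and the DependencyGraph/KanbanColumn changes that follow share one batch-aware pattern: every ID in `featureIds` (falling back to the single `featureId`) maps to the agent that owns it. A minimal standalone sketch of that lookup, assuming an `ActiveAgent` shape limited to the fields this diff actually touches:

```ts
// Sketch only: the app's real agent type is defined elsewhere in the UI;
// these are just the fields the batch-aware lookup needs.
interface ActiveAgent {
  agentName: string
  state: string
  featureId: number
  featureIds?: number[]
}

// Map every feature ID in an agent's batch to that agent, so a card or graph
// node can be resolved with a single Map lookup.
function buildAgentByFeatureId(activeAgents: ActiveAgent[]): Map<number, ActiveAgent> {
  const map = new Map<number, ActiveAgent>()
  for (const agent of activeAgents) {
    const ids = agent.featureIds ?? [agent.featureId]
    for (const fid of ids) {
      map.set(fid, agent)
    }
  }
  return map
}
```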

diff --git a/ui/src/components/DependencyGraph.tsx b/ui/src/components/DependencyGraph.tsx index 3061548..4151c39 100644 --- a/ui/src/components/DependencyGraph.tsx +++ b/ui/src/components/DependencyGraph.tsx @@ -227,10 +227,14 @@ function DependencyGraphInner({ graphData, onNodeClick, activeAgents = [] }: Dep }, []) // Create a map of featureId to agent info for quick lookup + // Maps ALL batch feature IDs to the same agent const agentByFeatureId = useMemo(() => { const map = new Map() for (const agent of activeAgents) { - map.set(agent.featureId, { name: agent.agentName, state: agent.state }) + const ids = agent.featureIds || [agent.featureId] + for (const fid of ids) { + map.set(fid, { name: agent.agentName, state: agent.state }) + } } return map }, [activeAgents]) diff --git a/ui/src/components/KanbanColumn.tsx b/ui/src/components/KanbanColumn.tsx index 9ab8902..1c39f30 100644 --- a/ui/src/components/KanbanColumn.tsx +++ b/ui/src/components/KanbanColumn.tsx @@ -41,9 +41,14 @@ export function KanbanColumn({ showCreateSpec, }: KanbanColumnProps) { // Create a map of feature ID to active agent for quick lookup - const agentByFeatureId = new Map( - activeAgents.map(agent => [agent.featureId, agent]) - ) + // Maps ALL batch feature IDs to the same agent + const agentByFeatureId = new Map() + for (const agent of activeAgents) { + const ids = agent.featureIds || [agent.featureId] + for (const fid of ids) { + agentByFeatureId.set(fid, agent) + } + } return ( diff --git a/ui/src/components/NewProjectModal.tsx b/ui/src/components/NewProjectModal.tsx index 38e567f..4b46023 100644 --- a/ui/src/components/NewProjectModal.tsx +++ b/ui/src/components/NewProjectModal.tsx @@ -10,6 +10,7 @@ */ import { useState } from 'react' +import { createPortal } from 'react-dom' import { Bot, FileEdit, ArrowRight, ArrowLeft, Loader2, CheckCircle2, Folder } from 'lucide-react' import { useCreateProject } from '../hooks/useProjects' import { SpecCreationChat } from './SpecCreationChat' @@ -200,10 +201,10 @@ export function NewProjectModal({ } } - // Full-screen chat view + // Full-screen chat view - use portal to render at body level if (step === 'chat') { - return ( -
+ return createPortal( +
-
+
, + document.body ) } diff --git a/ui/src/components/ProgressDashboard.tsx b/ui/src/components/ProgressDashboard.tsx index 7b935db..4a1865f 100644 --- a/ui/src/components/ProgressDashboard.tsx +++ b/ui/src/components/ProgressDashboard.tsx @@ -1,12 +1,40 @@ -import { Wifi, WifiOff } from 'lucide-react' +import { useMemo, useState, useEffect } from 'react' +import { Wifi, WifiOff, Brain, Sparkles } from 'lucide-react' import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card' import { Badge } from '@/components/ui/badge' +import type { AgentStatus } from '../lib/types' interface ProgressDashboardProps { passing: number total: number percentage: number isConnected: boolean + logs?: Array<{ line: string; timestamp: string }> + agentStatus?: AgentStatus +} + +const IDLE_TIMEOUT = 30000 + +function isAgentThought(line: string): boolean { + const trimmed = line.trim() + if (/^\[Tool:/.test(trimmed)) return false + if (/^\s*Input:\s*\{/.test(trimmed)) return false + if (/^\[(Done|Error)\]/.test(trimmed)) return false + if (/^Output:/.test(trimmed)) return false + if (/^[[{]/.test(trimmed)) return false + if (trimmed.length < 10) return false + if (/^[A-Za-z]:\\/.test(trimmed)) return false + if (/^\/[a-z]/.test(trimmed)) return false + return true +} + +function getLatestThought(logs: Array<{ line: string; timestamp: string }>): string | null { + for (let i = logs.length - 1; i >= 0; i--) { + if (isAgentThought(logs[i].line)) { + return logs[i].line.trim() + } + } + return null } export function ProgressDashboard({ @@ -14,67 +42,109 @@ export function ProgressDashboard({ total, percentage, isConnected, + logs = [], + agentStatus, }: ProgressDashboardProps) { + const thought = useMemo(() => getLatestThought(logs), [logs]) + const [displayedThought, setDisplayedThought] = useState(null) + const [textVisible, setTextVisible] = useState(true) + + const lastLogTimestamp = logs.length > 0 + ? new Date(logs[logs.length - 1].timestamp).getTime() + : 0 + + const showThought = useMemo(() => { + if (!thought) return false + if (agentStatus === 'running') return true + if (agentStatus === 'paused') { + return Date.now() - lastLogTimestamp < IDLE_TIMEOUT + } + return false + }, [thought, agentStatus, lastLogTimestamp]) + + useEffect(() => { + if (thought !== displayedThought && thought) { + setTextVisible(false) + const timeout = setTimeout(() => { + setDisplayedThought(thought) + setTextVisible(true) + }, 150) + return () => clearTimeout(timeout) + } + }, [thought, displayedThought]) + + const isRunning = agentStatus === 'running' + return ( - - - Progress - - - {isConnected ? ( - <> - - Live - - ) : ( - <> - - Offline - - )} - + +
+ + Progress + + + {isConnected ? ( + <> + + Live + + ) : ( + <> + + Offline + + )} + +
+
+ + {passing} + + / + + {total} + +
- - {/* Large Percentage */} -
- - - {percentage.toFixed(1)} - - - % - + +
+ {/* Progress Bar */} +
+
+
+ {/* Percentage */} + + {percentage.toFixed(1)}%
- {/* Progress Bar */} -
-
-
- - {/* Stats */} -
-
- - {passing} - - - Passing - -
-
/
-
- - {total} - - - Total - + {/* Agent Thought */} +
+
+
+ + {isRunning && ( + + )} +
+

+ {displayedThought?.replace(/:$/, '')} +

diff --git a/ui/src/components/SettingsModal.tsx b/ui/src/components/SettingsModal.tsx index a4b787f..0246cdd 100644 --- a/ui/src/components/SettingsModal.tsx +++ b/ui/src/components/SettingsModal.tsx @@ -41,6 +41,12 @@ export function SettingsModal({ isOpen, onClose }: SettingsModalProps) { } } + const handleBatchSizeChange = (size: number) => { + if (!updateSettings.isPending) { + updateSettings.mutate({ batch_size: size }) + } + } + const models = modelsData?.models ?? [] const isSaving = updateSettings.isPending @@ -171,6 +177,24 @@ export function SettingsModal({ isOpen, onClose }: SettingsModalProps) { />
+ {/* Headless Browser Toggle */} +
+
+ +

+ Run browser without visible window (saves CPU) +

+
+ updateSettings.mutate({ playwright_headless: !settings.playwright_headless })} + disabled={isSaving} + /> +
+ {/* Model Selection */}
@@ -216,6 +240,30 @@ export function SettingsModal({ isOpen, onClose }: SettingsModalProps) {
+ {/* Features per Agent */} +
+ +

+ Number of features assigned to each coding agent +

+
+ {[1, 2, 3].map((size) => ( + + ))} +
+
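For reference, these are the two settings fields this hunk writes through `updateSettings.mutate`. A sketch only, since the full settings schema is not part of this diff:

```ts
// Partial payload type implied by the mutations above; other settings fields
// exist in the app but are omitted here because this diff does not show them.
interface SettingsUpdate {
  playwright_headless?: boolean
  batch_size?: 1 | 2 | 3 // the "Features per Agent" buttons expose exactly these values
}
```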
+ {/* Update Error */} {updateSettings.isError && ( diff --git a/ui/src/components/SpecCreationChat.tsx b/ui/src/components/SpecCreationChat.tsx index c96a1f2..184b26e 100644 --- a/ui/src/components/SpecCreationChat.tsx +++ b/ui/src/components/SpecCreationChat.tsx @@ -228,7 +228,7 @@ export function SpecCreationChat({ } return ( -
+
{/* Header */}
@@ -303,7 +303,7 @@ export function SpecCreationChat({ )} {/* Messages area */} -
+
{messages.length === 0 && !isLoading && (
@@ -451,9 +451,8 @@ export function SpecCreationChat({ {/* Completion footer */} {isComplete && ( -
+
{initializerStatus === 'starting' ? ( diff --git a/ui/src/components/docs/DocsContent.tsx b/ui/src/components/docs/DocsContent.tsx new file mode 100644 index 0000000..84d7309 --- /dev/null +++ b/ui/src/components/docs/DocsContent.tsx @@ -0,0 +1,130 @@ +/** + * DocsContent Component + * + * Renders all 13 documentation section components in order. + * Uses IntersectionObserver to detect which section heading is currently + * visible in the viewport, and notifies the parent so the sidebar + * can highlight the active section. + */ + +import { useEffect, useRef, useCallback } from 'react' +import { DOC_SECTIONS } from './docsData' +// Section components -- lazy-load candidates in the future, but imported +// statically for now to keep the build simple and deterministic. +import { GettingStarted } from './sections/GettingStarted' +import { AppSpecSetup } from './sections/AppSpecSetup' +import { ProjectStructure } from './sections/ProjectStructure' +import { FeaturesKanban } from './sections/FeaturesKanban' +import { AgentSystem } from './sections/AgentSystem' +import { SettingsConfig } from './sections/SettingsConfig' +import { DeveloperTools } from './sections/DeveloperTools' +import { AIAssistant } from './sections/AIAssistant' +import { Scheduling } from './sections/Scheduling' +import { AppearanceThemes } from './sections/AppearanceThemes' +import { Security } from './sections/Security' +import { AdvancedConfig } from './sections/AdvancedConfig' +import { FAQ } from './sections/FAQ' + +interface DocsContentProps { + activeSectionId: string | null + onSectionVisible: (id: string) => void +} + +/** + * Maps each section id from docsData to its corresponding React component. + * Order matches DOC_SECTIONS so we can iterate safely. + */ +const SECTION_COMPONENTS: Record = { + 'getting-started': GettingStarted, + 'app-spec-setup': AppSpecSetup, + 'project-structure': ProjectStructure, + 'features-kanban': FeaturesKanban, + 'agent-system': AgentSystem, + 'settings-config': SettingsConfig, + 'developer-tools': DeveloperTools, + 'ai-assistant': AIAssistant, + scheduling: Scheduling, + 'appearance-themes': AppearanceThemes, + security: Security, + 'advanced-config': AdvancedConfig, + faq: FAQ, +} + +export function DocsContent({ onSectionVisible }: DocsContentProps) { + const containerRef = useRef(null) + // Store refs to each section heading element so the observer can watch them + const headingRefs = useRef>(new Map()) + + // Stable callback ref setter -- avoids recreating refs on every render + const setHeadingRef = useCallback((id: string, element: HTMLElement | null) => { + if (element) { + headingRefs.current.set(id, element) + } else { + headingRefs.current.delete(id) + } + }, []) + + // IntersectionObserver: track which section heading is at or near the top of the viewport + useEffect(() => { + const headings = headingRefs.current + if (headings.size === 0) return + + // rootMargin: trigger when a heading enters the top 20% of the viewport. + // This ensures the sidebar updates *before* the user scrolls past the heading. 
+ const observer = new IntersectionObserver( + (entries) => { + // Find the topmost visible heading -- the one closest to the top of the viewport + const visible = entries + .filter((entry) => entry.isIntersecting) + .sort((a, b) => a.boundingClientRect.top - b.boundingClientRect.top) + + if (visible.length > 0) { + const topEntry = visible[0] + const sectionId = topEntry.target.getAttribute('data-section-id') + if (sectionId) { + onSectionVisible(sectionId) + } + } + }, + { + // Observe from the very top of the viewport down to -60% from the bottom, + // so headings are detected while in the upper portion of the screen. + rootMargin: '0px 0px -60% 0px', + threshold: 0, + }, + ) + + headings.forEach((element) => observer.observe(element)) + + return () => observer.disconnect() + }, [onSectionVisible]) + + return ( +
+ {DOC_SECTIONS.map((section) => { + const SectionComponent = SECTION_COMPONENTS[section.id] + if (!SectionComponent) return null + + const Icon = section.icon + + return ( +
+ {/* Section heading with anchor */} +

setHeadingRef(section.id, el)} + data-section-id={section.id} + className="font-display text-2xl font-bold tracking-tight mb-6 flex items-center gap-3 + text-foreground border-b-2 border-border pb-3" + > + + {section.title} +

+ + {/* Section body */} + +
+ ) + })} +
+ ) +} diff --git a/ui/src/components/docs/DocsPage.tsx b/ui/src/components/docs/DocsPage.tsx new file mode 100644 index 0000000..25288cc --- /dev/null +++ b/ui/src/components/docs/DocsPage.tsx @@ -0,0 +1,215 @@ +/** + * DocsPage Component + * + * Main layout for the documentation route (#/docs). + * Full-page layout with a sticky header, collapsible sidebar on the left, + * and scrollable content area on the right. + * + * Mobile-responsive: sidebar collapses behind a hamburger menu that + * opens as an overlay. + */ + +import { useState, useEffect, useCallback } from 'react' +import { ArrowLeft, Menu, X, Moon, Sun } from 'lucide-react' +import { useHashRoute } from '../../hooks/useHashRoute' +import { useTheme } from '../../hooks/useTheme' +import { ThemeSelector } from '../ThemeSelector' +import { Button } from '@/components/ui/button' +import { Badge } from '@/components/ui/badge' +import { DocsSidebar } from './DocsSidebar' +import { DocsSearch } from './DocsSearch' +import { DocsContent } from './DocsContent' + +export function DocsPage() { + const [activeSectionId, setActiveSectionId] = useState(null) + const [searchQuery, setSearchQuery] = useState('') + const [mobileSidebarOpen, setMobileSidebarOpen] = useState(false) + + const { section: initialSection } = useHashRoute() + const { theme, setTheme, darkMode, toggleDarkMode, themes } = useTheme() + + // On mount, if the hash includes a section id (e.g. #/docs/getting-started), + // scroll to it and set it as active + useEffect(() => { + if (initialSection) { + setActiveSectionId(initialSection) + // Delay scroll slightly so the DOM is rendered + requestAnimationFrame(() => { + const element = document.getElementById(initialSection) + if (element) { + element.scrollIntoView({ behavior: 'smooth', block: 'start' }) + } + }) + } + // eslint-disable-next-line react-hooks/exhaustive-deps + }, []) // Run only on mount + + // When a sidebar item is clicked, scroll the corresponding element into view + const handleSectionClick = useCallback((id: string) => { + setActiveSectionId(id) + + // Update hash for linkability (without triggering a route change) + history.replaceState(null, '', `#/docs/${id}`) + + const element = document.getElementById(id) + if (element) { + element.scrollIntoView({ behavior: 'smooth', block: 'start' }) + } + }, []) + + // Called by DocsContent's IntersectionObserver when a heading scrolls into view + const handleSectionVisible = useCallback((id: string) => { + setActiveSectionId(id) + }, []) + + // Close mobile sidebar when pressing Escape + useEffect(() => { + const handleKeyDown = (e: KeyboardEvent) => { + if (e.key === 'Escape' && mobileSidebarOpen) { + setMobileSidebarOpen(false) + } + } + + window.addEventListener('keydown', handleKeyDown) + return () => window.removeEventListener('keydown', handleKeyDown) + }, [mobileSidebarOpen]) + + // Prevent body scroll when mobile sidebar overlay is open + useEffect(() => { + if (mobileSidebarOpen) { + document.body.style.overflow = 'hidden' + } else { + document.body.style.overflow = '' + } + return () => { + document.body.style.overflow = '' + } + }, [mobileSidebarOpen]) + + return ( +
+ {/* Sticky header */} +
+
+
+ {/* Left side: hamburger (mobile) + title + badge */} +
+ {/* Mobile hamburger button -- only visible below lg breakpoint */} + + + + AutoCoder + + + + Documentation + +
+ + {/* Right side: theme controls + back button */} +
+ + + + + +
+
+
+
+ + {/* Body: sidebar + content */} +
+ {/* ---------------------------------------------------------------- + Desktop sidebar -- visible at lg and above + Fixed width, sticky below the header, independently scrollable + ---------------------------------------------------------------- */} + + + {/* ---------------------------------------------------------------- + Mobile sidebar overlay -- visible below lg breakpoint + ---------------------------------------------------------------- */} + {mobileSidebarOpen && ( + <> + {/* Backdrop */} +
setMobileSidebarOpen(false)} + aria-hidden="true" + /> + + {/* Sidebar panel */} + + + )} + + {/* ---------------------------------------------------------------- + Content area -- fills remaining space, scrollable + ---------------------------------------------------------------- */} +
+
+ +
+
+
+
+ ) +} diff --git a/ui/src/components/docs/DocsSearch.tsx b/ui/src/components/docs/DocsSearch.tsx new file mode 100644 index 0000000..896f3cd --- /dev/null +++ b/ui/src/components/docs/DocsSearch.tsx @@ -0,0 +1,78 @@ +/** + * DocsSearch Component + * + * Search input for the documentation sidebar. + * Supports Ctrl/Cmd+K keyboard shortcut to focus, + * and shows a keyboard hint when the input is empty. + */ + +import { useRef, useEffect } from 'react' +import { Search, X } from 'lucide-react' + +interface DocsSearchProps { + value: string + onChange: (value: string) => void +} + +export function DocsSearch({ value, onChange }: DocsSearchProps) { + const inputRef = useRef(null) + + // Global keyboard shortcut: Ctrl/Cmd+K focuses the search input + useEffect(() => { + const handleKeyDown = (e: KeyboardEvent) => { + if ((e.ctrlKey || e.metaKey) && e.key === 'k') { + e.preventDefault() + inputRef.current?.focus() + } + } + + window.addEventListener('keydown', handleKeyDown) + return () => window.removeEventListener('keydown', handleKeyDown) + }, []) + + return ( +
+ {/* Search icon */} + + + onChange(e.target.value)} + placeholder="Search docs..." + className="w-full pl-9 pr-16 py-2 text-sm bg-muted border border-border rounded-lg + text-foreground placeholder:text-muted-foreground + focus:outline-none focus:ring-2 focus:ring-ring/50 focus:border-ring + transition-colors" + /> + + {/* Right side: clear button when has value, otherwise Ctrl+K hint */} + {value ? ( + + ) : ( + + Ctrl+K + + )} +
+ ) +} diff --git a/ui/src/components/docs/DocsSidebar.tsx b/ui/src/components/docs/DocsSidebar.tsx new file mode 100644 index 0000000..4be7576 --- /dev/null +++ b/ui/src/components/docs/DocsSidebar.tsx @@ -0,0 +1,189 @@ +/** + * DocsSidebar Component + * + * Left sidebar navigation for the documentation page. + * Lists all sections from docsData with expandable subsections. + * Supports search filtering with auto-expansion of matching sections. + */ + +import { useState, useMemo } from 'react' +import { ChevronRight } from 'lucide-react' +import { DOC_SECTIONS, type DocSection } from './docsData' + +interface DocsSidebarProps { + activeSectionId: string | null + onSectionClick: (id: string) => void + searchQuery: string + onMobileClose?: () => void +} + +export function DocsSidebar({ + activeSectionId, + onSectionClick, + searchQuery, + onMobileClose, +}: DocsSidebarProps) { + // Track which top-level sections are manually expanded by the user + const [expandedSections, setExpandedSections] = useState>(() => { + // Start with the first section expanded so the sidebar is not fully collapsed + const initial = new Set() + if (DOC_SECTIONS.length > 0) { + initial.add(DOC_SECTIONS[0].id) + } + return initial + }) + + const normalizedQuery = searchQuery.trim().toLowerCase() + + // Filter sections based on search query, matching against section title, + // subsection titles, and keywords + const filteredSections = useMemo(() => { + if (!normalizedQuery) { + return DOC_SECTIONS + } + + return DOC_SECTIONS.filter((section) => { + // Check section title + if (section.title.toLowerCase().includes(normalizedQuery)) return true + + // Check keywords + if (section.keywords.some((kw) => kw.toLowerCase().includes(normalizedQuery))) return true + + // Check subsection titles + if (section.subsections.some((sub) => sub.title.toLowerCase().includes(normalizedQuery))) { + return true + } + + return false + }) + }, [normalizedQuery]) + + // Determine which sections should appear expanded: + // - When searching: auto-expand all matching sections + // - Otherwise: use manual expanded state, plus expand whichever section contains the active item + const isSectionExpanded = (sectionId: string): boolean => { + if (normalizedQuery) return true + + if (expandedSections.has(sectionId)) return true + + // Also expand the section that contains the currently active subsection + if (activeSectionId) { + const section = DOC_SECTIONS.find((s) => s.id === sectionId) + if (section) { + if (section.id === activeSectionId) return true + if (section.subsections.some((sub) => sub.id === activeSectionId)) return true + } + } + + return false + } + + const toggleSection = (sectionId: string) => { + setExpandedSections((prev) => { + const next = new Set(prev) + if (next.has(sectionId)) { + next.delete(sectionId) + } else { + next.add(sectionId) + } + return next + }) + } + + /** + * Checks whether a given id (section or subsection) is the currently active item. + * Active items get a highlighted visual treatment. + */ + const isActive = (id: string): boolean => activeSectionId === id + + /** + * Checks whether a section contains the active subsection. + * Used to highlight parent sections in a muted way. 
+ */ + const sectionContainsActive = (section: DocSection): boolean => { + if (!activeSectionId) return false + return section.subsections.some((sub) => sub.id === activeSectionId) + } + + const handleItemClick = (id: string) => { + onSectionClick(id) + // On mobile, close the sidebar after navigation + onMobileClose?.() + } + + return ( + + ) +} diff --git a/ui/src/components/docs/docsData.ts b/ui/src/components/docs/docsData.ts new file mode 100644 index 0000000..d7b2875 --- /dev/null +++ b/ui/src/components/docs/docsData.ts @@ -0,0 +1,222 @@ +import { + Rocket, + FileText, + FolderTree, + LayoutGrid, + Bot, + Settings, + Terminal, + MessageSquare, + Clock, + Palette, + Shield, + Wrench, + HelpCircle, + type LucideIcon, +} from 'lucide-react' + +export interface DocSubsection { + id: string + title: string +} + +export interface DocSection { + id: string + title: string + icon: LucideIcon + subsections: DocSubsection[] + keywords: string[] +} + +export const DOC_SECTIONS: DocSection[] = [ + { + id: 'getting-started', + title: 'Getting Started', + icon: Rocket, + subsections: [ + { id: 'what-is-autocoder', title: 'What is AutoCoder?' }, + { id: 'quick-start', title: 'Quick Start' }, + { id: 'creating-a-project', title: 'Creating a New Project' }, + { id: 'existing-project', title: 'Adding to an Existing Project' }, + { id: 'system-requirements', title: 'System Requirements' }, + ], + keywords: ['install', 'setup', 'start', 'begin', 'new', 'requirements', 'prerequisites'], + }, + { + id: 'app-spec-setup', + title: 'App Spec & Project Setup', + icon: FileText, + subsections: [ + { id: 'what-is-app-spec', title: 'What is an App Spec?' }, + { id: 'creating-spec-with-claude', title: 'Creating a Spec with Claude' }, + { id: 'writing-spec-manually', title: 'Writing a Spec Manually' }, + { id: 'initializer-agent', title: 'The Initializer Agent' }, + { id: 'starting-after-spec', title: 'Starting After Spec Creation' }, + ], + keywords: ['spec', 'specification', 'xml', 'app_spec', 'initializer', 'prompt', 'template'], + }, + { + id: 'project-structure', + title: 'Target Project Structure', + icon: FolderTree, + subsections: [ + { id: 'autocoder-directory', title: '.autocoder/ Directory Layout' }, + { id: 'features-db', title: 'Features Database' }, + { id: 'prompts-directory', title: 'Prompts Directory' }, + { id: 'allowed-commands-yaml', title: 'Allowed Commands Config' }, + { id: 'claude-md', title: 'CLAUDE.md Convention' }, + { id: 'legacy-migration', title: 'Legacy Layout Migration' }, + { id: 'claude-inheritance', title: 'Claude Inheritance' }, + ], + keywords: ['folder', 'directory', 'structure', 'layout', 'files', 'database', 'sqlite', 'migration'], + }, + { + id: 'features-kanban', + title: 'Features & Kanban Board', + icon: LayoutGrid, + subsections: [ + { id: 'kanban-overview', title: 'Kanban Board Overview' }, + { id: 'feature-cards', title: 'Feature Cards' }, + { id: 'dependency-graph', title: 'Dependency Graph View' }, + { id: 'adding-features', title: 'Adding Features' }, + { id: 'editing-features', title: 'Editing & Deleting Features' }, + { id: 'feature-dependencies', title: 'Feature Dependencies' }, + { id: 'expanding-with-ai', title: 'Expanding Project with AI' }, + { id: 'feature-priority', title: 'Priority & Ordering' }, + ], + keywords: ['kanban', 'board', 'feature', 'card', 'dependency', 'graph', 'priority', 'pending', 'progress', 'done'], + }, + { + id: 'agent-system', + title: 'Agent System', + icon: Bot, + subsections: [ + { id: 'maestro-orchestrator', title: 'Maestro: The 
Orchestrator' }, + { id: 'coding-agents', title: 'Coding Agents' }, + { id: 'testing-agents', title: 'Testing Agents' }, + { id: 'agent-lifecycle', title: 'Agent Lifecycle' }, + { id: 'concurrency', title: 'Concurrency Control' }, + { id: 'mission-control', title: 'Agent Mission Control' }, + { id: 'agent-mascots', title: 'Agent Mascots & States' }, + { id: 'agent-logs', title: 'Viewing Agent Logs' }, + { id: 'process-limits', title: 'Process Limits' }, + ], + keywords: ['agent', 'maestro', 'orchestrator', 'coding', 'testing', 'parallel', 'concurrency', 'mascot', 'spark', 'fizz', 'octo', 'batch'], + }, + { + id: 'settings-config', + title: 'Settings & Configuration', + icon: Settings, + subsections: [ + { id: 'opening-settings', title: 'Opening Settings' }, + { id: 'yolo-mode', title: 'YOLO Mode' }, + { id: 'headless-browser', title: 'Headless Browser' }, + { id: 'model-selection', title: 'Model Selection' }, + { id: 'regression-agents', title: 'Regression Agents' }, + { id: 'features-per-agent', title: 'Features per Agent (Batch Size)' }, + { id: 'concurrency-setting', title: 'Concurrency' }, + { id: 'settings-persistence', title: 'How Settings are Persisted' }, + ], + keywords: ['settings', 'config', 'yolo', 'headless', 'model', 'opus', 'sonnet', 'haiku', 'batch', 'regression'], + }, + { + id: 'developer-tools', + title: 'Developer Tools', + icon: Terminal, + subsections: [ + { id: 'debug-panel', title: 'Debug Panel' }, + { id: 'agent-logs-tab', title: 'Agent Logs Tab' }, + { id: 'dev-server-logs', title: 'Dev Server Logs Tab' }, + { id: 'terminal', title: 'Terminal' }, + { id: 'dev-server-control', title: 'Dev Server Control' }, + { id: 'per-agent-logs', title: 'Per-Agent Logs' }, + ], + keywords: ['debug', 'terminal', 'logs', 'dev server', 'console', 'xterm', 'shell'], + }, + { + id: 'ai-assistant', + title: 'AI Assistant', + icon: MessageSquare, + subsections: [ + { id: 'what-is-assistant', title: 'What is the Assistant?' 
}, + { id: 'opening-assistant', title: 'Opening the Assistant' }, + { id: 'assistant-capabilities', title: 'What It Can Do' }, + { id: 'assistant-limitations', title: 'What It Cannot Do' }, + { id: 'conversation-history', title: 'Conversation History' }, + ], + keywords: ['assistant', 'ai', 'chat', 'help', 'question', 'conversation'], + }, + { + id: 'scheduling', + title: 'Scheduling', + icon: Clock, + subsections: [ + { id: 'what-scheduling-does', title: 'What Scheduling Does' }, + { id: 'creating-schedule', title: 'Creating a Schedule' }, + { id: 'schedule-settings', title: 'Schedule Settings' }, + { id: 'schedule-overrides', title: 'Schedule Overrides' }, + { id: 'crash-recovery', title: 'Crash Recovery' }, + ], + keywords: ['schedule', 'timer', 'automated', 'cron', 'run', 'recurring', 'utc'], + }, + { + id: 'appearance-themes', + title: 'Appearance & Themes', + icon: Palette, + subsections: [ + { id: 'themes-overview', title: 'Themes Overview' }, + { id: 'dark-light-mode', title: 'Dark & Light Mode' }, + { id: 'theme-selector', title: 'Theme Selector' }, + { id: 'keyboard-shortcuts', title: 'Keyboard Shortcuts' }, + ], + keywords: ['theme', 'dark', 'light', 'color', 'appearance', 'twitter', 'claude', 'neo', 'brutalism', 'retro', 'aurora', 'business', 'keyboard', 'shortcut'], + }, + { + id: 'security', + title: 'Security', + icon: Shield, + subsections: [ + { id: 'command-validation', title: 'Command Validation Overview' }, + { id: 'command-hierarchy', title: 'Command Hierarchy' }, + { id: 'hardcoded-blocklist', title: 'Hardcoded Blocklist' }, + { id: 'global-allowlist', title: 'Global Allowlist' }, + { id: 'project-allowlist', title: 'Per-Project Allowed Commands' }, + { id: 'org-config', title: 'Organization Configuration' }, + { id: 'extra-read-paths', title: 'Extra Read Paths' }, + { id: 'filesystem-sandboxing', title: 'Filesystem Sandboxing' }, + ], + keywords: ['security', 'sandbox', 'allowlist', 'blocklist', 'command', 'bash', 'permission', 'filesystem'], + }, + { + id: 'advanced-config', + title: 'Advanced Configuration', + icon: Wrench, + subsections: [ + { id: 'vertex-ai', title: 'Vertex AI Setup' }, + { id: 'ollama', title: 'Ollama Local Models' }, + { id: 'env-variables', title: 'Environment Variables' }, + { id: 'cli-arguments', title: 'CLI Arguments' }, + { id: 'webhooks', title: 'Webhook Support' }, + { id: 'project-registry', title: 'Project Registry' }, + ], + keywords: ['vertex', 'gcloud', 'ollama', 'local', 'env', 'environment', 'cli', 'webhook', 'n8n', 'registry', 'api'], + }, + { + id: 'faq', + title: 'FAQ & Troubleshooting', + icon: HelpCircle, + subsections: [ + { id: 'faq-new-project', title: 'Starting a New Project' }, + { id: 'faq-existing-project', title: 'Adding to Existing Project' }, + { id: 'faq-agent-crash', title: 'Agent Crashes' }, + { id: 'faq-custom-commands', title: 'Custom Bash Commands' }, + { id: 'faq-blocked-features', title: 'Blocked Features' }, + { id: 'faq-parallel', title: 'Running in Parallel' }, + { id: 'faq-local-model', title: 'Using Local Models' }, + { id: 'faq-reset', title: 'Resetting a Project' }, + { id: 'faq-agent-types', title: 'Coding vs Testing Agents' }, + { id: 'faq-real-time', title: 'Monitoring in Real Time' }, + ], + keywords: ['faq', 'troubleshoot', 'help', 'problem', 'issue', 'fix', 'error', 'stuck', 'reset', 'crash'], + }, +] diff --git a/ui/src/components/docs/sections/AIAssistant.tsx b/ui/src/components/docs/sections/AIAssistant.tsx new file mode 100644 index 0000000..cceb297 --- /dev/null +++ 
b/ui/src/components/docs/sections/AIAssistant.tsx @@ -0,0 +1,75 @@ +/** + * AIAssistant Documentation Section + * + * Covers the project assistant: what it is, how to open it, + * its capabilities and limitations, and conversation history. + */ + +import { Badge } from '@/components/ui/badge' + +export function AIAssistant() { + return ( +
+ {/* What is the Assistant? */} +

+ What is the Assistant? +

+

+ The AI Assistant is a read-only project helper that can answer questions about your project, search + code, view progress, and help you understand what’s happening — without making any changes. +

+ + {/* Opening the Assistant */} +

+ Opening the Assistant +

+
    +
  • + Press A to toggle the assistant panel +
  • +
  • Or click the floating action button (chat bubble) in the bottom-right corner
  • +
  • The panel slides in from the right side
  • +
+ + {/* What It Can Do */} +

+ What It Can Do +

+
    +
  • Read and search your project’s source code
  • +
  • Answer questions about code architecture and implementation
  • +
  • View feature progress and status
  • +
  • Create new features based on your description
  • +
  • Explain what agents have done or are currently doing
  • +
  • Help debug issues by analyzing code and logs
  • +
+ + {/* What It Cannot Do */} +

+ What It Cannot Do +

+
    +
  • Modify files (read-only access)
  • +
  • Run bash commands
  • +
  • Mark features as passing/failing
  • +
  • Start or stop agents
  • +
  • Access external APIs or the internet
  • +
+
+ This is a deliberate security design — the assistant is a safe way to interact with your project + without risk of unintended changes. +
+ + {/* Conversation History */} +

+ Conversation History +

+
    +
  • Conversations are stored per-project in a SQLite database
  • +
  • Multiple conversations supported — start new ones as needed
  • +
  • Switch between conversations using the conversation selector
  • +
  • History persists across browser sessions
  • +
+
+ ) +} diff --git a/ui/src/components/docs/sections/AdvancedConfig.tsx b/ui/src/components/docs/sections/AdvancedConfig.tsx new file mode 100644 index 0000000..2ed1584 --- /dev/null +++ b/ui/src/components/docs/sections/AdvancedConfig.tsx @@ -0,0 +1,220 @@ +/** + * AdvancedConfig Documentation Section + * + * Covers Vertex AI setup, Ollama local models, environment variables, + * CLI arguments, webhook support, and the project registry. + */ + +import { Badge } from '@/components/ui/badge' + +/** Environment variable descriptor for the reference table. */ +interface EnvVar { + name: string + description: string +} + +const ENV_VARS: EnvVar[] = [ + { name: 'CLAUDE_CODE_USE_VERTEX', description: 'Enable Vertex AI (1)' }, + { name: 'CLOUD_ML_REGION', description: 'GCP region' }, + { name: 'ANTHROPIC_VERTEX_PROJECT_ID', description: 'GCP project ID' }, + { name: 'ANTHROPIC_BASE_URL', description: 'Custom API base URL (for Ollama)' }, + { name: 'ANTHROPIC_AUTH_TOKEN', description: 'API auth token' }, + { name: 'API_TIMEOUT_MS', description: 'API timeout in milliseconds' }, + { name: 'EXTRA_READ_PATHS', description: 'Comma-separated extra read directories' }, + { name: 'ANTHROPIC_DEFAULT_OPUS_MODEL', description: 'Override Opus model name' }, + { name: 'ANTHROPIC_DEFAULT_SONNET_MODEL', description: 'Override Sonnet model name' }, + { name: 'ANTHROPIC_DEFAULT_HAIKU_MODEL', description: 'Override Haiku model name' }, +] + +/** CLI argument descriptor for the reference table. */ +interface CliArg { + name: string + description: string +} + +const CLI_ARGS: CliArg[] = [ + { name: '--project-dir', description: 'Project directory path or registered name' }, + { name: '--yolo', description: 'Enable YOLO mode' }, + { name: '--parallel', description: 'Enable parallel mode' }, + { name: '--max-concurrency N', description: 'Max concurrent agents (1-5)' }, + { name: '--batch-size N', description: 'Features per coding agent (1-3)' }, + { name: '--batch-features 1,2,3', description: 'Specific feature IDs to implement' }, + { name: '--testing-batch-size N', description: 'Features per testing batch (1-5)' }, + { name: '--testing-batch-features 1,2,3', description: 'Specific testing feature IDs' }, +] + +export function AdvancedConfig() { + return ( +
+ {/* Vertex AI Setup */} +

+ Vertex AI Setup +

+

+ Run coding agents via Google Cloud Vertex AI: +

+
    +
  1. + Install and authenticate the gcloud CLI:{' '} + + gcloud auth application-default login + +
  2. +
  3. + Configure your{' '} + .env file: +
  4. +
+
+
{`CLAUDE_CODE_USE_VERTEX=1
+CLOUD_ML_REGION=us-east5
+ANTHROPIC_VERTEX_PROJECT_ID=your-gcp-project-id
+ANTHROPIC_DEFAULT_OPUS_MODEL=claude-opus-4-5@20251101
+ANTHROPIC_DEFAULT_SONNET_MODEL=claude-sonnet-4-5@20250929
+ANTHROPIC_DEFAULT_HAIKU_MODEL=claude-3-5-haiku@20241022`}
+
+
+ Use @{' '} + instead of -{' '} + in model names for Vertex AI. +
+ + {/* Ollama Local Models */} +

+ Ollama Local Models +

+

+ Run coding agents using local models via Ollama v0.14.0+: +

+
    +
  1. + Install Ollama from{' '} + + ollama.com + +
  2. +
  3. + Start Ollama:{' '} + ollama serve +
  4. +
  5. + Pull a coding model:{' '} + ollama pull qwen3-coder +
  6. +
  7. + Configure your{' '} + .env: +
  8. +
+
+
{`ANTHROPIC_BASE_URL=http://localhost:11434
+ANTHROPIC_AUTH_TOKEN=ollama
+API_TIMEOUT_MS=3000000
+ANTHROPIC_DEFAULT_SONNET_MODEL=qwen3-coder`}
+
+

+ Recommended models:{' '} + qwen3-coder{' '} + deepseek-coder-v2{' '} + codellama +

+

+ Limitations: Smaller context windows than Claude + (model-dependent), extended context beta disabled (not supported by Ollama), and performance + depends on local hardware (GPU recommended). +

+ + {/* Environment Variables */} +

+ Environment Variables +

+

+ Key environment variables for configuring AutoCoder: +

+ + + + + + + + + {ENV_VARS.map((v) => ( + + + + + ))} + +
+ Variable + + Description +
+ {v.name} + {v.description}
+ + {/* CLI Arguments */} +

+ CLI Arguments +

+

+ Command-line arguments for{' '} + + autonomous_agent_demo.py + + : +

+ + + + + + + + + {CLI_ARGS.map((arg) => ( + + + + + ))} + +
+ Argument + + Description +
+ {arg.name} + {arg.description}
+ + {/* Webhook Support */} +

+ Webhook Support +

+
    +
  • AutoCoder can send webhook notifications on feature completion
  • +
  • Compatible with N8N and similar automation tools
  • +
  • Configure the webhook URL in project settings
  • +
  • + Payload includes: feature name, status, and project info +
  • +
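The docs above only state that the payload includes the feature name, status, and project info; the exact shape is not specified in this diff. A hypothetical TypeScript view of such a payload, with field names assumed for illustration:

```ts
// Hypothetical payload shape. Field names are illustrative, not the app's
// actual webhook contract; only the three pieces of info named above are assumed.
interface FeatureCompletionWebhook {
  feature: string                          // feature name
  status: 'passing' | 'failing'            // completion status
  project: { name: string; path: string }  // project info
}
```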
+ + {/* Project Registry */} +

+ Project Registry +

+
    +
  • + All projects are registered in{' '} + ~/.autocoder/registry.db{' '} + (SQLite) +
  • +
  • Maps project names to filesystem paths
  • +
  • Uses POSIX path format (forward slashes) for cross-platform compatibility
  • +
  • SQLAlchemy ORM with SQLite's built-in transaction handling
  • +
+
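The forward-slash convention mentioned above can be illustrated with a one-line normalizer. A sketch only; the real registry code is Python/SQLAlchemy:

```ts
// Store registry paths with forward slashes so the same database row works on
// Windows and Unix alike.
function toPosixPath(p: string): string {
  return p.replace(/\\/g, '/')
}

// toPosixPath('C:\\projects\\my-app') === 'C:/projects/my-app'
```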
+ ) +} diff --git a/ui/src/components/docs/sections/AgentSystem.tsx b/ui/src/components/docs/sections/AgentSystem.tsx new file mode 100644 index 0000000..0edc2d3 --- /dev/null +++ b/ui/src/components/docs/sections/AgentSystem.tsx @@ -0,0 +1,280 @@ +/** + * AgentSystem Documentation Section + * + * Covers the orchestrator (Maestro), coding agents, testing agents, + * agent lifecycle, concurrency control, mission control dashboard, + * agent mascots and states, viewing logs, and process limits. + */ + +import { Badge } from '@/components/ui/badge' + +export function AgentSystem() { + return ( +
+ {/* Maestro: The Orchestrator */} +

+ Maestro: The Orchestrator +

+

+ Maestro is the central orchestrator that coordinates all agents. It acts as the conductor, + ensuring features are implemented efficiently and in the correct order. +

+
    +
  • Manages the full lifecycle of coding and testing agents
  • +
  • Schedules which features to work on based on dependencies and priority
  • +
  • Monitors agent health and restarts crashed agents automatically
  • +
  • Reports status to the UI in real time via WebSocket
  • +
+ + {/* Coding Agents */} +

+ Coding Agents +

+
    +
  • Implement features one at a time, or in batches of 1–3
  • +
  • + Claim features atomically via the{' '} + + feature_claim_and_get + {' '} + MCP tool — no two agents work on the same feature +
  • +
  • Run in isolated environments with their own browser context
  • +
  • + Use the Claude Code SDK with project-specific tools and{' '} + CLAUDE.md +
  • +
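The atomic claim mentioned above boils down to a single conditional update: an agent only wins a feature if the row is still pending when its update runs. A conceptual sketch, noting that the real `feature_claim_and_get` tool is implemented on the Python side and that the table and column names below are assumptions:

```ts
import Database from 'better-sqlite3'

type Db = InstanceType<typeof Database>

// Conceptual sketch of atomic feature claiming. Table and column names are
// assumptions made for illustration only.
function claimNextFeature(db: Db, agentName: string): number | null {
  const candidate = db
    .prepare("SELECT id FROM features WHERE status = 'pending' ORDER BY priority LIMIT 1")
    .get() as { id: number } | undefined
  if (!candidate) return null

  // The WHERE clause re-checks status, so only one agent's UPDATE can succeed.
  const result = db
    .prepare("UPDATE features SET status = 'in_progress', claimed_by = ? WHERE id = ? AND status = 'pending'")
    .run(agentName, candidate.id)

  // changes === 0 means another agent claimed the row first; the caller can retry.
  return result.changes === 1 ? candidate.id : null
}
```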
+ + {/* Testing Agents */} +

+ Testing Agents +

+
    +
  • Run regression tests after features are implemented
  • +
  • Verify that new code does not break existing features
  • +
  • Configurable ratio: 0–3 testing agents per coding agent
  • +
  • Can batch-test multiple features per session (1–5)
  • +
+ + {/* Agent Lifecycle */} +

+ Agent Lifecycle +

+

+ Agents are controlled through the UI or CLI. The lifecycle states are: +

+ + + + + + + + + + + + + + + + + + + + + + + + + +
+ Action + + Behavior +
Start + Click the Play button or run the CLI command +
Stop + Gracefully terminates all running agents +
Pause + Temporarily halts work (agents finish their current task first) +
Resume + Continues from where the agents were paused +
+

+ Agents auto-continue between sessions with a 3-second delay, so they keep working until + all features are complete or they are explicitly stopped. +

+ + {/* Concurrency Control */} +

+ Concurrency Control +

+
    +
  • + A slider in the agent control bar sets the number of concurrent coding agents + (1–5) +
  • +
  • + More agents means faster progress, but also higher API usage +
  • +
  • Each agent runs as an independent subprocess
  • +
  • + Feature claiming is atomic — no two agents will ever work on the same feature + simultaneously +
  • +
+ + {/* Agent Mission Control */} +

+ Agent Mission Control +

+

+ The Mission Control dashboard provides a real-time overview of all active agents: +

+
    +
  • Active agent cards with mascot icons and current status
  • +
  • The feature each agent is currently working on
  • +
  • Agent state indicators (thinking, working, testing, etc.)
  • +
  • Orchestrator status and a recent activity feed
  • +
+ + {/* Agent Mascots & States */} +

+ Agent Mascots & States +

+

+ Each agent is assigned a unique mascot for easy identification:{' '} + Spark,{' '} + Fizz,{' '} + Octo,{' '} + Hoot,{' '} + Buzz, and more. Agent states include: +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ State + + Animation + + Description +
+ Thinking + BouncingAgent is planning its approach
+ Working + ShakeActively writing code
+ Testing + RotatingRunning tests
+ Success + CelebrationFeature completed
+ Error + Red shakeEncountered an issue
+ Struggling + Concerned expressionMultiple consecutive failures
+ + {/* Viewing Agent Logs */} +

+ Viewing Agent Logs +

+
    +
  • Click any agent card in Mission Control to see its log output
  • +
  • Logs are color-coded by level (info, warning, error)
  • +
  • Output streams in real time via WebSocket
  • +
  • Each agent's logs are isolated and filterable
  • +
+ + {/* Process Limits */} +

+ Process Limits +

+

+ The orchestrator enforces strict bounds on concurrent processes to prevent resource + exhaustion: +

+ + + + + + + + + + + + + + + + + + + + + + + + + +
+ Limit + + Value +
+ + MAX_PARALLEL_AGENTS + + 5 (maximum concurrent coding agents)
+ + MAX_TOTAL_AGENTS + + + 10 (hard limit on coding + testing combined) +
Testing agents + Capped at the same count as coding agents +
Total Python processes + Never exceeds 11 (1 orchestrator + 5 coding + 5 testing) +
+
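The limits in the table compose into a single bound on process count. A small sketch of that arithmetic, using the constants from the table above (the actual enforcement lives in the orchestrator):

```ts
const MAX_PARALLEL_AGENTS = 5 // concurrent coding agents
const MAX_TOTAL_AGENTS = 10   // coding + testing combined

// 1 orchestrator + coding agents + testing agents (testing capped at the
// coding count), never exceeding the documented hard limits: at most 11 total.
function totalPythonProcesses(requestedCoding: number, requestedTesting: number): number {
  const coding = Math.min(requestedCoding, MAX_PARALLEL_AGENTS)
  const testing = Math.min(requestedTesting, coding)
  return 1 + Math.min(coding + testing, MAX_TOTAL_AGENTS)
}
```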
+ ) +} diff --git a/ui/src/components/docs/sections/AppSpecSetup.tsx b/ui/src/components/docs/sections/AppSpecSetup.tsx new file mode 100644 index 0000000..a4084a9 --- /dev/null +++ b/ui/src/components/docs/sections/AppSpecSetup.tsx @@ -0,0 +1,130 @@ +/** + * AppSpecSetup Documentation Section + * + * Explains what an app spec is, how to create one interactively + * or manually, the initializer agent, and starting after spec creation. + */ + +export function AppSpecSetup() { + return ( +
+ {/* What is an App Spec? */} +

+ What is an App Spec? +

+

+ The app spec is an XML document that describes the application to be built. It lives at{' '} + + .autocoder/prompts/app_spec.txt + {' '} + and tells the initializer agent what features to create. The spec defines your app's name, + description, tech stack, and the features that should be implemented. +

+
+
{`
+  My App
+  A task management app
+  
+    User authentication with login/signup
+    Task CRUD with categories
+  
+`}
+
+ + {/* Creating a Spec with Claude */} +

+ Creating a Spec with Claude +

+
    +
  • + In the UI, select your project and click{' '} + Create Spec +
  • +
  • + An interactive chat with Claude helps you define your app — it asks about + your app's purpose, features, and tech stack +
  • +
  • The spec is generated and saved automatically
  • +
  • After creation, the initializer agent can be started immediately
  • +
+ + {/* Writing a Spec Manually */} +

+ Writing a Spec Manually +

+
    +
  • + Create{' '} + + .autocoder/prompts/app_spec.txt + {' '} + in your project directory +
  • +
  • + Use XML format with app name, description, tech stack, and a feature list +
  • +
  • + Be specific about each feature — the initializer creates test cases from these + descriptions +
  • +
  • + Include technical constraints where needed (e.g.,{' '} + + "use PostgreSQL" + + ,{' '} + + "React with TypeScript" + + ) +
  • +
+ + {/* The Initializer Agent */} +

+ The Initializer Agent +

+

+ The initializer agent is the first agent to run on a new project. It bridges the gap between + your spec and the coding agents that implement features. +

+
    +
  • Runs automatically on first agent start when no features exist in the database
  • +
  • Reads the app spec and creates features with descriptions, steps, and priorities
  • +
  • + Sets up feature dependencies (e.g., "auth must be done before user profile") +
  • +
  • + Creates the feature database at{' '} + + .autocoder/features.db + +
  • +
+ + {/* Starting After Spec Creation */} +

+ Starting After Spec Creation +

+

+ Once your spec is ready, you can kick off the agents: +

+
    +
  • + From the UI, click the Play button to start + the agent +
  • +
  • + Or run from the CLI: +
  • +
+
+
python autonomous_agent_demo.py --project-dir your-project
+
+

+ The initializer runs first to create features, then coding agents take over to implement + them. Progress is shown in real time on the Kanban board. +

+
+ ) +} diff --git a/ui/src/components/docs/sections/AppearanceThemes.tsx b/ui/src/components/docs/sections/AppearanceThemes.tsx new file mode 100644 index 0000000..d713983 --- /dev/null +++ b/ui/src/components/docs/sections/AppearanceThemes.tsx @@ -0,0 +1,185 @@ +/** + * AppearanceThemes Documentation Section + * + * Covers built-in themes with color previews, dark/light mode toggling, + * the theme selector dropdown, and global keyboard shortcuts. + */ + +import { Badge } from '@/components/ui/badge' + +/** Theme descriptor used to render the preview rows. */ +interface ThemePreview { + name: string + description: string + colors: { label: string; hex: string }[] +} + +const THEMES: ThemePreview[] = [ + { + name: 'Twitter', + description: 'Clean, modern blue design. Primary: blue, Background: white/dark gray.', + colors: [ + { label: 'Background', hex: '#ffffff' }, + { label: 'Primary', hex: '#4a9eff' }, + { label: 'Accent', hex: '#e8f4ff' }, + ], + }, + { + name: 'Claude', + description: "Warm beige/cream tones with orange accents. Inspired by Anthropic's Claude brand.", + colors: [ + { label: 'Background', hex: '#faf6f0' }, + { label: 'Primary', hex: '#c75b2a' }, + { label: 'Accent', hex: '#f5ede4' }, + ], + }, + { + name: 'Neo Brutalism', + description: 'Bold colors, hard shadows, no border radius. High contrast, expressive design.', + colors: [ + { label: 'Background', hex: '#ffffff' }, + { label: 'Primary', hex: '#ff4d00' }, + { label: 'Accent', hex: '#ffeb00' }, + ], + }, + { + name: 'Retro Arcade', + description: 'Vibrant pink and teal with pixel-art inspired styling.', + colors: [ + { label: 'Background', hex: '#f0e6d3' }, + { label: 'Primary', hex: '#e8457c' }, + { label: 'Accent', hex: '#4eb8a5' }, + ], + }, + { + name: 'Aurora', + description: 'Deep violet and luminous teal, inspired by the northern lights.', + colors: [ + { label: 'Background', hex: '#faf8ff' }, + { label: 'Primary', hex: '#8b5cf6' }, + { label: 'Accent', hex: '#2dd4bf' }, + ], + }, + { + name: 'Business', + description: 'Professional deep navy and gray monochrome palette for corporate use.', + colors: [ + { label: 'Background', hex: '#eaecef' }, + { label: 'Primary', hex: '#000e4e' }, + { label: 'Accent', hex: '#6b7280' }, + ], + }, +] + +/** Keyboard shortcut descriptor for the shortcuts table. */ +interface Shortcut { + key: string + action: string +} + +const SHORTCUTS: Shortcut[] = [ + { key: '?', action: 'Show keyboard shortcuts help' }, + { key: 'D', action: 'Toggle debug panel' }, + { key: 'T', action: 'Toggle terminal' }, + { key: 'G', action: 'Toggle Kanban/Graph view' }, + { key: 'N', action: 'Add new feature' }, + { key: 'E', action: 'Expand project with AI' }, + { key: 'A', action: 'Toggle AI assistant' }, + { key: ',', action: 'Open settings' }, + { key: 'R', action: 'Reset project' }, + { key: 'Escape', action: 'Close current modal' }, +] + +export function AppearanceThemes() { + return ( +
+ {/* Themes Overview */} +

+ Themes Overview +

+

+ AutoCoder comes with 6 built-in themes. Each theme provides a complete visual identity including + colors, accents, and dark mode variants. +

+
+ {THEMES.map((theme) => ( +
+ {/* Color swatches */} +
+ {theme.colors.map((color) => ( +
+ ))} +
+ {/* Description */} +
+ {theme.name} + {theme.name === 'Twitter' && ( + <> + {' '} + Default + + )} + — {theme.description} +
+
+ ))} +
+ + {/* Dark & Light Mode */} +

+ Dark & Light Mode +

+
    +
  • Toggle with the sun/moon icon in the header
  • +
  • All 6 themes have dedicated dark mode variants
  • +
  • + Preference is saved in browser{' '} + localStorage +
  • +
  • Dark mode affects all UI elements including the docs page
  • +
+ + {/* Theme Selector */} +

+ Theme Selector +

+
    +
  • Hover over the palette icon in the header to open the theme dropdown
  • +
  • Preview themes by hovering over each option (live preview)
  • +
  • Click to select — the change is applied instantly
  • +
  • Theme preference persists across sessions
  • +
+ + {/* Keyboard Shortcuts */} +

+ Keyboard Shortcuts +

+

+ Press ? anywhere in the UI to see the shortcuts help overlay. +

+ + + + + + + + + {SHORTCUTS.map((shortcut) => ( + + + + + ))} + +
KeyAction
+ {shortcut.key} + {shortcut.action}
+
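A single-key binding like the ones in the table can be wired up with a small hook in the same style as the Ctrl+K handler in DocsSearch.tsx. This is a hypothetical sketch; the app's real bindings live elsewhere and are not part of this diff:

```ts
import { useEffect } from 'react'

// Bind one bare key (e.g. 'd' for the debug panel) to an action, ignoring
// keystrokes typed into form fields. Illustrative only.
export function useGlobalShortcut(key: string, action: () => void): void {
  useEffect(() => {
    const onKeyDown = (e: KeyboardEvent) => {
      const target = e.target as HTMLElement
      if (target.tagName === 'INPUT' || target.tagName === 'TEXTAREA' || target.isContentEditable) return
      if (e.key.toLowerCase() === key.toLowerCase() && !e.ctrlKey && !e.metaKey && !e.altKey) {
        e.preventDefault()
        action()
      }
    }
    window.addEventListener('keydown', onKeyDown)
    return () => window.removeEventListener('keydown', onKeyDown)
  }, [key, action])
}
```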
+ ) +} diff --git a/ui/src/components/docs/sections/DeveloperTools.tsx b/ui/src/components/docs/sections/DeveloperTools.tsx new file mode 100644 index 0000000..06a5999 --- /dev/null +++ b/ui/src/components/docs/sections/DeveloperTools.tsx @@ -0,0 +1,104 @@ +/** + * DeveloperTools Documentation Section + * + * Covers the debug panel, agent logs tab, dev server logs, + * terminal, dev server control, and per-agent logs. + */ + +import { Badge } from '@/components/ui/badge' + +export function DeveloperTools() { + return ( +
+ {/* Debug Panel */} +

+ Debug Panel +

+
    +
  • + Press D to toggle the debug panel at the bottom of the screen +
  • +
  • Resizable by dragging the top edge
  • +
  • + Three tabs: Agent Logs,{' '} + Dev Server Logs, and{' '} + Terminal +
  • +
  • Shows real-time output from agents and dev server
  • +
+ + {/* Agent Logs Tab */} +

+ Agent Logs Tab +

+
    +
  • + Color-coded log levels:{' '} + Error,{' '} + Warning,{' '} + Info,{' '} + Debug,{' '} + Success +
  • +
  • Timestamps on each log entry
  • +
  • Auto-scrolls to latest entry
  • +
  • Clear button to reset log view
  • +
+ + {/* Dev Server Logs Tab */} +

+ Dev Server Logs Tab +

+
    +
  • + Shows stdout/stderr from the project’s dev server (e.g.,{' '} + npm run dev) +
  • +
  • Useful for seeing compilation errors and hot reload status</li>
  • +
  • Clear button available
  • +
+ + {/* Terminal */} +

+ Terminal +

+
    +
  • + Press T to open terminal (opens debug panel on the terminal tab) +
  • +
  • Full xterm.js terminal emulator with WebSocket backend
  • +
  • Multi-tab support: create multiple terminal sessions
  • +
  • Rename tabs by double-clicking the tab title
  • +
  • Each tab runs an independent PTY (pseudo-terminal) session
  • +
  • Supports standard terminal features: colors, cursor movement, history
  • +
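The xterm.js-plus-WebSocket arrangement described above can be sketched in a few lines. The WebSocket URL and message framing here are assumptions, not the app's actual terminal protocol:

```ts
import { Terminal } from 'xterm'

// Wire an xterm.js terminal to a WebSocket-backed PTY: server output is
// written to the terminal, keystrokes are forwarded back to the server.
export function attachTerminal(container: HTMLElement, wsUrl: string): () => void {
  const term = new Terminal({ cursorBlink: true })
  const socket = new WebSocket(wsUrl)

  term.open(container)
  socket.onmessage = (event) => {
    if (typeof event.data === 'string') term.write(event.data)
  }
  term.onData((data) => socket.send(data))

  // Dispose both ends when the tab is closed.
  return () => {
    socket.close()
    term.dispose()
  }
}
```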
+ + {/* Dev Server Control */} +

+ Dev Server Control +

+
    +
  • Start/stop button in the header bar
  • +
  • + Auto-detects project type (Next.js, Vite, CRA, etc.) and runs the appropriate dev command +
  • +
  • Shows the dev server URL when running
  • +
  • Automatic crash detection and restart option
  • +
  • Dev server output piped to the Dev Server Logs tab
  • +
+ + {/* Per-Agent Logs */} +

+ Per-Agent Logs +

+
    +
  • In Agent Mission Control, click any agent card to see its individual logs
  • +
  • + Logs include: what feature the agent is working on, code changes, test results +
  • +
  • Separate logs for coding agents and testing agents
  • +
  • Real-time streaming — see agent output as it happens
  • +
+
+ ) +} diff --git a/ui/src/components/docs/sections/FAQ.tsx b/ui/src/components/docs/sections/FAQ.tsx new file mode 100644 index 0000000..82b4f66 --- /dev/null +++ b/ui/src/components/docs/sections/FAQ.tsx @@ -0,0 +1,157 @@ +/** + * FAQ Documentation Section + * + * Covers frequently asked questions about project setup, agent behavior, + * customization, troubleshooting, and real-time monitoring. + */ + +export function FAQ() { + return ( +
+ {/* Starting a New Project */} +

+ Starting a New Project +

+

+ How do I use AutoCoder on a new project? +

+

+ From the UI, select "Create New Project" in the project dropdown. Choose a folder and + name. Then create an app spec using the interactive chat or write one manually. Click Start to run + the initializer agent, which creates features from your spec. Coding agents then implement features + automatically. +

+ + {/* Adding to Existing Project */} +

+ Adding to Existing Project +

+

+ How do I add AutoCoder to an existing project? +

+

+ Register the project folder through the UI project selector using "Add Existing". + AutoCoder creates a{' '} + .autocoder/ directory + alongside your existing code. Write an app spec describing what to build (new features), and the + agent works within your existing codebase. +

+ + {/* Agent Crashes */} +

+ Agent Crashes +

+

+ What happens if an agent crashes? +

+

+ The orchestrator (Maestro) automatically detects crashed agents and can restart them. Features + claimed by a crashed agent are released back to the pending queue. Scheduled runs use exponential + backoff with up to 3 retries. Check the agent logs in the debug panel for crash details. +

+ + {/* Custom Bash Commands */} +

+ Custom Bash Commands +

+

+ How do I customize which bash commands the agent can use? +

+

+ Create{' '} + + .autocoder/allowed_commands.yaml + {' '} + in your project with a list of allowed commands. Supports exact names, wildcards (e.g.,{' '} + swift*), and local + scripts. See the Security section for full details on the command hierarchy. +

+ + {/* Blocked Features */} +

+ Blocked Features +

+

+ Why are my features stuck in "blocked" status? +

+

+ Features with unmet dependencies show as blocked. Check the Dependency Graph view (press{' '} + G) to see which + features are waiting on others. A feature can only start when all its dependencies are marked as + "passing". Remove or reorder dependencies if needed. +

+ + {/* Running in Parallel */} +

+ Running in Parallel +

+

+ How do I run multiple agents in parallel? +

+

+ Use the concurrency slider in the agent control bar (1–5 agents) or pass{' '} + + --parallel --max-concurrency N + {' '} + on the CLI. Each agent claims features atomically, so there is no conflict. More agents means + faster progress but higher API cost. +

+ + {/* Using Local Models */} +

+ Using Local Models +

+

+ Can I use a local model instead of the Claude API? +

+

+ Yes, via Ollama v0.14.0+. Install Ollama, pull a coding model (e.g.,{' '} + qwen3-coder), and + configure your{' '} + .env to point to + localhost. See the Advanced Configuration section for full setup instructions. +

+ + {/* Resetting a Project */} +

+ Resetting a Project +

+

+ How do I reset a project and start over? +

+

+ Press R (when agents + are stopped) to open the Reset modal. Choose between: "Reset Features" (clears the + feature database, keeps the spec) or "Full Reset" (removes the spec too, starts fresh). + After a full reset, you will be prompted to create a new spec. +

+ + {/* Coding vs Testing Agents */} +

+ Coding vs Testing Agents +

+

+ What's the difference between coding and testing agents? +

+

+ Coding agents implement features — they write code, create files, and run feature-specific + tests. Testing agents run regression tests across completed features to ensure new code does not + break existing functionality. Configure the testing agent ratio (0–3) in settings. +

+ + {/* Monitoring in Real Time */} +

+ Monitoring in Real Time +

+

+ How do I view what an agent is doing in real time? +

+

+ Multiple ways: (1) Watch the Kanban board for feature status changes. (2) Open the debug panel + (D key) for live + agent logs. (3) Click agent cards in Mission Control for per-agent logs. (4) The progress bar + updates in real time via WebSocket. +

+
+ ) +} diff --git a/ui/src/components/docs/sections/FeaturesKanban.tsx b/ui/src/components/docs/sections/FeaturesKanban.tsx new file mode 100644 index 0000000..4076af8 --- /dev/null +++ b/ui/src/components/docs/sections/FeaturesKanban.tsx @@ -0,0 +1,182 @@ +/** + * FeaturesKanban Documentation Section + * + * Covers the Kanban board, feature cards, dependency graph view, + * adding/editing features, dependencies, expanding with AI, + * and priority ordering. + */ + +import { Badge } from '@/components/ui/badge' + +export function FeaturesKanban() { + return ( +
+ {/* Kanban Board Overview */} +

+ Kanban Board Overview +

+

+ The main view organizes features into three columns representing their current status: +

+ + + + + + + + + + + + + + + + + + + + + + + + + +
+ Column + + Color + + Meaning +
Pending + Yellow + Waiting to be picked up
In Progress + Cyan + An agent is actively working on it
Done + Green + Implemented and passing
+

+ Each feature appears as a card showing its name, priority, and category. The board updates + in real time as agents work. +

+ + {/* Feature Cards */} +

+ Feature Cards +

+
    +
  • + Each card displays a priority badge (P1 through{' '} + P5), a category tag, and the feature name +
  • +
  • Status icons indicate the current state of the feature
  • +
  • Click a card to open the detail modal with the full description and test steps
  • +
  • + Cards in the "In Progress" column show which agent is currently working on them +
  • +
+ + {/* Dependency Graph View */} +

+ Dependency Graph View +

+

+ An alternative to the Kanban board that visualizes feature relationships as a directed graph. +

+
    +
  • + Press G to toggle between Kanban and Graph view +
  • +
  • Uses the dagre layout engine for automatic node positioning
  • +
  • + Nodes are colored by status — pending, in-progress, and done each have + distinct colors +
  • +
  • Arrows show dependency relationships between features
  • +
  • Click any node to open the feature detail modal
  • +
  • Supports both horizontal and vertical layout orientations
  • +
+ + {/* Adding Features */} +

+ Adding Features +

+
    +
  • + Press N to open the Add Feature form +
  • +
  • Fill in: name, description, category, and priority
  • +
  • Optionally define steps (test criteria the agent must pass to complete the feature)
  • +
  • New features are added to the Pending column immediately
  • +
+ + {/* Editing & Deleting Features */} +

+ Editing & Deleting Features +

+
    +
  • Click a feature card to open the detail modal
  • +
  • + Click Edit to modify the name, description, + category, priority, or steps +
  • +
  • + Delete removes the feature permanently +
  • +
  • + Skip moves a feature to the end of the queue + without deleting it +
  • +
+ + {/* Feature Dependencies */} +

+ Feature Dependencies +

+

+ Features can declare dependencies on other features, ensuring they are implemented in the + correct order. +

+
    +
  • Set dependencies in the feature edit modal
  • +
  • + Cycle detection prevents circular dependencies (uses Kahn's algorithm combined + with DFS; see the sketch after this list) +
  • +
  • + Blocked features display a lock icon and cannot be claimed by agents until their + dependencies are met +
  • +
  • The Dependency Graph view makes these relationships easy to visualize
  • +
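The cycle detection mentioned above runs in the Python backend; the sketch below illustrates only the Kahn's-algorithm half of that idea in TypeScript, with made-up names, to show why a leftover node after a topological sort implies a circular dependency.

```ts
// Illustrative cycle check via Kahn's algorithm: if a topological sort cannot
// consume every feature, the remaining features form a dependency cycle.
// (The real implementation is in the Python backend; names here are made up.)
// `deps` maps each feature id to the ids it depends on.
function hasDependencyCycle(deps: Map<number, number[]>): boolean {
  const inDegree = new Map<number, number>()
  for (const id of deps.keys()) inDegree.set(id, 0)
  for (const targets of deps.values()) {
    for (const t of targets) inDegree.set(t, (inDegree.get(t) ?? 0) + 1)
  }

  // Start from nodes with no incoming edges and peel them off one by one.
  const queue = [...inDegree.entries()].filter(([, d]) => d === 0).map(([id]) => id)
  let visited = 0
  while (queue.length > 0) {
    const id = queue.shift()!
    visited++
    for (const t of deps.get(id) ?? []) {
      const d = (inDegree.get(t) ?? 0) - 1
      inDegree.set(t, d)
      if (d === 0) queue.push(t)
    }
  }
  return visited < inDegree.size // leftover nodes => cycle
}
```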
+ + {/* Expanding Project with AI */} +

+ Expanding Project with AI +

+
    +
  • + Press E to open the Expand Project modal +
  • +
  • Chat with Claude to describe the new features you want to add
  • +
  • Supports image attachments for UI mockups or design references
  • +
  • Claude creates properly structured features with appropriate dependencies
  • +
  • New features appear on the board immediately after creation
  • +
+ + {/* Priority & Ordering */} +

+ Priority & Ordering +

+
    +
  • + Features are ordered by priority: P1 is the highest + and P5 is the lowest +
  • +
  • Within the same priority level, features are ordered by creation time
  • +
  • Agents always pick up the highest-priority ready feature first (see the ordering sketch after this list)
  • +
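The ordering rule above (priority first, creation time second) can be expressed as a simple comparator. A sketch with assumed field names; the real ordering is applied in the backend when agents claim features.

```ts
// Sketch of the documented ordering rule: lower priority number (P1) first,
// then earlier creation time. Field names are assumptions for illustration.
interface FeatureLike {
  priority: number   // 1 (highest) .. 5 (lowest)
  createdAt: string  // ISO 8601 timestamp
}

function byPriorityThenAge(a: FeatureLike, b: FeatureLike): number {
  if (a.priority !== b.priority) return a.priority - b.priority
  return a.createdAt.localeCompare(b.createdAt) // ISO strings sort chronologically
}

// Agents would then claim the first *ready* feature in this order:
// pendingFeatures.sort(byPriorityThenAge)
```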
+
+ ) +} diff --git a/ui/src/components/docs/sections/GettingStarted.tsx b/ui/src/components/docs/sections/GettingStarted.tsx new file mode 100644 index 0000000..1ee560b --- /dev/null +++ b/ui/src/components/docs/sections/GettingStarted.tsx @@ -0,0 +1,134 @@ +/** + * GettingStarted Documentation Section + * + * Covers what AutoCoder is, quick start commands, + * creating and adding projects, and system requirements. + */ + +import { Badge } from '@/components/ui/badge' + +export function GettingStarted() { + return ( +
+ {/* What is AutoCoder? */} +

+ What is AutoCoder? +

+

+ AutoCoder is an autonomous coding agent system that builds complete applications over multiple + sessions using a two-agent pattern: +

+
    +
  1. + Initializer Agent — reads your app spec + and creates features in a SQLite database +
  2. +
  3. + Coding Agent — implements features one by + one, marking each as passing when complete +
  4. +
+

+ It comes with a React-based UI for monitoring progress, managing features, and controlling agents + in real time. +

+ + {/* Quick Start */} +

+ Quick Start +

+

+ Launch AutoCoder with a single command. The CLI menu lets you create or select a project, + while the Web UI provides a full dashboard experience. +

+
+
{`# Windows
+start.bat          # CLI menu
+start_ui.bat       # Web UI
+
+# macOS/Linux
+./start.sh         # CLI menu
+./start_ui.sh      # Web UI`}
+
+ + {/* Creating a New Project */} +

+ Creating a New Project +

+
    +
  • + From the UI, click the project dropdown and select{' '} + Create New Project +
  • +
  • Enter a name and select or browse to a folder for the project
  • +
  • + Create an app spec interactively with Claude, or write one manually in XML format +
  • +
  • + The initializer agent reads your spec and creates features automatically +
  • +
+ + {/* Adding to an Existing Project */} +

+ Adding to an Existing Project +

+
    +
  • Register the project folder via the UI project selector
  • +
  • + AutoCoder creates a{' '} + .autocoder/{' '} + directory inside your project +
  • +
  • + Existing code is preserved — AutoCoder adds its configuration alongside it +
  • +
  • Write or generate an app spec describing what to build
  • +
+ + {/* System Requirements */} +

+ System Requirements +

+ + + + + + + + + + + + + + + + + + + + + + + + + +
+ Requirement + + Details +
Python + 3.11+ +
Node.js + 20+{' '} + (for UI development) +
Claude Code CLI + Required for running agents +
Operating System + Windows, macOS, or Linux +
+
+ ) +} diff --git a/ui/src/components/docs/sections/ProjectStructure.tsx b/ui/src/components/docs/sections/ProjectStructure.tsx new file mode 100644 index 0000000..0b6aa6b --- /dev/null +++ b/ui/src/components/docs/sections/ProjectStructure.tsx @@ -0,0 +1,162 @@ +/** + * ProjectStructure Documentation Section + * + * Covers the .autocoder/ directory layout, features database, + * prompts directory, allowed commands, CLAUDE.md convention, + * legacy migration, and Claude inheritance. + */ + +export function ProjectStructure() { + return ( +
+ {/* .autocoder/ Directory Layout */} +

+ .autocoder/ Directory Layout +

+

+ Every AutoCoder project stores its configuration and runtime files in a{' '} + .autocoder/{' '} + directory at the project root. +

+
+
{`your-project/
+\u251C\u2500\u2500 .autocoder/
+\u2502   \u251C\u2500\u2500 features.db              # SQLite feature database
+\u2502   \u251C\u2500\u2500 .agent.lock              # Lock file (prevents multiple instances)
+\u2502   \u251C\u2500\u2500 .gitignore               # Ignores runtime files
+\u2502   \u251C\u2500\u2500 allowed_commands.yaml    # Per-project bash command allowlist
+\u2502   \u2514\u2500\u2500 prompts/
+\u2502       \u251C\u2500\u2500 app_spec.txt         # Application specification (XML)
+\u2502       \u251C\u2500\u2500 initializer_prompt.md # First session prompt
+\u2502       \u2514\u2500\u2500 coding_prompt.md     # Continuation session prompt
+\u251C\u2500\u2500 CLAUDE.md                    # Claude Code convention file
+\u2514\u2500\u2500 app_spec.txt                 # Root copy for template compatibility`}
+
+ + {/* Features Database */} +

+ Features Database +

+
    +
  • + SQLite database managed by SQLAlchemy, stored at{' '} + + .autocoder/features.db + +
  • +
  • + Each feature record includes: id, priority, category, name, description, steps, status + (pending,{' '} + in_progress,{' '} + passing,{' '} + failing), + and dependencies +
  • +
  • Agents interact with features through MCP server tools, not direct database access
  • +
  • Viewable in the UI via the Kanban board or the Dependency Graph view
  • +
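The record fields listed above map naturally onto a small type. A sketch of how a feature row might look on the UI side; the field names follow the list above, but the authoritative schema is the SQLAlchemy model in the backend and may differ in detail.

```ts
// Sketch of a feature record as described above. The authoritative schema is
// the SQLAlchemy model in the Python backend; this is only an illustration.
type FeatureStatus = 'pending' | 'in_progress' | 'passing' | 'failing'

interface FeatureRecord {
  id: number
  priority: number       // P1..P5 as 1..5
  category: string
  name: string
  description: string
  steps: string[]        // test criteria the agent must satisfy
  status: FeatureStatus
  dependencies: number[] // ids of features that must be passing first
}
```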
+ + {/* Prompts Directory */} +

+ Prompts Directory +

+

+ Prompts control how agents behave during each session: +

+
    +
  • + app_spec.txt{' '} + — your application specification in XML format +
  • +
  • + + initializer_prompt.md + {' '} + — prompt for the initializer agent (creates features from the spec) +
  • +
  • + + coding_prompt.md + {' '} + — prompt for coding agents (implements features) +
  • +
+

+ These can be customized per project. If not present, defaults from{' '} + + .claude/templates/ + {' '} + are used as a fallback. +

+ + {/* Allowed Commands Config */} +

+ Allowed Commands Config +

+

+ The optional{' '} + + .autocoder/allowed_commands.yaml + {' '} + file lets you grant project-specific bash commands to the agent. This is useful when your + project requires tools beyond the default allowlist (e.g., language-specific compilers or + custom build scripts). +

+

+ See the Security section for full details on + the command hierarchy and how project-level commands interact with global and organization + policies. +

+ + {/* CLAUDE.md Convention */} +

+ CLAUDE.md Convention +

+
    +
  • + CLAUDE.md{' '} + lives at the project root, as required by the Claude Code SDK +
  • +
  • + Contains project-specific instructions that the agent follows during every coding session +
  • +
  • + Automatically inherited by all agents working on the project — no additional + configuration needed +
  • +
+ + {/* Legacy Layout Migration */} +

+ Legacy Layout Migration +

+

+ Older projects stored configuration files directly at the project root (e.g.,{' '} + features.db,{' '} + prompts/). +

+
    +
  • + On the next agent start, these files are automatically migrated into{' '} + .autocoder/ +
  • +
  • Dual-path resolution ensures both old and new layouts work transparently (see the sketch after this list)
  • +
  • No manual migration is needed — it happens seamlessly
  • +
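"Dual-path resolution" above presumably means checking the new `.autocoder/` location first and falling back to the legacy root-level location. A hedged sketch of that idea; the real migration and resolution code lives in the Python backend, and the function name here is made up.

```ts
// Hedged sketch of dual-path resolution: prefer the new .autocoder/ location,
// fall back to the legacy root-level location if only that one exists.
// (The real implementation is in the Python backend.)
import { existsSync } from 'node:fs'
import { join } from 'node:path'

function resolveProjectFile(projectDir: string, relPath: string): string {
  const modern = join(projectDir, '.autocoder', relPath)
  const legacy = join(projectDir, relPath)
  return existsSync(modern) || !existsSync(legacy) ? modern : legacy
}

// resolveProjectFile('/path/to/project', 'features.db')
// resolveProjectFile('/path/to/project', 'prompts/app_spec.txt')
```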
+ + {/* Claude Inheritance */} +

+ Claude Inheritance +

+

+ Agents inherit all MCP servers, tools, skills, custom commands, and{' '} + CLAUDE.md{' '} + from the target project folder. +

+
+ If your project has its own MCP servers or Claude commands, the coding agent can use them. + The agent essentially runs as if Claude Code was opened in your project directory. +
+
+ ) +} diff --git a/ui/src/components/docs/sections/Scheduling.tsx b/ui/src/components/docs/sections/Scheduling.tsx new file mode 100644 index 0000000..913bb78 --- /dev/null +++ b/ui/src/components/docs/sections/Scheduling.tsx @@ -0,0 +1,102 @@ +/** + * Scheduling Documentation Section + * + * Covers schedule creation, per-schedule settings, + * overrides, and crash recovery with exponential backoff. + */ + +import { Badge } from '@/components/ui/badge' + +export function Scheduling() { + return ( +
+ {/* What Scheduling Does */} +

+ What Scheduling Does +

+

+ Scheduling automates agent runs at specific times. Set up a schedule and AutoCoder will automatically + start agents on your project — useful for overnight builds, periodic maintenance, or continuous + development. +

+ + {/* Creating a Schedule */} +

+ Creating a Schedule +

+
    +
  • Click the clock icon in the header to open the Schedule modal
  • +
  • Set: start time, duration (how long agents run), days of the week
  • +
  • Optionally configure: YOLO mode, concurrency, model selection
  • +
  • Schedule is saved and starts at the next matching time
  • +
+ + {/* Schedule Settings */} +

+ Schedule Settings +

+

+ Each schedule can override global settings: +

+ + + + + + + + + + + + + + + + + + + + + + + + + +
Setting + Details
YOLO mode + On/off per schedule
Concurrency + 1–5 agents +
Model tier + Opus / Sonnet / Haiku
Duration + How long the session runs before auto-stopping
+
+ All schedule times are in UTC. +
+ + {/* Schedule Overrides */} +

+ Schedule Overrides +

+
    +
  • Manually skip a scheduled run (one-time override)
  • +
  • Pause a schedule temporarily (resumes on next period)
  • +
  • + View upcoming runs with{' '} + Running until /{' '} + Next run indicators +
  • +
  • Override without deleting the schedule
  • +
+ + {/* Crash Recovery */} +

+ Crash Recovery +

+
    +
  • If a scheduled agent crashes, it uses exponential backoff for retries (see the sketch after this list)
  • +
  • + Maximum 3 retry attempts per scheduled run +
  • +
  • Backoff prevents rapid restart loops
  • +
  • Failed runs are logged for troubleshooting
  • +
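A hedged sketch of the retry behavior described above: exponential backoff with at most 3 retry attempts. The scheduler's actual base delay and cap are not specified in this diff, so the numbers below are placeholders.

```ts
// Illustrative retry loop with exponential backoff, capped at 3 retries.
// The scheduler's real base delay and cap are not shown in this diff;
// the values below are placeholders.
async function runWithBackoff(run: () => Promise<void>, maxRetries = 3): Promise<void> {
  for (let attempt = 0; attempt <= maxRetries; attempt++) {
    try {
      await run()
      return
    } catch (err) {
      if (attempt === maxRetries) throw err
      const delayMs = 1000 * 2 ** attempt // 1s, 2s, 4s, ...
      await new Promise((resolve) => setTimeout(resolve, delayMs))
    }
  }
}
```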
+
+ ) +} diff --git a/ui/src/components/docs/sections/Security.tsx b/ui/src/components/docs/sections/Security.tsx new file mode 100644 index 0000000..2e4e09a --- /dev/null +++ b/ui/src/components/docs/sections/Security.tsx @@ -0,0 +1,218 @@ +/** + * Security Documentation Section + * + * Covers the defense-in-depth security model: command validation layers, + * the hierarchical allowlist/blocklist system, per-project and org-level + * configuration, extra read paths, and filesystem sandboxing. + */ + +import { Badge } from '@/components/ui/badge' + +export function Security() { + return ( +
+ {/* Command Validation Overview */} +

+ Command Validation Overview +

+

+ AutoCoder uses a defense-in-depth approach for security. All three layers must pass before any + command is executed: +

+
    +
  1. + OS-level sandbox — bash commands run inside + a restricted sandbox environment +
  2. +
  3. + Filesystem restriction — agents can only + access the project directory (plus configured extra read paths) +
  4. +
  5. + Hierarchical allowlist — every bash command + is validated against a multi-level allowlist system +
  6. +
+ + {/* Command Hierarchy */} +

+ Command Hierarchy +

+

+ Commands are evaluated against a 5-level hierarchy, from highest to lowest priority: +

+
    +
  1. + Hardcoded Blocklist{' '} + security.py{' '} + — NEVER allowed, cannot be overridden +
  2. +
  3. + Org Blocklist{' '} + ~/.autocoder/config.yaml{' '} + — org-wide blocks, cannot be project-overridden +
  4. +
  5. + Org Allowlist{' '} + ~/.autocoder/config.yaml{' '} + — available to all projects +
  6. +
  7. + Global Allowlist{' '} + security.py{' '} + — default commands (npm, git, curl, etc.) +
  8. +
  9. + Project Allowlist{' '} + + .autocoder/allowed_commands.yaml + {' '} + — project-specific additions +
  10. +
+
+ Higher priority levels always win. A command blocked at level 1 or 2 can never be allowed by + lower levels. +
+ + {/* Hardcoded Blocklist */} +

+ Hardcoded Blocklist +

+

+ The following commands can never be allowed, regardless + of any configuration. They are hardcoded in{' '} + security.py and + cannot be overridden: +

+
+ {['dd', 'sudo', 'su', 'shutdown', 'reboot', 'poweroff', 'mkfs', 'fdisk', 'mount', 'umount', 'systemctl'].map( + (cmd) => ( + + {cmd} + + ), + )} +
+ + {/* Global Allowlist */} +

+ Global Allowlist +

+

+ Default commands available to all projects out of the box. These are the standard development + commands needed for most projects: +

+
+ {['npm', 'npx', 'node', 'git', 'curl', 'python', 'pip', 'cat', 'ls', 'mkdir', 'cp', 'mv', 'rm', 'grep', 'find'].map( + (cmd) => ( + + {cmd} + + ), + )} +
+ + {/* Per-Project Allowed Commands */} +

+ Per-Project Allowed Commands +

+

+ Each project can define additional allowed commands in{' '} + + .autocoder/allowed_commands.yaml + + : +

+
+
{`# .autocoder/allowed_commands.yaml
+version: 1
+commands:
+  # Exact command name
+  - name: swift
+    description: Swift compiler
+
+  # Wildcard - matches swiftc, swiftlint, swiftformat
+  - name: swift*
+    description: All Swift tools (wildcard)
+
+  # Local project scripts
+  - name: ./scripts/build.sh
+    description: Project build script`}
+
+

+ Pattern matching: exact match ( + swift), wildcard ( + swift* matches swiftc, + swiftlint, etc.), and scripts ( + ./scripts/build.sh). + Limit: 100 commands per project. +
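A small sketch of the three pattern forms described above (exact names, trailing-`*` wildcards, and local script paths). The backend's real matcher may differ in detail; this only illustrates the matching rules as documented.

```ts
// Sketch of the documented pattern forms: exact names, trailing-* wildcards,
// and explicit local script paths. The backend matcher may differ in detail.
function matchesAllowedPattern(command: string, pattern: string): boolean {
  if (pattern.endsWith('*')) {
    return command.startsWith(pattern.slice(0, -1)) // swift* -> swiftc, swiftlint, ...
  }
  return command === pattern // exact name, or exact script path like ./scripts/build.sh
}

// matchesAllowedPattern('swiftlint', 'swift*')                      // true
// matchesAllowedPattern('./scripts/build.sh', './scripts/build.sh') // true
```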

+ + {/* Organization Configuration */} +

+ Organization Configuration +

+

+ System administrators can set org-wide policies in{' '} + ~/.autocoder/config.yaml: +

+
+
{`# ~/.autocoder/config.yaml
+version: 1
+
+# Commands available to ALL projects
+allowed_commands:
+  - name: jq
+    description: JSON processor
+
+# Commands blocked across ALL projects (cannot be overridden)
+blocked_commands:
+  - aws        # Prevent accidental cloud operations
+  - kubectl    # Block production deployments`}
+
+

+ Org-level blocked commands cannot be overridden by any project configuration. +

+ + {/* Extra Read Paths */} +

+ Extra Read Paths +

+

+ Allow agents to read files from directories outside the project folder via the{' '} + EXTRA_READ_PATHS{' '} + environment variable: +

+
+
EXTRA_READ_PATHS=/path/to/docs,/path/to/shared-libs
+
+
    +
  • Must be absolute paths and must exist as directories
  • +
  • Only read operations allowed (Read, Glob, Grep — no Write/Edit)
  • +
  • + Sensitive directories are always blocked:{' '} + .ssh,{' '} + .aws,{' '} + .gnupg,{' '} + .docker,{' '} + .kube, etc. +
  • +
+ + {/* Filesystem Sandboxing */} +

+ Filesystem Sandboxing +

+
    +
  • Agents can only write to the project directory
  • +
  • Read access is limited to the project directory plus configured extra read paths
  • +
  • + Path traversal attacks are prevented via canonicalization ( + Path.resolve()) +
  • +
  • File operations are validated before execution
  • +
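The canonicalization mentioned above uses Python's `Path.resolve()`; the same canonicalize-then-prefix-check idea can be sketched in TypeScript for illustration only.

```ts
// Illustrative prefix check after canonicalization, mirroring the
// Path.resolve() approach described above (the real check is in Python).
import { resolve, sep } from 'node:path'

function isInsideRoot(root: string, candidate: string): boolean {
  const canonicalRoot = resolve(root)
  const canonicalPath = resolve(canonicalRoot, candidate) // collapses ../ segments
  return (
    canonicalPath === canonicalRoot ||
    canonicalPath.startsWith(canonicalRoot + sep)
  )
}

// isInsideRoot('/projects/app', 'src/index.ts')    // true
// isInsideRoot('/projects/app', '../other/secret') // false
```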
+
+ ) +} diff --git a/ui/src/components/docs/sections/SettingsConfig.tsx b/ui/src/components/docs/sections/SettingsConfig.tsx new file mode 100644 index 0000000..6045c5a --- /dev/null +++ b/ui/src/components/docs/sections/SettingsConfig.tsx @@ -0,0 +1,188 @@ +/** + * SettingsConfig Documentation Section + * + * Covers global settings: opening the modal, YOLO mode, headless browser, + * model selection, regression agents, batch size, concurrency, and persistence. + */ + +import { Badge } from '@/components/ui/badge' + +export function SettingsConfig() { + return ( +
+ {/* Opening Settings */} +

+ Opening Settings +

+

+ Press the , (comma) key or click the gear icon in the header bar to + open the Settings modal. Settings are global and apply to all projects. +

+ + {/* YOLO Mode */} +

+ YOLO Mode +

+

+ YOLO mode is for rapid prototyping — it skips testing for faster iteration: +

+
    +
  • + What’s skipped: Regression testing, Playwright MCP + server (browser automation disabled) +
  • +
  • + What still runs: Lint and type-check (to verify code + compiles), Feature MCP server for tracking +
  • +
  • + Toggle via the lightning bolt button in the UI or the{' '} + --yolo CLI flag +
  • +
  • + When to use: Early prototyping when you want to scaffold + features quickly without verification overhead +
  • +
  • Switch back to standard mode for production-quality development
  • +
+ + {/* Headless Browser */} +

+ Headless Browser +

+
    +
  • When enabled, Playwright runs without a visible browser window
  • +
  • Saves CPU/GPU resources on machines running multiple agents
  • +
  • Tests still run fully — just no visible browser UI
  • +
  • Toggle in settings or via the UI button
  • +
+ + {/* Model Selection */} +

+ Model Selection +

+

+ Choose which Claude model tier to use for your agents: +

+ + + + + + + + + + + + + + + + + + + + + +
Tier + Characteristics +
+ Opus + Most capable, highest quality
+ Sonnet + Balanced speed and quality
+ Haiku + Fastest, most economical
+
    +
  • Model can be set globally in settings
  • +
  • Per-schedule model override is also available
  • +
  • + When using Vertex AI, model names use{' '} + @ instead of{' '} + - (e.g.,{' '} + + claude-opus-4-5@20251101 + + ) +
  • +
+ + {/* Regression Agents */} +

+ Regression Agents +

+

+ Controls how many testing agents run alongside coding agents (0–3): +

+
    +
  • + 0: No regression testing (like YOLO but coding agents + still test their own feature) +
  • +
  • + 1: One testing agent runs in background verifying + completed features +
  • +
  • + 2–3: Multiple testing agents for thorough + verification +
  • +
  • Testing agents batch-test 1–5 features per session
  • +
+ + {/* Features per Agent / Batch Size */} +

+ Features per Agent (Batch Size) +

+

+ Controls how many features each coding agent implements per session (1–3): +

+
    +
  • + 1: One feature per session (most focused, lower risk of + conflicts) +
  • +
  • + 2–3: Multiple features per session (more efficient, + fewer session startups) +
  • +
  • + Set via settings UI or the{' '} + --batch-size CLI flag +
  • +
  • + Can also target specific features:{' '} + --batch-features 1,2,3 +
  • +
+ + {/* Concurrency */} +

+ Concurrency +

+
    +
  • Per-project default concurrency saved in project settings
  • +
  • Override at runtime with the concurrency slider in agent controls
  • +
  • + Range: 1–5 concurrent coding agents +
  • +
  • Higher concurrency = faster progress but more API cost
  • +
+ + {/* How Settings are Persisted */} +

+ How Settings are Persisted +

+
    +
  • + Global settings stored in SQLite registry at{' '} + ~/.autocoder/registry.db +
  • +
  • Per-project settings (like default concurrency) stored in the project registry entry
  • +
  • UI settings (theme, dark mode) stored in browser localStorage
  • +
  • Settings survive app restarts and are shared across UI sessions
  • +
+
+ ) +} diff --git a/ui/src/components/mascotData.tsx b/ui/src/components/mascotData.tsx new file mode 100644 index 0000000..5c5e7bd --- /dev/null +++ b/ui/src/components/mascotData.tsx @@ -0,0 +1,529 @@ +/** + * SVG mascot definitions and color palettes for agent avatars. + * + * Each mascot is a simple, cute SVG character rendered as a React component. + * Colors are keyed by AgentMascot name so avatars stay visually distinct + * when multiple agents run in parallel. + */ + +import type { AgentMascot } from '../lib/types' + +// --------------------------------------------------------------------------- +// Color types and palettes +// --------------------------------------------------------------------------- + +export interface MascotColorPalette { + primary: string + secondary: string + accent: string +} + +/** Props shared by every mascot SVG component. */ +export interface MascotSVGProps { + colors: MascotColorPalette + size: number +} + +/** Fallback colors for unknown / untracked agents (neutral gray). */ +export const UNKNOWN_COLORS: MascotColorPalette = { + primary: '#6B7280', + secondary: '#9CA3AF', + accent: '#F3F4F6', +} + +export const AVATAR_COLORS: Record = { + // Original 5 + Spark: { primary: '#3B82F6', secondary: '#60A5FA', accent: '#DBEAFE' }, // Blue robot + Fizz: { primary: '#F97316', secondary: '#FB923C', accent: '#FFEDD5' }, // Orange fox + Octo: { primary: '#8B5CF6', secondary: '#A78BFA', accent: '#EDE9FE' }, // Purple octopus + Hoot: { primary: '#22C55E', secondary: '#4ADE80', accent: '#DCFCE7' }, // Green owl + Buzz: { primary: '#EAB308', secondary: '#FACC15', accent: '#FEF9C3' }, // Yellow bee + // Tech-inspired + Pixel: { primary: '#EC4899', secondary: '#F472B6', accent: '#FCE7F3' }, // Pink + Byte: { primary: '#06B6D4', secondary: '#22D3EE', accent: '#CFFAFE' }, // Cyan + Nova: { primary: '#F43F5E', secondary: '#FB7185', accent: '#FFE4E6' }, // Rose + Chip: { primary: '#84CC16', secondary: '#A3E635', accent: '#ECFCCB' }, // Lime + Bolt: { primary: '#FBBF24', secondary: '#FCD34D', accent: '#FEF3C7' }, // Amber + // Energetic + Dash: { primary: '#14B8A6', secondary: '#2DD4BF', accent: '#CCFBF1' }, // Teal + Zap: { primary: '#A855F7', secondary: '#C084FC', accent: '#F3E8FF' }, // Violet + Gizmo: { primary: '#64748B', secondary: '#94A3B8', accent: '#F1F5F9' }, // Slate + Turbo: { primary: '#EF4444', secondary: '#F87171', accent: '#FEE2E2' }, // Red + Blip: { primary: '#10B981', secondary: '#34D399', accent: '#D1FAE5' }, // Emerald + // Playful + Neon: { primary: '#D946EF', secondary: '#E879F9', accent: '#FAE8FF' }, // Fuchsia + Widget: { primary: '#6366F1', secondary: '#818CF8', accent: '#E0E7FF' }, // Indigo + Zippy: { primary: '#F59E0B', secondary: '#FBBF24', accent: '#FEF3C7' }, // Orange-yellow + Quirk: { primary: '#0EA5E9', secondary: '#38BDF8', accent: '#E0F2FE' }, // Sky + Flux: { primary: '#7C3AED', secondary: '#8B5CF6', accent: '#EDE9FE' }, // Purple +} + +// --------------------------------------------------------------------------- +// SVG mascot components - simple cute characters +// --------------------------------------------------------------------------- + +function SparkSVG({ colors, size }: MascotSVGProps) { + return ( + + {/* Robot body */} + + {/* Robot head */} + + {/* Antenna */} + + + {/* Eyes */} + + + + + {/* Mouth */} + + {/* Arms */} + + + + ) +} + +function FizzSVG({ colors, size }: MascotSVGProps) { + return ( + + {/* Ears */} + + + + + {/* Head */} + + {/* Face */} + + {/* Eyes */} + + + + + {/* Nose */} + + {/* Whiskers */} + + + + + 
+ ) +} + +function OctoSVG({ colors, size }: MascotSVGProps) { + return ( + + {/* Tentacles */} + + + + + + {/* Head */} + + {/* Eyes */} + + + + + {/* Smile */} + + + ) +} + +function HootSVG({ colors, size }: MascotSVGProps) { + return ( + + {/* Ear tufts */} + + + {/* Body */} + + {/* Head */} + + {/* Eye circles */} + + + {/* Eyes */} + + + + + {/* Beak */} + + {/* Belly */} + + + ) +} + +function BuzzSVG({ colors, size }: MascotSVGProps) { + return ( + + {/* Wings */} + + + {/* Body stripes */} + + + + {/* Head */} + + {/* Antennae */} + + + + + {/* Eyes */} + + + + + {/* Smile */} + + + ) +} + +function PixelSVG({ colors, size }: MascotSVGProps) { + return ( + + {/* Blocky body */} + + + + {/* Head */} + + {/* Eyes */} + + + + + {/* Mouth */} + + + ) +} + +function ByteSVG({ colors, size }: MascotSVGProps) { + return ( + + {/* 3D cube body */} + + + + {/* Face */} + + + + + + + ) +} + +function NovaSVG({ colors, size }: MascotSVGProps) { + return ( + + {/* Star points */} + + + {/* Face */} + + + + + + + ) +} + +function ChipSVG({ colors, size }: MascotSVGProps) { + return ( + + {/* Chip body */} + + {/* Pins */} + + + + + + + {/* Face */} + + + + + + + ) +} + +function BoltSVG({ colors, size }: MascotSVGProps) { + return ( + + {/* Lightning bolt body */} + + + {/* Face */} + + + + + + ) +} + +function DashSVG({ colors, size }: MascotSVGProps) { + return ( + + {/* Speed lines */} + + + {/* Aerodynamic body */} + + + {/* Face */} + + + + + + + ) +} + +function ZapSVG({ colors, size }: MascotSVGProps) { + return ( + + {/* Electric sparks */} + + + {/* Orb */} + + + {/* Face */} + + + + + + + ) +} + +function GizmoSVG({ colors, size }: MascotSVGProps) { + return ( + + {/* Gear teeth */} + + + + + {/* Gear body */} + + + {/* Face */} + + + + + + + ) +} + +function TurboSVG({ colors, size }: MascotSVGProps) { + return ( + + {/* Flames */} + + + {/* Rocket body */} + + {/* Nose cone */} + + {/* Fins */} + + + {/* Window/Face */} + + + + + + ) +} + +function BlipSVG({ colors, size }: MascotSVGProps) { + return ( + + {/* Radar rings */} + + + {/* Main dot */} + + + {/* Face */} + + + + + + + ) +} + +function NeonSVG({ colors, size }: MascotSVGProps) { + return ( + + {/* Glow effect */} + + + {/* Body */} + + {/* Inner glow */} + + {/* Face */} + + + + + + + ) +} + +function WidgetSVG({ colors, size }: MascotSVGProps) { + return ( + + {/* Window frame */} + + {/* Title bar */} + + + + + {/* Content area / Face */} + + + + + + + + ) +} + +function ZippySVG({ colors, size }: MascotSVGProps) { + return ( + + {/* Ears */} + + + + + {/* Head */} + + {/* Face */} + + + + + {/* Nose and mouth */} + + + + ) +} + +function QuirkSVG({ colors, size }: MascotSVGProps) { + return ( + + {/* Question mark body */} + + + {/* Face on the dot */} + + + + + {/* Decorative swirl */} + + + ) +} + +function FluxSVG({ colors, size }: MascotSVGProps) { + return ( + + {/* Wave body */} + + + {/* Face */} + + + + + {/* Sparkles */} + + + + ) +} + +/** Fallback icon for unknown / untracked agents. */ +function UnknownSVG({ colors, size }: MascotSVGProps) { + return ( + + {/* Circle background */} + + + {/* Question mark */} + ? + + ) +} + +// --------------------------------------------------------------------------- +// Mascot component lookup +// --------------------------------------------------------------------------- + +/** Maps each mascot name to its SVG component. 
*/ +export const MASCOT_SVGS: Record> = { + // Original 5 + Spark: SparkSVG, + Fizz: FizzSVG, + Octo: OctoSVG, + Hoot: HootSVG, + Buzz: BuzzSVG, + // Tech-inspired + Pixel: PixelSVG, + Byte: ByteSVG, + Nova: NovaSVG, + Chip: ChipSVG, + Bolt: BoltSVG, + // Energetic + Dash: DashSVG, + Zap: ZapSVG, + Gizmo: GizmoSVG, + Turbo: TurboSVG, + Blip: BlipSVG, + // Playful + Neon: NeonSVG, + Widget: WidgetSVG, + Zippy: ZippySVG, + Quirk: QuirkSVG, + Flux: FluxSVG, +} + +/** The SVG component for unknown agents. Exported separately because + * it is not part of the AgentMascot union type. */ +export const UnknownMascotSVG: React.FC = UnknownSVG diff --git a/ui/src/components/ui/popover.tsx b/ui/src/components/ui/popover.tsx deleted file mode 100644 index 0df056f..0000000 --- a/ui/src/components/ui/popover.tsx +++ /dev/null @@ -1,87 +0,0 @@ -import * as React from "react" -import * as PopoverPrimitive from "@radix-ui/react-popover" - -import { cn } from "@/lib/utils" - -function Popover({ - ...props -}: React.ComponentProps) { - return -} - -function PopoverTrigger({ - ...props -}: React.ComponentProps) { - return -} - -function PopoverContent({ - className, - align = "center", - sideOffset = 4, - ...props -}: React.ComponentProps) { - return ( - - - - ) -} - -function PopoverAnchor({ - ...props -}: React.ComponentProps) { - return -} - -function PopoverHeader({ className, ...props }: React.ComponentProps<"div">) { - return ( -
- ) -} - -function PopoverTitle({ className, ...props }: React.ComponentProps<"h2">) { - return ( -
- ) -} - -function PopoverDescription({ - className, - ...props -}: React.ComponentProps<"p">) { - return ( -

- ) -} - -export { - Popover, - PopoverTrigger, - PopoverContent, - PopoverAnchor, - PopoverHeader, - PopoverTitle, - PopoverDescription, -} diff --git a/ui/src/components/ui/radio-group.tsx b/ui/src/components/ui/radio-group.tsx deleted file mode 100644 index 5e6778c..0000000 --- a/ui/src/components/ui/radio-group.tsx +++ /dev/null @@ -1,45 +0,0 @@ -"use client" - -import * as React from "react" -import * as RadioGroupPrimitive from "@radix-ui/react-radio-group" -import { CircleIcon } from "lucide-react" - -import { cn } from "@/lib/utils" - -function RadioGroup({ - className, - ...props -}: React.ComponentProps) { - return ( - - ) -} - -function RadioGroupItem({ - className, - ...props -}: React.ComponentProps) { - return ( - - - - - - ) -} - -export { RadioGroup, RadioGroupItem } diff --git a/ui/src/components/ui/scroll-area.tsx b/ui/src/components/ui/scroll-area.tsx deleted file mode 100644 index 9376f59..0000000 --- a/ui/src/components/ui/scroll-area.tsx +++ /dev/null @@ -1,56 +0,0 @@ -import * as React from "react" -import * as ScrollAreaPrimitive from "@radix-ui/react-scroll-area" - -import { cn } from "@/lib/utils" - -function ScrollArea({ - className, - children, - ...props -}: React.ComponentProps) { - return ( - - - {children} - - - - - ) -} - -function ScrollBar({ - className, - orientation = "vertical", - ...props -}: React.ComponentProps) { - return ( - - - - ) -} - -export { ScrollArea, ScrollBar } diff --git a/ui/src/components/ui/select.tsx b/ui/src/components/ui/select.tsx deleted file mode 100644 index 88302a8..0000000 --- a/ui/src/components/ui/select.tsx +++ /dev/null @@ -1,190 +0,0 @@ -"use client" - -import * as React from "react" -import * as SelectPrimitive from "@radix-ui/react-select" -import { CheckIcon, ChevronDownIcon, ChevronUpIcon } from "lucide-react" - -import { cn } from "@/lib/utils" - -function Select({ - ...props -}: React.ComponentProps) { - return -} - -function SelectGroup({ - ...props -}: React.ComponentProps) { - return -} - -function SelectValue({ - ...props -}: React.ComponentProps) { - return -} - -function SelectTrigger({ - className, - size = "default", - children, - ...props -}: React.ComponentProps & { - size?: "sm" | "default" -}) { - return ( - - {children} - - - - - ) -} - -function SelectContent({ - className, - children, - position = "item-aligned", - align = "center", - ...props -}: React.ComponentProps) { - return ( - - - - - {children} - - - - - ) -} - -function SelectLabel({ - className, - ...props -}: React.ComponentProps) { - return ( - - ) -} - -function SelectItem({ - className, - children, - ...props -}: React.ComponentProps) { - return ( - - - - - - - {children} - - ) -} - -function SelectSeparator({ - className, - ...props -}: React.ComponentProps) { - return ( - - ) -} - -function SelectScrollUpButton({ - className, - ...props -}: React.ComponentProps) { - return ( - - - - ) -} - -function SelectScrollDownButton({ - className, - ...props -}: React.ComponentProps) { - return ( - - - - ) -} - -export { - Select, - SelectContent, - SelectGroup, - SelectItem, - SelectLabel, - SelectScrollDownButton, - SelectScrollUpButton, - SelectSeparator, - SelectTrigger, - SelectValue, -} diff --git a/ui/src/components/ui/tabs.tsx b/ui/src/components/ui/tabs.tsx deleted file mode 100644 index bb946fc..0000000 --- a/ui/src/components/ui/tabs.tsx +++ /dev/null @@ -1,89 +0,0 @@ -import * as React from "react" -import * as TabsPrimitive from "@radix-ui/react-tabs" -import { cva, type VariantProps } from "class-variance-authority" - -import { cn 
} from "@/lib/utils" - -function Tabs({ - className, - orientation = "horizontal", - ...props -}: React.ComponentProps) { - return ( - - ) -} - -const tabsListVariants = cva( - "rounded-lg p-[3px] group-data-[orientation=horizontal]/tabs:h-9 data-[variant=line]:rounded-none group/tabs-list text-muted-foreground inline-flex w-fit items-center justify-center group-data-[orientation=vertical]/tabs:h-fit group-data-[orientation=vertical]/tabs:flex-col", - { - variants: { - variant: { - default: "bg-muted", - line: "gap-1 bg-transparent", - }, - }, - defaultVariants: { - variant: "default", - }, - } -) - -function TabsList({ - className, - variant = "default", - ...props -}: React.ComponentProps & - VariantProps) { - return ( - - ) -} - -function TabsTrigger({ - className, - ...props -}: React.ComponentProps) { - return ( - - ) -} - -function TabsContent({ - className, - ...props -}: React.ComponentProps) { - return ( - - ) -} - -export { Tabs, TabsList, TabsTrigger, TabsContent, tabsListVariants } diff --git a/ui/src/components/ui/toggle.tsx b/ui/src/components/ui/toggle.tsx deleted file mode 100644 index 94ec8f5..0000000 --- a/ui/src/components/ui/toggle.tsx +++ /dev/null @@ -1,47 +0,0 @@ -"use client" - -import * as React from "react" -import * as TogglePrimitive from "@radix-ui/react-toggle" -import { cva, type VariantProps } from "class-variance-authority" - -import { cn } from "@/lib/utils" - -const toggleVariants = cva( - "inline-flex items-center justify-center gap-2 rounded-md text-sm font-medium hover:bg-muted hover:text-muted-foreground disabled:pointer-events-none disabled:opacity-50 data-[state=on]:bg-accent data-[state=on]:text-accent-foreground [&_svg]:pointer-events-none [&_svg:not([class*='size-'])]:size-4 [&_svg]:shrink-0 focus-visible:border-ring focus-visible:ring-ring/50 focus-visible:ring-[3px] outline-none transition-[color,box-shadow] aria-invalid:ring-destructive/20 dark:aria-invalid:ring-destructive/40 aria-invalid:border-destructive whitespace-nowrap", - { - variants: { - variant: { - default: "bg-transparent", - outline: - "border border-input bg-transparent shadow-xs hover:bg-accent hover:text-accent-foreground", - }, - size: { - default: "h-9 px-2 min-w-9", - sm: "h-8 px-1.5 min-w-8", - lg: "h-10 px-2.5 min-w-10", - }, - }, - defaultVariants: { - variant: "default", - size: "default", - }, - } -) - -function Toggle({ - className, - variant, - size, - ...props -}: React.ComponentProps & - VariantProps) { - return ( - - ) -} - -export { Toggle, toggleVariants } diff --git a/ui/src/components/ui/tooltip.tsx b/ui/src/components/ui/tooltip.tsx deleted file mode 100644 index a4e90d4..0000000 --- a/ui/src/components/ui/tooltip.tsx +++ /dev/null @@ -1,61 +0,0 @@ -"use client" - -import * as React from "react" -import * as TooltipPrimitive from "@radix-ui/react-tooltip" - -import { cn } from "@/lib/utils" - -function TooltipProvider({ - delayDuration = 0, - ...props -}: React.ComponentProps) { - return ( - - ) -} - -function Tooltip({ - ...props -}: React.ComponentProps) { - return ( - - - - ) -} - -function TooltipTrigger({ - ...props -}: React.ComponentProps) { - return -} - -function TooltipContent({ - className, - sideOffset = 0, - children, - ...props -}: React.ComponentProps) { - return ( - - - {children} - - - - ) -} - -export { Tooltip, TooltipTrigger, TooltipContent, TooltipProvider } diff --git a/ui/src/hooks/useHashRoute.ts b/ui/src/hooks/useHashRoute.ts new file mode 100644 index 0000000..1482199 --- /dev/null +++ b/ui/src/hooks/useHashRoute.ts @@ -0,0 +1,36 @@ 
+import { useState, useEffect, useCallback } from 'react' + +export type Route = 'app' | 'docs' + +interface HashRouteState { + route: Route + section: string | null + navigate: (hash: string) => void +} + +function parseHash(hash: string): { route: Route; section: string | null } { + const cleaned = hash.replace(/^#\/?/, '') + if (cleaned === 'docs' || cleaned.startsWith('docs/')) { + const section = cleaned.slice(5) || null // Remove 'docs/' prefix + return { route: 'docs', section } + } + return { route: 'app', section: null } +} + +export function useHashRoute(): HashRouteState { + const [state, setState] = useState(() => parseHash(window.location.hash)) + + useEffect(() => { + const handleHashChange = () => { + setState(parseHash(window.location.hash)) + } + window.addEventListener('hashchange', handleHashChange) + return () => window.removeEventListener('hashchange', handleHashChange) + }, []) + + const navigate = useCallback((hash: string) => { + window.location.hash = hash + }, []) + + return { ...state, navigate } +} diff --git a/ui/src/hooks/useProjects.ts b/ui/src/hooks/useProjects.ts index e914909..676311c 100644 --- a/ui/src/hooks/useProjects.ts +++ b/ui/src/hooks/useProjects.ts @@ -266,6 +266,8 @@ const DEFAULT_SETTINGS: Settings = { glm_mode: false, ollama_mode: false, testing_agent_ratio: 1, + playwright_headless: true, + batch_size: 3, } export function useAvailableModels() { diff --git a/ui/src/hooks/useWebSocket.ts b/ui/src/hooks/useWebSocket.ts index 18b117e..1a44435 100644 --- a/ui/src/hooks/useWebSocket.ts +++ b/ui/src/hooks/useWebSocket.ts @@ -210,6 +210,7 @@ export function useProjectWebSocket(projectName: string | null) { agentName: message.agentName, agentType: message.agentType || 'coding', // Default to coding for backwards compat featureId: message.featureId, + featureIds: message.featureIds || [message.featureId], featureName: message.featureName, state: message.state, thought: message.thought, @@ -225,6 +226,7 @@ export function useProjectWebSocket(projectName: string | null) { agentName: message.agentName, agentType: message.agentType || 'coding', // Default to coding for backwards compat featureId: message.featureId, + featureIds: message.featureIds || [message.featureId], featureName: message.featureName, state: message.state, thought: message.thought, diff --git a/ui/src/lib/types.ts b/ui/src/lib/types.ts index 269c2ef..cec91ec 100644 --- a/ui/src/lib/types.ts +++ b/ui/src/lib/types.ts @@ -199,7 +199,8 @@ export interface ActiveAgent { agentIndex: number // -1 for synthetic completions agentName: AgentMascot | 'Unknown' agentType: AgentType // "coding" or "testing" - featureId: number + featureId: number // Current/primary feature (backward compat) + featureIds: number[] // All features in batch featureName: string state: AgentState thought?: string @@ -270,6 +271,7 @@ export interface WSAgentUpdateMessage { agentName: AgentMascot | 'Unknown' agentType: AgentType // "coding" or "testing" featureId: number + featureIds?: number[] // All features in batch (may be absent for backward compat) featureName: string state: AgentState thought?: string @@ -529,12 +531,16 @@ export interface Settings { glm_mode: boolean ollama_mode: boolean testing_agent_ratio: number // Regression testing agents (0-3) + playwright_headless: boolean + batch_size: number // Features per coding agent batch (1-3) } export interface SettingsUpdate { yolo_mode?: boolean model?: string testing_agent_ratio?: number + playwright_headless?: boolean + batch_size?: number } export interface 
ProjectSettingsUpdate { diff --git a/ui/src/main.tsx b/ui/src/main.tsx index fa4dad9..b4d89a2 100644 --- a/ui/src/main.tsx +++ b/ui/src/main.tsx @@ -1,7 +1,9 @@ import { StrictMode } from 'react' import { createRoot } from 'react-dom/client' import { QueryClient, QueryClientProvider } from '@tanstack/react-query' +import { useHashRoute } from './hooks/useHashRoute' import App from './App' +import { DocsPage } from './components/docs/DocsPage' import './styles/globals.css' // Note: Custom theme removed - using shadcn/ui theming instead @@ -14,10 +16,16 @@ const queryClient = new QueryClient({ }, }) +function Router() { + const { route } = useHashRoute() + if (route === 'docs') return + return +} + createRoot(document.getElementById('root')!).render( - + , ) diff --git a/ui/src/styles/globals.css b/ui/src/styles/globals.css index 386ddf6..035bffe 100644 --- a/ui/src/styles/globals.css +++ b/ui/src/styles/globals.css @@ -1,5 +1,5 @@ @import "tailwindcss"; -@import url("tw-animate-css"); +@import "tw-animate-css"; /* Enable class-based dark mode in Tailwind v4 */ @custom-variant dark (&:where(.dark, .dark *)); @@ -1134,6 +1134,143 @@ } } +/* ============================================================================ + Documentation Prose Typography + ============================================================================ */ + +.docs-prose { + line-height: 1.7; + color: var(--muted-foreground); +} + +.docs-prose h2 { + font-size: 1.5rem; + font-weight: 700; + color: var(--foreground); + margin-top: 3rem; + margin-bottom: 1rem; + padding-bottom: 0.5rem; + border-bottom: 2px solid var(--border); + scroll-margin-top: 5rem; +} + +.docs-prose h2:first-child { + margin-top: 0; +} + +.docs-prose h3 { + font-size: 1.15rem; + font-weight: 600; + color: var(--foreground); + margin-top: 2rem; + margin-bottom: 0.75rem; + scroll-margin-top: 5rem; +} + +.docs-prose p { + margin-bottom: 1rem; + max-width: 65ch; +} + +.docs-prose ul, +.docs-prose ol { + margin-bottom: 1rem; + padding-left: 1.5rem; +} + +.docs-prose ul { + list-style-type: disc; +} + +.docs-prose ol { + list-style-type: decimal; +} + +.docs-prose li { + margin-bottom: 0.375rem; +} + +.docs-prose li > ul, +.docs-prose li > ol { + margin-top: 0.375rem; + margin-bottom: 0; +} + +.docs-prose pre { + background: var(--muted); + border: 1px solid var(--border); + border-radius: var(--radius); + padding: 1rem; + overflow-x: auto; + margin-bottom: 1rem; + font-family: var(--font-mono); + font-size: 0.8125rem; + line-height: 1.6; +} + +.docs-prose code:not(pre code) { + background: var(--muted); + padding: 0.125rem 0.375rem; + border-radius: 0.25rem; + font-family: var(--font-mono); + font-size: 0.8125rem; +} + +.docs-prose table { + width: 100%; + border-collapse: collapse; + margin-bottom: 1rem; + font-size: 0.875rem; +} + +.docs-prose th { + background: var(--muted); + font-weight: 600; + color: var(--foreground); + text-align: left; + padding: 0.5rem 0.75rem; + border: 1px solid var(--border); +} + +.docs-prose td { + padding: 0.5rem 0.75rem; + border: 1px solid var(--border); +} + +.docs-prose tr:nth-child(even) td { + background: var(--muted); + opacity: 0.5; +} + +.docs-prose blockquote { + border-left: 4px solid var(--primary); + padding-left: 1rem; + margin-bottom: 1rem; + font-style: italic; + color: var(--muted-foreground); +} + +.docs-prose a { + color: var(--primary); + text-decoration: underline; + text-underline-offset: 2px; +} + +.docs-prose a:hover { + opacity: 0.8; +} + +.docs-prose strong { + color: var(--foreground); + 
font-weight: 600; +} + +.docs-prose hr { + border: none; + border-top: 1px solid var(--border); + margin: 2rem 0; +} + /* ============================================================================ Scrollbar Styling ============================================================================ */ diff --git a/ui/vite.config.ts b/ui/vite.config.ts index f7c6aa1..69fbe08 100644 --- a/ui/vite.config.ts +++ b/ui/vite.config.ts @@ -30,7 +30,6 @@ export default defineConfig({ 'vendor-ui': [ '@radix-ui/react-dialog', '@radix-ui/react-dropdown-menu', - '@radix-ui/react-tooltip', 'lucide-react', ], },