From 45f6f17eb0b0e1e6f28a9a77069397255c3b6e8f Mon Sep 17 00:00:00 2001 From: Kacper Date: Mon, 2 Feb 2026 15:47:18 +0100 Subject: [PATCH 01/89] fix(docker): Pre-install Playwright Chromium browsers for automated testing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes #725 AI agents in automated testing mode require Playwright to verify implementations, but Docker containers had only system dependencies installed, not browser binaries. This caused verification failures with permissions errors. Changes: - Install Playwright Chromium in Dockerfile (~300MB increase) - Update docker-compose.override.yml.example with clearer Playwright documentation - Add "Playwright for Automated Testing" section to README - Document optional volume mount for persisting browsers across rebuilds Browsers are now pre-installed and work out of the box for Docker users. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 --- Dockerfile | 6 ++++++ README.md | 27 +++++++++++++++++++++++++++ docker-compose.override.yml.example | 11 ++++++++--- 3 files changed, 41 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 03911b45..7d22858c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -118,6 +118,12 @@ RUN curl -fsSL https://opencode.ai/install | bash && \ echo "=== Checking OpenCode CLI installation ===" && \ ls -la /home/automaker/.local/bin/ && \ (which opencode && opencode --version) || echo "opencode installed (may need auth setup)" + +# Install Playwright Chromium browser for AI agent verification tests +# This adds ~300MB to the image but enables automated testing mode out of the box +RUN npx playwright install chromium && \ + echo "=== Playwright Chromium installed ===" && \ + ls -la /home/automaker/.cache/ms-playwright/ || echo "Playwright browsers installed" USER root # Add PATH to profile so it's available in all interactive shells (for login shells) diff --git a/README.md 
b/README.md index 75705673..645ba722 100644 --- a/README.md +++ b/README.md @@ -338,6 +338,33 @@ services: The Docker image supports both AMD64 and ARM64 architectures. The GitHub CLI and Claude CLI are automatically downloaded for the correct architecture during build. +##### Playwright for Automated Testing + +The Docker image includes **Playwright Chromium pre-installed** for AI agent verification tests. When agents implement features in automated testing mode, they use Playwright to verify the implementation works correctly. + +**No additional setup required** - Playwright verification works out of the box. + +**Optional: Persist browsers across container rebuilds** + +To avoid re-downloading browsers when rebuilding the Docker image, add this to your `docker-compose.override.yml`: + +```yaml +services: + server: + volumes: + - playwright-cache:/home/automaker/.cache/ms-playwright + +volumes: + playwright-cache: + name: automaker-playwright-cache +``` + +**Updating browsers manually:** + +```bash +docker exec automaker-server npx playwright install chromium +``` + ### Testing #### End-to-End Tests (Playwright) diff --git a/docker-compose.override.yml.example b/docker-compose.override.yml.example index 3815c197..d1f0c216 100644 --- a/docker-compose.override.yml.example +++ b/docker-compose.override.yml.example @@ -21,9 +21,13 @@ services: # - ~/.local/share/opencode:/home/automaker/.local/share/opencode # - ~/.config/opencode:/home/automaker/.config/opencode - # Playwright browser cache - persists installed browsers across container restarts - # Run 'npx playwright install --with-deps chromium' once, and it will persist + # ===== Playwright Browser Cache (Optional) ===== + # Playwright Chromium is PRE-INSTALLED in the Docker image for automated testing. 
+ # Uncomment below to persist browser cache across container rebuilds (saves ~300MB download): # - playwright-cache:/home/automaker/.cache/ms-playwright + # + # To update Playwright browsers manually: + # docker exec automaker-server npx playwright install chromium environment: # Set root directory for all projects and file operations # Users can only create/open projects within this directory @@ -37,6 +41,7 @@ services: # - CURSOR_API_KEY=${CURSOR_API_KEY:-} volumes: - # Playwright cache volume (persists Chromium installs) + # Playwright cache volume - optional, persists browser updates across container rebuilds + # Uncomment if you mounted the playwright-cache volume above # playwright-cache: # name: automaker-playwright-cache From b37a287c9c63b6955e454e8497a16746a2231712 Mon Sep 17 00:00:00 2001 From: Kacper Date: Mon, 2 Feb 2026 15:55:11 +0100 Subject: [PATCH 02/89] fix(docker): Address PR #745 review feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Clean up npx cache after Playwright installation to reduce image size - Clarify README: volume mounts persist cache across container lifecycles, not image rebuilds - Add first-use warning: empty volume overrides pre-installed browsers, users must re-install with docker exec command 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- Dockerfile | 4 +++- README.md | 13 +++++++++++-- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 7d22858c..f5c3511e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -121,9 +121,11 @@ RUN curl -fsSL https://opencode.ai/install | bash && \ # Install Playwright Chromium browser for AI agent verification tests # This adds ~300MB to the image but enables automated testing mode out of the box +# Clean up npx cache after installation to reduce image size RUN npx playwright install chromium && \ echo "=== Playwright Chromium installed ===" && \ - ls -la 
/home/automaker/.cache/ms-playwright/ || echo "Playwright browsers installed" + ls -la /home/automaker/.cache/ms-playwright/ || echo "Playwright browsers installed" && \ + rm -rf /home/automaker/.npm/_npx USER root # Add PATH to profile so it's available in all interactive shells (for login shells) diff --git a/README.md b/README.md index 645ba722..0c21245b 100644 --- a/README.md +++ b/README.md @@ -344,9 +344,18 @@ The Docker image includes **Playwright Chromium pre-installed** for AI agent ver **No additional setup required** - Playwright verification works out of the box. -**Optional: Persist browsers across container rebuilds** +**Optional: Persist browsers for manual updates** -To avoid re-downloading browsers when rebuilding the Docker image, add this to your `docker-compose.override.yml`: +By default, Playwright Chromium is pre-installed in the Docker image. If you need to manually update browsers or want to persist browser installations across container restarts (not image rebuilds), you can mount a volume. + +**Important:** When you first add this volume mount to an existing setup, the empty volume will override the pre-installed browsers. 
You must re-install them: + +```bash +# After adding the volume mount for the first time +docker exec automaker-server npx playwright install chromium +``` + +Add this to your `docker-compose.override.yml`: ```yaml services: From 3ccea7a67beee54f06060ecc4cfb8ebb0c307673 Mon Sep 17 00:00:00 2001 From: Kacper Date: Mon, 2 Feb 2026 16:07:53 +0100 Subject: [PATCH 03/89] fix(docker): Address remaining PR #745 review comments - Move Playwright install after node_modules copy to use pinned version - Use local playwright binary instead of npx to avoid registry fetch - Add --user automaker -w /app flags to docker exec commands - Change bold text to proper heading in README (MD036 lint fix) Co-Authored-By: Claude Opus 4.5 --- Dockerfile | 16 +++++++++------- README.md | 6 +++--- docker-compose.override.yml.example | 2 +- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/Dockerfile b/Dockerfile index f5c3511e..2e745e4c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -119,13 +119,6 @@ RUN curl -fsSL https://opencode.ai/install | bash && \ ls -la /home/automaker/.local/bin/ && \ (which opencode && opencode --version) || echo "opencode installed (may need auth setup)" -# Install Playwright Chromium browser for AI agent verification tests -# This adds ~300MB to the image but enables automated testing mode out of the box -# Clean up npx cache after installation to reduce image size -RUN npx playwright install chromium && \ - echo "=== Playwright Chromium installed ===" && \ - ls -la /home/automaker/.cache/ms-playwright/ || echo "Playwright browsers installed" && \ - rm -rf /home/automaker/.npm/_npx USER root # Add PATH to profile so it's available in all interactive shells (for login shells) @@ -155,6 +148,15 @@ COPY --from=server-builder /app/apps/server/package*.json ./apps/server/ # Copy node_modules (includes symlinks to libs) COPY --from=server-builder /app/node_modules ./node_modules +# Install Playwright Chromium browser for AI agent verification tests +# This 
adds ~300MB to the image but enables automated testing mode out of the box +# Using the locally installed playwright ensures we use the pinned version from package-lock.json +USER automaker +RUN ./node_modules/.bin/playwright install chromium && \ + echo "=== Playwright Chromium installed ===" && \ + ls -la /home/automaker/.cache/ms-playwright/ || echo "Playwright browsers installed" +USER root + # Create data and projects directories RUN mkdir -p /data /projects && chown automaker:automaker /data /projects diff --git a/README.md b/README.md index 0c21245b..95beefe1 100644 --- a/README.md +++ b/README.md @@ -344,7 +344,7 @@ The Docker image includes **Playwright Chromium pre-installed** for AI agent ver **No additional setup required** - Playwright verification works out of the box. -**Optional: Persist browsers for manual updates** +#### Optional: Persist browsers for manual updates By default, Playwright Chromium is pre-installed in the Docker image. If you need to manually update browsers or want to persist browser installations across container restarts (not image rebuilds), you can mount a volume. @@ -352,7 +352,7 @@ By default, Playwright Chromium is pre-installed in the Docker image. 
If you nee ```bash # After adding the volume mount for the first time -docker exec automaker-server npx playwright install chromium +docker exec --user automaker -w /app automaker-server npx playwright install chromium ``` Add this to your `docker-compose.override.yml`: @@ -371,7 +371,7 @@ volumes: **Updating browsers manually:** ```bash -docker exec automaker-server npx playwright install chromium +docker exec --user automaker -w /app automaker-server npx playwright install chromium ``` ### Testing diff --git a/docker-compose.override.yml.example b/docker-compose.override.yml.example index d1f0c216..e92ce119 100644 --- a/docker-compose.override.yml.example +++ b/docker-compose.override.yml.example @@ -27,7 +27,7 @@ services: # - playwright-cache:/home/automaker/.cache/ms-playwright # # To update Playwright browsers manually: - # docker exec automaker-server npx playwright install chromium + # docker exec --user automaker -w /app automaker-server npx playwright install chromium environment: # Set root directory for all projects and file operations # Users can only create/open projects within this directory From aad3ff2cdf74b7f872dc57eb8aa8117dae3b6952 Mon Sep 17 00:00:00 2001 From: Kacper Date: Mon, 2 Feb 2026 17:35:03 +0100 Subject: [PATCH 04/89] fix(auth): Improve OAuth credential detection and startup warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Enhanced getClaudeAuthIndicators() to return detailed check information including file paths checked and specific error details for debugging - Added debug logging to server startup credential detection for easier troubleshooting in Docker environments - Show paths that were checked in the warning message to help users debug mount issues - Added support for CLAUDE_CODE_OAUTH_TOKEN environment variable - Return authType in verify-claude-auth response to distinguish between OAuth and CLI authentication methods - Updated UI to show specific success messages for Claude 
Code subscription vs generic CLI auth - Added Docker troubleshooting tips to sandbox risk dialog - Added comprehensive unit tests for OAuth credential detection scenarios Closes #721 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- apps/server/src/index.ts | 101 ++- .../routes/setup/routes/verify-claude-auth.ts | 19 + .../dialogs/sandbox-risk-dialog.tsx | 23 + .../setup-view/steps/claude-setup-step.tsx | 25 +- apps/ui/src/lib/electron.ts | 1 + apps/ui/src/lib/http-api-client.ts | 1 + libs/platform/src/index.ts | 2 + libs/platform/src/system-paths.ts | 156 +++- .../tests/oauth-credential-detection.test.ts | 736 ++++++++++++++++++ 9 files changed, 1028 insertions(+), 36 deletions(-) create mode 100644 libs/platform/tests/oauth-credential-detection.test.ts diff --git a/apps/server/src/index.ts b/apps/server/src/index.ts index 4bd496bc..c10702bb 100644 --- a/apps/server/src/index.ts +++ b/apps/server/src/index.ts @@ -121,21 +121,89 @@ const BOX_CONTENT_WIDTH = 67; // The Claude Agent SDK can use either ANTHROPIC_API_KEY or Claude Code CLI authentication (async () => { const hasAnthropicKey = !!process.env.ANTHROPIC_API_KEY; + const hasEnvOAuthToken = !!process.env.CLAUDE_CODE_OAUTH_TOKEN; + + logger.debug('[CREDENTIAL_CHECK] Starting credential detection...'); + logger.debug('[CREDENTIAL_CHECK] Environment variables:', { + hasAnthropicKey, + hasEnvOAuthToken, + }); if (hasAnthropicKey) { logger.info('✓ ANTHROPIC_API_KEY detected'); return; } + if (hasEnvOAuthToken) { + logger.info('✓ CLAUDE_CODE_OAUTH_TOKEN detected'); + return; + } + // Check for Claude Code CLI authentication + // Store indicators outside the try block so we can use them in the warning message + let cliAuthIndicators: Awaited<ReturnType<typeof getClaudeAuthIndicators>> | null = null; + try { - const indicators = await getClaudeAuthIndicators(); + cliAuthIndicators = await getClaudeAuthIndicators(); + const indicators = cliAuthIndicators; + + // Log detailed credential detection results + 
logger.debug('[CREDENTIAL_CHECK] Claude CLI auth indicators:', { + hasCredentialsFile: indicators.hasCredentialsFile, + hasSettingsFile: indicators.hasSettingsFile, + hasStatsCacheWithActivity: indicators.hasStatsCacheWithActivity, + hasProjectsSessions: indicators.hasProjectsSessions, + credentials: indicators.credentials, + }); + + logger.debug('[CREDENTIAL_CHECK] File check details:', { + settingsFile: { + path: indicators.checks.settingsFile.path, + exists: indicators.checks.settingsFile.exists, + readable: indicators.checks.settingsFile.readable, + error: indicators.checks.settingsFile.error, + }, + statsCache: { + path: indicators.checks.statsCache.path, + exists: indicators.checks.statsCache.exists, + readable: indicators.checks.statsCache.readable, + hasDailyActivity: indicators.checks.statsCache.hasDailyActivity, + error: indicators.checks.statsCache.error, + }, + projectsDir: { + path: indicators.checks.projectsDir.path, + exists: indicators.checks.projectsDir.exists, + readable: indicators.checks.projectsDir.readable, + entryCount: indicators.checks.projectsDir.entryCount, + error: indicators.checks.projectsDir.error, + }, + credentialFiles: indicators.checks.credentialFiles.map((cf) => ({ + path: cf.path, + exists: cf.exists, + readable: cf.readable, + error: cf.error, + })), + }); + const hasCliAuth = indicators.hasStatsCacheWithActivity || (indicators.hasSettingsFile && indicators.hasProjectsSessions) || (indicators.hasCredentialsFile && (indicators.credentials?.hasOAuthToken || indicators.credentials?.hasApiKey)); + logger.debug('[CREDENTIAL_CHECK] Auth determination:', { + hasCliAuth, + reason: hasCliAuth + ? indicators.hasStatsCacheWithActivity + ? 'stats cache with activity' + : indicators.hasSettingsFile && indicators.hasProjectsSessions + ? 'settings file + project sessions' + : indicators.credentials?.hasOAuthToken + ? 
'credentials file with OAuth token' + : 'credentials file with API key' + : 'no valid credentials found', + }); + if (hasCliAuth) { logger.info('✓ Claude Code CLI authentication detected'); return; @@ -145,7 +213,7 @@ const BOX_CONTENT_WIDTH = 67; logger.warn('Error checking for Claude Code CLI authentication:', error); } - // No authentication found - show warning + // No authentication found - show warning with paths that were checked const wHeader = '⚠️ WARNING: No Claude authentication configured'.padEnd(BOX_CONTENT_WIDTH); const w1 = 'The Claude Agent SDK requires authentication to function.'.padEnd(BOX_CONTENT_WIDTH); const w2 = 'Options:'.padEnd(BOX_CONTENT_WIDTH); @@ -158,6 +226,33 @@ const BOX_CONTENT_WIDTH = 67; BOX_CONTENT_WIDTH ); + // Build paths checked summary from the indicators (if available) + let pathsCheckedInfo = ''; + if (cliAuthIndicators) { + const pathsChecked: string[] = []; + + // Collect paths that were checked + if (cliAuthIndicators.checks.settingsFile.path) { + pathsChecked.push(`Settings: ${cliAuthIndicators.checks.settingsFile.path}`); + } + if (cliAuthIndicators.checks.statsCache.path) { + pathsChecked.push(`Stats cache: ${cliAuthIndicators.checks.statsCache.path}`); + } + if (cliAuthIndicators.checks.projectsDir.path) { + pathsChecked.push(`Projects dir: ${cliAuthIndicators.checks.projectsDir.path}`); + } + for (const credFile of cliAuthIndicators.checks.credentialFiles) { + pathsChecked.push(`Credentials: ${credFile.path}`); + } + + if (pathsChecked.length > 0) { + pathsCheckedInfo = ` +║ ║ +║ ${'Paths checked:'.padEnd(BOX_CONTENT_WIDTH)}║ +${pathsChecked.map((p) => `║ ${p.substring(0, BOX_CONTENT_WIDTH - 2).padEnd(BOX_CONTENT_WIDTH - 2)} ║`).join('\n')}`; + } + } + logger.warn(` ╔═════════════════════════════════════════════════════════════════════╗ ║ ${wHeader}║ @@ -169,7 +264,7 @@ const BOX_CONTENT_WIDTH = 67; ║ ${w3}║ ║ ${w4}║ ║ ${w5}║ -║ ${w6}║ +║ ${w6}║${pathsCheckedInfo} ║ ║ 
╚═════════════════════════════════════════════════════════════════════╝ `); diff --git a/apps/server/src/routes/setup/routes/verify-claude-auth.ts b/apps/server/src/routes/setup/routes/verify-claude-auth.ts index df04d462..2a8d21b0 100644 --- a/apps/server/src/routes/setup/routes/verify-claude-auth.ts +++ b/apps/server/src/routes/setup/routes/verify-claude-auth.ts @@ -320,9 +320,28 @@ export function createVerifyClaudeAuthHandler() { authMethod, }); + // Determine specific auth type for success messages + let authType: 'oauth' | 'api_key' | 'cli' | undefined; + if (authenticated) { + if (authMethod === 'api_key') { + authType = 'api_key'; + } else if (authMethod === 'cli') { + // Check if CLI auth is via OAuth (Claude Code subscription) or generic CLI + // OAuth tokens are stored in the credentials file by the Claude CLI + const { getClaudeAuthIndicators } = await import('@automaker/platform'); + const indicators = await getClaudeAuthIndicators(); + if (indicators.credentials?.hasOAuthToken) { + authType = 'oauth'; + } else { + authType = 'cli'; + } + } + } + res.json({ success: true, authenticated, + authType, error: errorMessage || undefined, }); } catch (error) { diff --git a/apps/ui/src/components/dialogs/sandbox-risk-dialog.tsx b/apps/ui/src/components/dialogs/sandbox-risk-dialog.tsx index 3a5f6d35..7b597c8c 100644 --- a/apps/ui/src/components/dialogs/sandbox-risk-dialog.tsx +++ b/apps/ui/src/components/dialogs/sandbox-risk-dialog.tsx @@ -69,6 +69,29 @@ export function SandboxRiskDialog({ open, onConfirm, onDeny }: SandboxRiskDialog For safer operation, consider running Automaker in Docker. See the README for instructions.

+ +
+

+ Already running in Docker? Try these troubleshooting steps: +

+
    +
  • + Ensure IS_CONTAINERIZED=true is + set in your docker-compose environment +
  • +
  • + Verify the server container has the environment variable:{' '} + + docker exec automaker-server printenv IS_CONTAINERIZED + +
  • +
  • Rebuild and restart containers if you recently changed the configuration
  • +
  • + Check the server logs for startup messages:{' '} + docker-compose logs server +
  • +
+
diff --git a/apps/ui/src/components/views/setup-view/steps/claude-setup-step.tsx b/apps/ui/src/components/views/setup-view/steps/claude-setup-step.tsx index b864bfdb..0b4799d6 100644 --- a/apps/ui/src/components/views/setup-view/steps/claude-setup-step.tsx +++ b/apps/ui/src/components/views/setup-view/steps/claude-setup-step.tsx @@ -59,6 +59,7 @@ export function ClaudeSetupStep({ onNext, onBack, onSkip }: ClaudeSetupStepProps // CLI Verification state const [cliVerificationStatus, setCliVerificationStatus] = useState('idle'); const [cliVerificationError, setCliVerificationError] = useState(null); + const [cliAuthType, setCliAuthType] = useState<'oauth' | 'cli' | null>(null); // API Key Verification state const [apiKeyVerificationStatus, setApiKeyVerificationStatus] = @@ -119,6 +120,7 @@ export function ClaudeSetupStep({ onNext, onBack, onSkip }: ClaudeSetupStepProps const verifyCliAuth = useCallback(async () => { setCliVerificationStatus('verifying'); setCliVerificationError(null); + setCliAuthType(null); try { const api = getElectronAPI(); @@ -138,12 +140,21 @@ export function ClaudeSetupStep({ onNext, onBack, onSkip }: ClaudeSetupStepProps if (result.authenticated && !hasLimitReachedError) { setCliVerificationStatus('verified'); + // Store the auth type for displaying specific success message + const authType = result.authType === 'oauth' ? 'oauth' : 'cli'; + setCliAuthType(authType); setClaudeAuthStatus({ authenticated: true, - method: 'cli_authenticated', + method: authType === 'oauth' ? 
'oauth_token' : 'cli_authenticated', hasCredentialsFile: claudeAuthStatus?.hasCredentialsFile || false, + oauthTokenValid: authType === 'oauth', }); - toast.success('Claude CLI authentication verified!'); + // Show specific success message based on auth type + if (authType === 'oauth') { + toast.success('Claude Code subscription detected and verified!'); + } else { + toast.success('Claude CLI authentication verified!'); + } } else { setCliVerificationStatus('error'); setCliVerificationError( @@ -436,9 +447,15 @@ export function ClaudeSetupStep({ onNext, onBack, onSkip }: ClaudeSetupStepProps
-

CLI Authentication verified!

+

+ {cliAuthType === 'oauth' + ? 'Claude Code subscription verified!' + : 'CLI Authentication verified!'} +

- Your Claude CLI is working correctly. + {cliAuthType === 'oauth' + ? 'Your Claude Code subscription is active and ready to use.' + : 'Your Claude CLI is working correctly.'}

diff --git a/apps/ui/src/lib/electron.ts b/apps/ui/src/lib/electron.ts index 89aa07ba..22079822 100644 --- a/apps/ui/src/lib/electron.ts +++ b/apps/ui/src/lib/electron.ts @@ -1442,6 +1442,7 @@ interface SetupAPI { verifyClaudeAuth: (authMethod?: 'cli' | 'api_key') => Promise<{ success: boolean; authenticated: boolean; + authType?: 'oauth' | 'api_key' | 'cli'; error?: string; }>; getGhStatus?: () => Promise<{ diff --git a/apps/ui/src/lib/http-api-client.ts b/apps/ui/src/lib/http-api-client.ts index 1f79ff07..acd75d22 100644 --- a/apps/ui/src/lib/http-api-client.ts +++ b/apps/ui/src/lib/http-api-client.ts @@ -1350,6 +1350,7 @@ export class HttpApiClient implements ElectronAPI { ): Promise<{ success: boolean; authenticated: boolean; + authType?: 'oauth' | 'api_key' | 'cli'; error?: string; }> => this.post('/api/setup/verify-claude-auth', { authMethod, apiKey }), diff --git a/libs/platform/src/index.ts b/libs/platform/src/index.ts index 5952ba2d..5c0b8078 100644 --- a/libs/platform/src/index.ts +++ b/libs/platform/src/index.ts @@ -134,6 +134,8 @@ export { findClaudeCliPath, getClaudeAuthIndicators, type ClaudeAuthIndicators, + type FileCheckResult, + type DirectoryCheckResult, findCodexCliPath, getCodexAuthIndicators, type CodexAuthIndicators, diff --git a/libs/platform/src/system-paths.ts b/libs/platform/src/system-paths.ts index 0d900dfa..fb5e6bd3 100644 --- a/libs/platform/src/system-paths.ts +++ b/libs/platform/src/system-paths.ts @@ -976,6 +976,27 @@ export async function findGitBashPath(): Promise { return findFirstExistingPath(getGitBashPaths()); } +/** + * Details about a file check performed during auth detection + */ +export interface FileCheckResult { + path: string; + exists: boolean; + readable: boolean; + error?: string; +} + +/** + * Details about a directory check performed during auth detection + */ +export interface DirectoryCheckResult { + path: string; + exists: boolean; + readable: boolean; + entryCount: number; + error?: string; +} + /** * Get 
Claude authentication status by checking various indicators */ @@ -988,67 +1009,144 @@ export interface ClaudeAuthIndicators { hasOAuthToken: boolean; hasApiKey: boolean; } | null; + /** Detailed information about what was checked */ + checks: { + settingsFile: FileCheckResult; + statsCache: FileCheckResult & { hasDailyActivity?: boolean }; + projectsDir: DirectoryCheckResult; + credentialFiles: FileCheckResult[]; + }; } export async function getClaudeAuthIndicators(): Promise<ClaudeAuthIndicators> { + const settingsPath = getClaudeSettingsPath(); + const statsCachePath = getClaudeStatsCachePath(); + const projectsDir = getClaudeProjectsDir(); + const credentialPaths = getClaudeCredentialPaths(); + + // Initialize checks with paths + const settingsFileCheck: FileCheckResult = { + path: settingsPath, + exists: false, + readable: false, + }; + + const statsCacheCheck: FileCheckResult & { hasDailyActivity?: boolean } = { + path: statsCachePath, + exists: false, + readable: false, + }; + + const projectsDirCheck: DirectoryCheckResult = { + path: projectsDir, + exists: false, + readable: false, + entryCount: 0, + }; + + const credentialFileChecks: FileCheckResult[] = credentialPaths.map((p) => ({ + path: p, + exists: false, + readable: false, + })); + const result: ClaudeAuthIndicators = { hasCredentialsFile: false, hasSettingsFile: false, hasStatsCacheWithActivity: false, hasProjectsSessions: false, credentials: null, + checks: { + settingsFile: settingsFileCheck, + statsCache: statsCacheCheck, + projectsDir: projectsDirCheck, + credentialFiles: credentialFileChecks, + }, }; // Check settings file try { - if (await systemPathAccess(getClaudeSettingsPath())) { + if (await systemPathAccess(settingsPath)) { + settingsFileCheck.exists = true; + settingsFileCheck.readable = true; result.hasSettingsFile = true; } - } catch { - // Ignore errors + } catch (err) { + settingsFileCheck.error = err instanceof Error ? 
err.message : String(err); } // Check stats cache for recent activity try { - const statsContent = await systemPathReadFile(getClaudeStatsCachePath()); - const stats = JSON.parse(statsContent); - if (stats.dailyActivity && stats.dailyActivity.length > 0) { - result.hasStatsCacheWithActivity = true; + const statsContent = await systemPathReadFile(statsCachePath); + statsCacheCheck.exists = true; + statsCacheCheck.readable = true; + try { + const stats = JSON.parse(statsContent); + if (stats.dailyActivity && stats.dailyActivity.length > 0) { + statsCacheCheck.hasDailyActivity = true; + result.hasStatsCacheWithActivity = true; + } else { + statsCacheCheck.hasDailyActivity = false; + } + } catch (parseErr) { + statsCacheCheck.error = `JSON parse error: ${parseErr instanceof Error ? parseErr.message : String(parseErr)}`; + } + } catch (err) { + if ((err as NodeJS.ErrnoException).code === 'ENOENT') { + statsCacheCheck.exists = false; + } else { + statsCacheCheck.error = err instanceof Error ? err.message : String(err); } - } catch { - // Ignore errors } // Check for sessions in projects directory try { - const sessions = await systemPathReaddir(getClaudeProjectsDir()); + const sessions = await systemPathReaddir(projectsDir); + projectsDirCheck.exists = true; + projectsDirCheck.readable = true; + projectsDirCheck.entryCount = sessions.length; if (sessions.length > 0) { result.hasProjectsSessions = true; } - } catch { - // Ignore errors + } catch (err) { + if ((err as NodeJS.ErrnoException).code === 'ENOENT') { + projectsDirCheck.exists = false; + } else { + projectsDirCheck.error = err instanceof Error ? 
err.message : String(err); + } } // Check credentials files - const credentialPaths = getClaudeCredentialPaths(); - for (const credPath of credentialPaths) { + for (let i = 0; i < credentialPaths.length; i++) { + const credPath = credentialPaths[i]; + const credCheck = credentialFileChecks[i]; try { const content = await systemPathReadFile(credPath); - const credentials = JSON.parse(content); - result.hasCredentialsFile = true; - // Support multiple credential formats: - // 1. Claude Code CLI format: { claudeAiOauth: { accessToken, refreshToken } } - // 2. Legacy format: { oauth_token } or { access_token } - // 3. API key format: { api_key } - const hasClaudeOauth = !!credentials.claudeAiOauth?.accessToken; - const hasLegacyOauth = !!(credentials.oauth_token || credentials.access_token); - result.credentials = { - hasOAuthToken: hasClaudeOauth || hasLegacyOauth, - hasApiKey: !!credentials.api_key, - }; - break; - } catch { - // Continue to next path + credCheck.exists = true; + credCheck.readable = true; + try { + const credentials = JSON.parse(content); + result.hasCredentialsFile = true; + // Support multiple credential formats: + // 1. Claude Code CLI format: { claudeAiOauth: { accessToken, refreshToken } } + // 2. Legacy format: { oauth_token } or { access_token } + // 3. API key format: { api_key } + const hasClaudeOauth = !!credentials.claudeAiOauth?.accessToken; + const hasLegacyOauth = !!(credentials.oauth_token || credentials.access_token); + result.credentials = { + hasOAuthToken: hasClaudeOauth || hasLegacyOauth, + hasApiKey: !!credentials.api_key, + }; + break; + } catch (parseErr) { + credCheck.error = `JSON parse error: ${parseErr instanceof Error ? parseErr.message : String(parseErr)}`; + } + } catch (err) { + if ((err as NodeJS.ErrnoException).code === 'ENOENT') { + credCheck.exists = false; + } else { + credCheck.error = err instanceof Error ? 
err.message : String(err); + } } } diff --git a/libs/platform/tests/oauth-credential-detection.test.ts b/libs/platform/tests/oauth-credential-detection.test.ts new file mode 100644 index 00000000..cf5a4705 --- /dev/null +++ b/libs/platform/tests/oauth-credential-detection.test.ts @@ -0,0 +1,736 @@ +/** + * Unit tests for OAuth credential detection scenarios + * + * Tests the various Claude credential detection formats including: + * - Claude Code CLI OAuth format (claudeAiOauth) + * - Legacy OAuth token format (oauth_token, access_token) + * - API key format (api_key) + * - Invalid/malformed credential files + * + * These tests use real temp directories to avoid complex fs mocking issues. + */ + +import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; +import fs from 'fs/promises'; +import path from 'path'; +import os from 'os'; + +describe('OAuth Credential Detection', () => { + let tempDir: string; + let originalHomedir: () => string; + let mockClaudeDir: string; + let mockCodexDir: string; + let mockOpenCodeDir: string; + + beforeEach(async () => { + // Reset modules to get fresh state + vi.resetModules(); + + // Create a temporary directory + tempDir = await fs.mkdtemp(path.join(os.tmpdir(), 'oauth-detection-test-')); + + // Create mock home directory structure + mockClaudeDir = path.join(tempDir, '.claude'); + mockCodexDir = path.join(tempDir, '.codex'); + mockOpenCodeDir = path.join(tempDir, '.local', 'share', 'opencode'); + + await fs.mkdir(mockClaudeDir, { recursive: true }); + await fs.mkdir(mockCodexDir, { recursive: true }); + await fs.mkdir(mockOpenCodeDir, { recursive: true }); + + // Mock os.homedir to return our temp directory + originalHomedir = os.homedir; + vi.spyOn(os, 'homedir').mockReturnValue(tempDir); + }); + + afterEach(async () => { + vi.restoreAllMocks(); + // Clean up temp directory + try { + await fs.rm(tempDir, { recursive: true, force: true }); + } catch { + // Ignore cleanup errors + } + }); + + 
describe('getClaudeAuthIndicators', () => { + it('should detect Claude Code CLI OAuth format (claudeAiOauth)', async () => { + const credentialsContent = JSON.stringify({ + claudeAiOauth: { + accessToken: 'oauth-access-token-12345', + refreshToken: 'oauth-refresh-token-67890', + expiresAt: Date.now() + 3600000, + }, + }); + + await fs.writeFile(path.join(mockClaudeDir, '.credentials.json'), credentialsContent); + + const { getClaudeAuthIndicators } = await import('../src/system-paths'); + const indicators = await getClaudeAuthIndicators(); + + expect(indicators.hasCredentialsFile).toBe(true); + expect(indicators.credentials).not.toBeNull(); + expect(indicators.credentials?.hasOAuthToken).toBe(true); + expect(indicators.credentials?.hasApiKey).toBe(false); + }); + + it('should detect legacy OAuth token format (oauth_token)', async () => { + const credentialsContent = JSON.stringify({ + oauth_token: 'legacy-oauth-token-abcdef', + }); + + await fs.writeFile(path.join(mockClaudeDir, '.credentials.json'), credentialsContent); + + const { getClaudeAuthIndicators } = await import('../src/system-paths'); + const indicators = await getClaudeAuthIndicators(); + + expect(indicators.hasCredentialsFile).toBe(true); + expect(indicators.credentials?.hasOAuthToken).toBe(true); + expect(indicators.credentials?.hasApiKey).toBe(false); + }); + + it('should detect legacy access_token format', async () => { + const credentialsContent = JSON.stringify({ + access_token: 'legacy-access-token-xyz', + }); + + await fs.writeFile(path.join(mockClaudeDir, '.credentials.json'), credentialsContent); + + const { getClaudeAuthIndicators } = await import('../src/system-paths'); + const indicators = await getClaudeAuthIndicators(); + + expect(indicators.hasCredentialsFile).toBe(true); + expect(indicators.credentials?.hasOAuthToken).toBe(true); + expect(indicators.credentials?.hasApiKey).toBe(false); + }); + + it('should detect API key format', async () => { + const credentialsContent = 
JSON.stringify({ + api_key: 'sk-ant-api03-xxxxxxxxxxxx', + }); + + await fs.writeFile(path.join(mockClaudeDir, '.credentials.json'), credentialsContent); + + const { getClaudeAuthIndicators } = await import('../src/system-paths'); + const indicators = await getClaudeAuthIndicators(); + + expect(indicators.hasCredentialsFile).toBe(true); + expect(indicators.credentials?.hasOAuthToken).toBe(false); + expect(indicators.credentials?.hasApiKey).toBe(true); + }); + + it('should detect both OAuth and API key when present', async () => { + const credentialsContent = JSON.stringify({ + claudeAiOauth: { + accessToken: 'oauth-token', + refreshToken: 'refresh-token', + }, + api_key: 'sk-ant-api03-xxxxxxxxxxxx', + }); + + await fs.writeFile(path.join(mockClaudeDir, '.credentials.json'), credentialsContent); + + const { getClaudeAuthIndicators } = await import('../src/system-paths'); + const indicators = await getClaudeAuthIndicators(); + + expect(indicators.hasCredentialsFile).toBe(true); + expect(indicators.credentials?.hasOAuthToken).toBe(true); + expect(indicators.credentials?.hasApiKey).toBe(true); + }); + + it('should handle missing credentials file gracefully', async () => { + // No credentials file created + const { getClaudeAuthIndicators } = await import('../src/system-paths'); + const indicators = await getClaudeAuthIndicators(); + + expect(indicators.hasCredentialsFile).toBe(false); + expect(indicators.credentials).toBeNull(); + expect(indicators.checks.credentialFiles).toBeDefined(); + expect(indicators.checks.credentialFiles.length).toBeGreaterThan(0); + expect(indicators.checks.credentialFiles[0].exists).toBe(false); + }); + + it('should handle malformed JSON in credentials file', async () => { + const malformedContent = '{ invalid json }'; + + await fs.writeFile(path.join(mockClaudeDir, '.credentials.json'), malformedContent); + + const { getClaudeAuthIndicators } = await import('../src/system-paths'); + const indicators = await getClaudeAuthIndicators(); + + // 
File exists but parsing fails + expect(indicators.hasCredentialsFile).toBe(false); + expect(indicators.credentials).toBeNull(); + expect(indicators.checks.credentialFiles[0].exists).toBe(true); + expect(indicators.checks.credentialFiles[0].error).toContain('JSON parse error'); + }); + + it('should handle empty credentials file', async () => { + const emptyContent = JSON.stringify({}); + + await fs.writeFile(path.join(mockClaudeDir, '.credentials.json'), emptyContent); + + const { getClaudeAuthIndicators } = await import('../src/system-paths'); + const indicators = await getClaudeAuthIndicators(); + + expect(indicators.hasCredentialsFile).toBe(true); + expect(indicators.credentials).not.toBeNull(); + expect(indicators.credentials?.hasOAuthToken).toBe(false); + expect(indicators.credentials?.hasApiKey).toBe(false); + }); + + it('should handle credentials file with null values', async () => { + const nullContent = JSON.stringify({ + claudeAiOauth: null, + api_key: null, + oauth_token: null, + }); + + await fs.writeFile(path.join(mockClaudeDir, '.credentials.json'), nullContent); + + const { getClaudeAuthIndicators } = await import('../src/system-paths'); + const indicators = await getClaudeAuthIndicators(); + + expect(indicators.hasCredentialsFile).toBe(true); + expect(indicators.credentials?.hasOAuthToken).toBe(false); + expect(indicators.credentials?.hasApiKey).toBe(false); + }); + + it('should handle credentials with empty string values', async () => { + const emptyStrings = JSON.stringify({ + claudeAiOauth: { + accessToken: '', + refreshToken: '', + }, + api_key: '', + }); + + await fs.writeFile(path.join(mockClaudeDir, '.credentials.json'), emptyStrings); + + const { getClaudeAuthIndicators } = await import('../src/system-paths'); + const indicators = await getClaudeAuthIndicators(); + + expect(indicators.hasCredentialsFile).toBe(true); + // Empty strings should not be treated as valid credentials + expect(indicators.credentials?.hasOAuthToken).toBe(false); + 
expect(indicators.credentials?.hasApiKey).toBe(false); + }); + + it('should detect settings file presence', async () => { + await fs.writeFile( + path.join(mockClaudeDir, 'settings.json'), + JSON.stringify({ theme: 'dark' }) + ); + + const { getClaudeAuthIndicators } = await import('../src/system-paths'); + const indicators = await getClaudeAuthIndicators(); + + expect(indicators.hasSettingsFile).toBe(true); + expect(indicators.checks.settingsFile.exists).toBe(true); + expect(indicators.checks.settingsFile.readable).toBe(true); + }); + + it('should detect stats cache with activity', async () => { + const statsContent = JSON.stringify({ + dailyActivity: [ + { date: '2025-01-15', messagesCount: 10 }, + { date: '2025-01-16', messagesCount: 5 }, + ], + }); + + await fs.writeFile(path.join(mockClaudeDir, 'stats-cache.json'), statsContent); + + const { getClaudeAuthIndicators } = await import('../src/system-paths'); + const indicators = await getClaudeAuthIndicators(); + + expect(indicators.hasStatsCacheWithActivity).toBe(true); + expect(indicators.checks.statsCache.exists).toBe(true); + expect(indicators.checks.statsCache.hasDailyActivity).toBe(true); + }); + + it('should detect stats cache without activity', async () => { + const statsContent = JSON.stringify({ + dailyActivity: [], + }); + + await fs.writeFile(path.join(mockClaudeDir, 'stats-cache.json'), statsContent); + + const { getClaudeAuthIndicators } = await import('../src/system-paths'); + const indicators = await getClaudeAuthIndicators(); + + expect(indicators.hasStatsCacheWithActivity).toBe(false); + expect(indicators.checks.statsCache.exists).toBe(true); + expect(indicators.checks.statsCache.hasDailyActivity).toBe(false); + }); + + it('should detect project sessions', async () => { + const projectsDir = path.join(mockClaudeDir, 'projects'); + await fs.mkdir(projectsDir, { recursive: true }); + await fs.mkdir(path.join(projectsDir, 'session-1')); + await fs.mkdir(path.join(projectsDir, 'session-2')); + + 
const { getClaudeAuthIndicators } = await import('../src/system-paths'); + const indicators = await getClaudeAuthIndicators(); + + expect(indicators.hasProjectsSessions).toBe(true); + expect(indicators.checks.projectsDir.exists).toBe(true); + expect(indicators.checks.projectsDir.entryCount).toBe(2); + }); + + it('should return comprehensive check details', async () => { + const { getClaudeAuthIndicators } = await import('../src/system-paths'); + const indicators = await getClaudeAuthIndicators(); + + // Verify all check detail objects are present + expect(indicators.checks).toBeDefined(); + expect(indicators.checks.settingsFile).toBeDefined(); + expect(indicators.checks.settingsFile.path).toContain('settings.json'); + expect(indicators.checks.statsCache).toBeDefined(); + expect(indicators.checks.statsCache.path).toContain('stats-cache.json'); + expect(indicators.checks.projectsDir).toBeDefined(); + expect(indicators.checks.projectsDir.path).toContain('projects'); + expect(indicators.checks.credentialFiles).toBeDefined(); + expect(Array.isArray(indicators.checks.credentialFiles)).toBe(true); + }); + + it('should try both .credentials.json and credentials.json paths', async () => { + // Write to credentials.json (without leading dot) + const credentialsContent = JSON.stringify({ + api_key: 'sk-test-key', + }); + + await fs.writeFile(path.join(mockClaudeDir, 'credentials.json'), credentialsContent); + + const { getClaudeAuthIndicators } = await import('../src/system-paths'); + const indicators = await getClaudeAuthIndicators(); + + // Should find credentials in the second path + expect(indicators.hasCredentialsFile).toBe(true); + expect(indicators.credentials?.hasApiKey).toBe(true); + }); + + it('should prefer first credentials file if both exist', async () => { + // Write OAuth to .credentials.json (first path checked) + await fs.writeFile( + path.join(mockClaudeDir, '.credentials.json'), + JSON.stringify({ + claudeAiOauth: { + accessToken: 'oauth-token', + 
refreshToken: 'refresh-token', + }, + }) + ); + + // Write API key to credentials.json (second path) + await fs.writeFile( + path.join(mockClaudeDir, 'credentials.json'), + JSON.stringify({ + api_key: 'sk-test-key', + }) + ); + + const { getClaudeAuthIndicators } = await import('../src/system-paths'); + const indicators = await getClaudeAuthIndicators(); + + // Should use first file (.credentials.json) which has OAuth + expect(indicators.hasCredentialsFile).toBe(true); + expect(indicators.credentials?.hasOAuthToken).toBe(true); + expect(indicators.credentials?.hasApiKey).toBe(false); + }); + }); + + describe('getCodexAuthIndicators', () => { + it('should detect OAuth token in Codex auth file', async () => { + const authContent = JSON.stringify({ + access_token: 'codex-oauth-token-12345', + }); + + await fs.writeFile(path.join(mockCodexDir, 'auth.json'), authContent); + + const { getCodexAuthIndicators } = await import('../src/system-paths'); + const indicators = await getCodexAuthIndicators(); + + expect(indicators.hasAuthFile).toBe(true); + expect(indicators.hasOAuthToken).toBe(true); + expect(indicators.hasApiKey).toBe(false); + }); + + it('should detect API key in Codex auth file', async () => { + const authContent = JSON.stringify({ + OPENAI_API_KEY: 'sk-xxxxxxxxxxxxxxxx', + }); + + await fs.writeFile(path.join(mockCodexDir, 'auth.json'), authContent); + + const { getCodexAuthIndicators } = await import('../src/system-paths'); + const indicators = await getCodexAuthIndicators(); + + expect(indicators.hasAuthFile).toBe(true); + expect(indicators.hasOAuthToken).toBe(false); + expect(indicators.hasApiKey).toBe(true); + }); + + it('should detect nested tokens in Codex auth file', async () => { + const authContent = JSON.stringify({ + tokens: { + oauth_token: 'nested-oauth-token', + }, + }); + + await fs.writeFile(path.join(mockCodexDir, 'auth.json'), authContent); + + const { getCodexAuthIndicators } = await import('../src/system-paths'); + const indicators = await 
getCodexAuthIndicators(); + + expect(indicators.hasAuthFile).toBe(true); + expect(indicators.hasOAuthToken).toBe(true); + }); + + it('should handle missing Codex auth file', async () => { + // No auth file created + const { getCodexAuthIndicators } = await import('../src/system-paths'); + const indicators = await getCodexAuthIndicators(); + + expect(indicators.hasAuthFile).toBe(false); + expect(indicators.hasOAuthToken).toBe(false); + expect(indicators.hasApiKey).toBe(false); + }); + + it('should detect api_key field in Codex auth', async () => { + const authContent = JSON.stringify({ + api_key: 'sk-api-key-value', + }); + + await fs.writeFile(path.join(mockCodexDir, 'auth.json'), authContent); + + const { getCodexAuthIndicators } = await import('../src/system-paths'); + const indicators = await getCodexAuthIndicators(); + + expect(indicators.hasAuthFile).toBe(true); + expect(indicators.hasApiKey).toBe(true); + }); + }); + + describe('getOpenCodeAuthIndicators', () => { + it('should detect provider-specific OAuth credentials', async () => { + const authContent = JSON.stringify({ + anthropic: { + type: 'oauth', + access: 'oauth-access-token', + refresh: 'oauth-refresh-token', + }, + }); + + await fs.writeFile(path.join(mockOpenCodeDir, 'auth.json'), authContent); + + const { getOpenCodeAuthIndicators } = await import('../src/system-paths'); + const indicators = await getOpenCodeAuthIndicators(); + + expect(indicators.hasAuthFile).toBe(true); + expect(indicators.hasOAuthToken).toBe(true); + expect(indicators.hasApiKey).toBe(false); + }); + + it('should detect GitHub Copilot refresh token as OAuth', async () => { + const authContent = JSON.stringify({ + 'github-copilot': { + type: 'oauth', + access: '', // Empty access token + refresh: 'gh-refresh-token', // But has refresh token + }, + }); + + await fs.writeFile(path.join(mockOpenCodeDir, 'auth.json'), authContent); + + const { getOpenCodeAuthIndicators } = await import('../src/system-paths'); + const indicators = 
await getOpenCodeAuthIndicators(); + + expect(indicators.hasAuthFile).toBe(true); + expect(indicators.hasOAuthToken).toBe(true); + }); + + it('should detect provider-specific API key credentials', async () => { + const authContent = JSON.stringify({ + openai: { + type: 'api_key', + key: 'sk-xxxxxxxxxxxx', + }, + }); + + await fs.writeFile(path.join(mockOpenCodeDir, 'auth.json'), authContent); + + const { getOpenCodeAuthIndicators } = await import('../src/system-paths'); + const indicators = await getOpenCodeAuthIndicators(); + + expect(indicators.hasAuthFile).toBe(true); + expect(indicators.hasOAuthToken).toBe(false); + expect(indicators.hasApiKey).toBe(true); + }); + + it('should detect multiple providers', async () => { + const authContent = JSON.stringify({ + anthropic: { + type: 'oauth', + access: 'anthropic-token', + refresh: 'refresh-token', + }, + openai: { + type: 'api_key', + key: 'sk-xxxxxxxxxxxx', + }, + }); + + await fs.writeFile(path.join(mockOpenCodeDir, 'auth.json'), authContent); + + const { getOpenCodeAuthIndicators } = await import('../src/system-paths'); + const indicators = await getOpenCodeAuthIndicators(); + + expect(indicators.hasAuthFile).toBe(true); + expect(indicators.hasOAuthToken).toBe(true); + expect(indicators.hasApiKey).toBe(true); + }); + + it('should handle missing OpenCode auth file', async () => { + // No auth file created + const { getOpenCodeAuthIndicators } = await import('../src/system-paths'); + const indicators = await getOpenCodeAuthIndicators(); + + expect(indicators.hasAuthFile).toBe(false); + expect(indicators.hasOAuthToken).toBe(false); + expect(indicators.hasApiKey).toBe(false); + }); + + it('should handle legacy top-level OAuth keys', async () => { + const authContent = JSON.stringify({ + access_token: 'legacy-access-token', + }); + + await fs.writeFile(path.join(mockOpenCodeDir, 'auth.json'), authContent); + + const { getOpenCodeAuthIndicators } = await import('../src/system-paths'); + const indicators = await 
getOpenCodeAuthIndicators(); + + expect(indicators.hasAuthFile).toBe(true); + expect(indicators.hasOAuthToken).toBe(true); + }); + + it('should detect copilot provider OAuth', async () => { + const authContent = JSON.stringify({ + copilot: { + type: 'oauth', + access: 'copilot-access-token', + refresh: 'copilot-refresh-token', + }, + }); + + await fs.writeFile(path.join(mockOpenCodeDir, 'auth.json'), authContent); + + const { getOpenCodeAuthIndicators } = await import('../src/system-paths'); + const indicators = await getOpenCodeAuthIndicators(); + + expect(indicators.hasAuthFile).toBe(true); + expect(indicators.hasOAuthToken).toBe(true); + }); + }); + + describe('Credential path helpers', () => { + it('should return correct Claude credential paths', async () => { + const { getClaudeCredentialPaths, getClaudeConfigDir } = await import('../src/system-paths'); + + const configDir = getClaudeConfigDir(); + expect(configDir).toContain('.claude'); + + const credPaths = getClaudeCredentialPaths(); + expect(credPaths.length).toBeGreaterThan(0); + expect(credPaths.some((p) => p.includes('.credentials.json'))).toBe(true); + expect(credPaths.some((p) => p.includes('credentials.json'))).toBe(true); + }); + + it('should return correct Codex auth path', async () => { + const { getCodexAuthPath, getCodexConfigDir } = await import('../src/system-paths'); + + const configDir = getCodexConfigDir(); + expect(configDir).toContain('.codex'); + + const authPath = getCodexAuthPath(); + expect(authPath).toContain('.codex'); + expect(authPath).toContain('auth.json'); + }); + + it('should return correct OpenCode auth path', async () => { + const { getOpenCodeAuthPath, getOpenCodeConfigDir } = await import('../src/system-paths'); + + const configDir = getOpenCodeConfigDir(); + expect(configDir).toContain('opencode'); + + const authPath = getOpenCodeAuthPath(); + expect(authPath).toContain('opencode'); + expect(authPath).toContain('auth.json'); + }); + }); + + describe('Edge cases for 
credential detection', () => { + it('should handle credentials file with unexpected structure', async () => { + const unexpectedContent = JSON.stringify({ + someUnexpectedKey: 'value', + nested: { + deeply: { + unexpected: true, + }, + }, + }); + + await fs.writeFile(path.join(mockClaudeDir, '.credentials.json'), unexpectedContent); + + const { getClaudeAuthIndicators } = await import('../src/system-paths'); + const indicators = await getClaudeAuthIndicators(); + + expect(indicators.hasCredentialsFile).toBe(true); + expect(indicators.credentials?.hasOAuthToken).toBe(false); + expect(indicators.credentials?.hasApiKey).toBe(false); + }); + + it('should handle array instead of object in credentials', async () => { + const arrayContent = JSON.stringify(['token1', 'token2']); + + await fs.writeFile(path.join(mockClaudeDir, '.credentials.json'), arrayContent); + + const { getClaudeAuthIndicators } = await import('../src/system-paths'); + const indicators = await getClaudeAuthIndicators(); + + // Array is valid JSON but wrong structure - should handle gracefully + expect(indicators.hasCredentialsFile).toBe(true); + expect(indicators.credentials?.hasOAuthToken).toBe(false); + expect(indicators.credentials?.hasApiKey).toBe(false); + }); + + it('should handle numeric values in credential fields', async () => { + const numericContent = JSON.stringify({ + api_key: 12345, + oauth_token: 67890, + }); + + await fs.writeFile(path.join(mockClaudeDir, '.credentials.json'), numericContent); + + const { getClaudeAuthIndicators } = await import('../src/system-paths'); + const indicators = await getClaudeAuthIndicators(); + + // Note: Current implementation uses JavaScript truthiness which accepts numbers + // This documents the actual behavior - ideally would validate string type + expect(indicators.hasCredentialsFile).toBe(true); + // The implementation checks truthiness, not strict string type + expect(indicators.credentials?.hasOAuthToken).toBe(true); + 
expect(indicators.credentials?.hasApiKey).toBe(true); + }); + + it('should handle boolean values in credential fields', async () => { + const booleanContent = JSON.stringify({ + api_key: true, + oauth_token: false, + }); + + await fs.writeFile(path.join(mockClaudeDir, '.credentials.json'), booleanContent); + + const { getClaudeAuthIndicators } = await import('../src/system-paths'); + const indicators = await getClaudeAuthIndicators(); + + // Note: Current implementation uses JavaScript truthiness + // api_key: true is truthy, oauth_token: false is falsy + expect(indicators.hasCredentialsFile).toBe(true); + expect(indicators.credentials?.hasOAuthToken).toBe(false); // false is falsy + expect(indicators.credentials?.hasApiKey).toBe(true); // true is truthy + }); + + it('should handle malformed stats-cache.json gracefully', async () => { + await fs.writeFile(path.join(mockClaudeDir, 'stats-cache.json'), '{ invalid json }'); + + const { getClaudeAuthIndicators } = await import('../src/system-paths'); + const indicators = await getClaudeAuthIndicators(); + + expect(indicators.hasStatsCacheWithActivity).toBe(false); + expect(indicators.checks.statsCache.exists).toBe(true); + expect(indicators.checks.statsCache.error).toBeDefined(); + }); + + it('should handle empty projects directory', async () => { + const projectsDir = path.join(mockClaudeDir, 'projects'); + await fs.mkdir(projectsDir, { recursive: true }); + + const { getClaudeAuthIndicators } = await import('../src/system-paths'); + const indicators = await getClaudeAuthIndicators(); + + expect(indicators.hasProjectsSessions).toBe(false); + expect(indicators.checks.projectsDir.exists).toBe(true); + expect(indicators.checks.projectsDir.entryCount).toBe(0); + }); + }); + + describe('Combined authentication scenarios', () => { + it('should detect CLI authenticated state with settings + sessions', async () => { + // Create settings file + await fs.writeFile( + path.join(mockClaudeDir, 'settings.json'), + JSON.stringify({ 
theme: 'dark' }) + ); + + // Create projects directory with sessions + const projectsDir = path.join(mockClaudeDir, 'projects'); + await fs.mkdir(projectsDir, { recursive: true }); + await fs.mkdir(path.join(projectsDir, 'session-1')); + + const { getClaudeAuthIndicators } = await import('../src/system-paths'); + const indicators = await getClaudeAuthIndicators(); + + expect(indicators.hasSettingsFile).toBe(true); + expect(indicators.hasProjectsSessions).toBe(true); + }); + + it('should detect recent activity indicating working auth', async () => { + // Create stats cache with recent activity + await fs.writeFile( + path.join(mockClaudeDir, 'stats-cache.json'), + JSON.stringify({ + dailyActivity: [{ date: new Date().toISOString().split('T')[0], messagesCount: 10 }], + }) + ); + + const { getClaudeAuthIndicators } = await import('../src/system-paths'); + const indicators = await getClaudeAuthIndicators(); + + expect(indicators.hasStatsCacheWithActivity).toBe(true); + }); + + it('should handle complete auth setup', async () => { + // Create all auth indicators + await fs.writeFile( + path.join(mockClaudeDir, '.credentials.json'), + JSON.stringify({ + claudeAiOauth: { + accessToken: 'token', + refreshToken: 'refresh', + }, + }) + ); + await fs.writeFile( + path.join(mockClaudeDir, 'settings.json'), + JSON.stringify({ theme: 'dark' }) + ); + await fs.writeFile( + path.join(mockClaudeDir, 'stats-cache.json'), + JSON.stringify({ dailyActivity: [{ date: '2025-01-15', messagesCount: 5 }] }) + ); + const projectsDir = path.join(mockClaudeDir, 'projects'); + await fs.mkdir(projectsDir, { recursive: true }); + await fs.mkdir(path.join(projectsDir, 'session-1')); + + const { getClaudeAuthIndicators } = await import('../src/system-paths'); + const indicators = await getClaudeAuthIndicators(); + + expect(indicators.hasCredentialsFile).toBe(true); + expect(indicators.hasSettingsFile).toBe(true); + expect(indicators.hasStatsCacheWithActivity).toBe(true); + 
expect(indicators.hasProjectsSessions).toBe(true); + expect(indicators.credentials?.hasOAuthToken).toBe(true); + }); + }); +}); From 0aef72540e5f9eff7919f5b2157ce7c5f1141204 Mon Sep 17 00:00:00 2001 From: Kacper Date: Mon, 2 Feb 2026 17:54:23 +0100 Subject: [PATCH 05/89] fix(auth): Enhance credential detection logic for OAuth - Updated getClaudeAuthIndicators() to ensure that empty or token-less credential files do not prevent the detection of valid credentials in subsequent paths. - Improved error handling for settings file readability checks, providing clearer feedback on file access issues. - Added unit tests to validate the new behavior, ensuring that the system continues to check all credential paths even when some files are empty or invalid. This change improves the robustness of the credential detection process and enhances user experience by allowing for more flexible credential management. --- libs/platform/src/system-paths.ts | 37 ++++++++--- .../tests/oauth-credential-detection.test.ts | 61 +++++++++++++------ 2 files changed, 72 insertions(+), 26 deletions(-) diff --git a/libs/platform/src/system-paths.ts b/libs/platform/src/system-paths.ts index fb5e6bd3..f1749464 100644 --- a/libs/platform/src/system-paths.ts +++ b/libs/platform/src/system-paths.ts @@ -1065,11 +1065,20 @@ export async function getClaudeAuthIndicators(): Promise { }; // Check settings file + // First check existence, then try to read to confirm it's actually readable try { if (await systemPathAccess(settingsPath)) { settingsFileCheck.exists = true; - settingsFileCheck.readable = true; - result.hasSettingsFile = true; + // Try to actually read the file to confirm read permissions + try { + await systemPathReadFile(settingsPath); + settingsFileCheck.readable = true; + result.hasSettingsFile = true; + } catch (readErr) { + // File exists but cannot be read (permission denied, etc.) + settingsFileCheck.readable = false; + settingsFileCheck.error = `Cannot read: ${readErr instanceof Error ? 
readErr.message : String(readErr)}`; + } } } catch (err) { settingsFileCheck.error = err instanceof Error ? err.message : String(err); @@ -1117,6 +1126,9 @@ export async function getClaudeAuthIndicators(): Promise { } // Check credentials files + // We iterate through all credential paths and only stop when we find a file + // that contains actual credentials (OAuth tokens or API keys). An empty or + // token-less file should not prevent checking subsequent credential paths. for (let i = 0; i < credentialPaths.length; i++) { const credPath = credentialPaths[i]; const credCheck = credentialFileChecks[i]; @@ -1126,18 +1138,27 @@ export async function getClaudeAuthIndicators(): Promise { credCheck.readable = true; try { const credentials = JSON.parse(content); - result.hasCredentialsFile = true; // Support multiple credential formats: // 1. Claude Code CLI format: { claudeAiOauth: { accessToken, refreshToken } } // 2. Legacy format: { oauth_token } or { access_token } // 3. API key format: { api_key } const hasClaudeOauth = !!credentials.claudeAiOauth?.accessToken; const hasLegacyOauth = !!(credentials.oauth_token || credentials.access_token); - result.credentials = { - hasOAuthToken: hasClaudeOauth || hasLegacyOauth, - hasApiKey: !!credentials.api_key, - }; - break; + const hasOAuthToken = hasClaudeOauth || hasLegacyOauth; + const hasApiKey = !!credentials.api_key; + + // Only consider this a valid credentials file if it actually contains tokens + // An empty JSON file ({}) or file without tokens should not stop us from + // checking subsequent credential paths + if (hasOAuthToken || hasApiKey) { + result.hasCredentialsFile = true; + result.credentials = { + hasOAuthToken, + hasApiKey, + }; + break; // Found valid credentials, stop searching + } + // File exists and is valid JSON but contains no tokens - continue checking other paths } catch (parseErr) { credCheck.error = `JSON parse error: ${parseErr instanceof Error ? 
parseErr.message : String(parseErr)}`; } diff --git a/libs/platform/tests/oauth-credential-detection.test.ts b/libs/platform/tests/oauth-credential-detection.test.ts index cf5a4705..6e445b22 100644 --- a/libs/platform/tests/oauth-credential-detection.test.ts +++ b/libs/platform/tests/oauth-credential-detection.test.ts @@ -173,10 +173,14 @@ describe('OAuth Credential Detection', () => { const { getClaudeAuthIndicators } = await import('../src/system-paths'); const indicators = await getClaudeAuthIndicators(); - expect(indicators.hasCredentialsFile).toBe(true); - expect(indicators.credentials).not.toBeNull(); - expect(indicators.credentials?.hasOAuthToken).toBe(false); - expect(indicators.credentials?.hasApiKey).toBe(false); + // Empty credentials file ({}) should NOT be treated as having credentials + // because it contains no actual tokens. This allows the system to continue + // checking subsequent credential paths that might have valid tokens. + expect(indicators.hasCredentialsFile).toBe(false); + expect(indicators.credentials).toBeNull(); + // But the file should still show as existing and readable in the checks + expect(indicators.checks.credentialFiles[0].exists).toBe(true); + expect(indicators.checks.credentialFiles[0].readable).toBe(true); }); it('should handle credentials file with null values', async () => { @@ -191,9 +195,10 @@ describe('OAuth Credential Detection', () => { const { getClaudeAuthIndicators } = await import('../src/system-paths'); const indicators = await getClaudeAuthIndicators(); - expect(indicators.hasCredentialsFile).toBe(true); - expect(indicators.credentials?.hasOAuthToken).toBe(false); - expect(indicators.credentials?.hasApiKey).toBe(false); + // File with all null values should NOT be treated as having credentials + // because null values are not valid tokens + expect(indicators.hasCredentialsFile).toBe(false); + expect(indicators.credentials).toBeNull(); }); it('should handle credentials with empty string values', async () => { @@ 
-210,10 +215,10 @@ describe('OAuth Credential Detection', () => { const { getClaudeAuthIndicators } = await import('../src/system-paths'); const indicators = await getClaudeAuthIndicators(); - expect(indicators.hasCredentialsFile).toBe(true); - // Empty strings should not be treated as valid credentials - expect(indicators.credentials?.hasOAuthToken).toBe(false); - expect(indicators.credentials?.hasApiKey).toBe(false); + // Empty strings should NOT be treated as having credentials + // This allows checking subsequent credential paths for valid tokens + expect(indicators.hasCredentialsFile).toBe(false); + expect(indicators.credentials).toBeNull(); }); it('should detect settings file presence', async () => { @@ -337,6 +342,27 @@ describe('OAuth Credential Detection', () => { expect(indicators.credentials?.hasOAuthToken).toBe(true); expect(indicators.credentials?.hasApiKey).toBe(false); }); + + it('should check second credentials file if first file has no tokens', async () => { + // Write empty/token-less content to .credentials.json (first path checked) + // This tests the bug fix: previously, an empty JSON file would stop the search + await fs.writeFile(path.join(mockClaudeDir, '.credentials.json'), JSON.stringify({})); + + // Write actual credentials to credentials.json (second path) + await fs.writeFile( + path.join(mockClaudeDir, 'credentials.json'), + JSON.stringify({ + api_key: 'sk-test-key-from-second-file', + }) + ); + + const { getClaudeAuthIndicators } = await import('../src/system-paths'); + const indicators = await getClaudeAuthIndicators(); + + // Should find credentials in second file since first file has no tokens + expect(indicators.hasCredentialsFile).toBe(true); + expect(indicators.credentials?.hasApiKey).toBe(true); + }); }); describe('getCodexAuthIndicators', () => { @@ -585,9 +611,9 @@ describe('OAuth Credential Detection', () => { const { getClaudeAuthIndicators } = await import('../src/system-paths'); const indicators = await 
getClaudeAuthIndicators(); - expect(indicators.hasCredentialsFile).toBe(true); - expect(indicators.credentials?.hasOAuthToken).toBe(false); - expect(indicators.credentials?.hasApiKey).toBe(false); + // File with unexpected structure but no valid tokens should NOT be treated as having credentials + expect(indicators.hasCredentialsFile).toBe(false); + expect(indicators.credentials).toBeNull(); }); it('should handle array instead of object in credentials', async () => { @@ -598,10 +624,9 @@ describe('OAuth Credential Detection', () => { const { getClaudeAuthIndicators } = await import('../src/system-paths'); const indicators = await getClaudeAuthIndicators(); - // Array is valid JSON but wrong structure - should handle gracefully - expect(indicators.hasCredentialsFile).toBe(true); - expect(indicators.credentials?.hasOAuthToken).toBe(false); - expect(indicators.credentials?.hasApiKey).toBe(false); + // Array is valid JSON but wrong structure - no valid tokens, so not treated as credentials file + expect(indicators.hasCredentialsFile).toBe(false); + expect(indicators.credentials).toBeNull(); }); it('should handle numeric values in credential fields', async () => { From 3b361cb0b918b9e37b43c5136af68b7d91921a60 Mon Sep 17 00:00:00 2001 From: Kacper Date: Thu, 5 Feb 2026 22:17:55 +0100 Subject: [PATCH 06/89] chore: update Codex SDK to version 0.98.0 and add GPT-5.3-Codex model - Upgraded @openai/codex-sdk from version 0.77.0 to 0.98.0 in package-lock.json and package.json. - Introduced new model 'GPT-5.3-Codex' with enhanced capabilities in codex-models.ts and related files. - Updated descriptions for existing models to reflect their latest features and improvements. - Adjusted Codex model configuration and display to include the new model and its attributes. These changes enhance the Codex model offerings and ensure compatibility with the latest SDK version. 
--- apps/server/package.json | 2 +- apps/server/src/providers/codex-models.ts | 28 +++++++++++++------ .../providers/codex-model-configuration.tsx | 14 +++++++--- libs/platform/src/system-paths.ts | 24 ++++++++++++++-- libs/types/src/codex-models.ts | 16 ++++++++--- libs/types/src/model-display.ts | 17 ++++++++--- libs/types/src/model.ts | 11 +++++--- package-lock.json | 8 +++--- 8 files changed, 88 insertions(+), 32 deletions(-) diff --git a/apps/server/package.json b/apps/server/package.json index c9015aea..0b4deeac 100644 --- a/apps/server/package.json +++ b/apps/server/package.json @@ -34,7 +34,7 @@ "@automaker/utils": "1.0.0", "@github/copilot-sdk": "^0.1.16", "@modelcontextprotocol/sdk": "1.25.2", - "@openai/codex-sdk": "^0.77.0", + "@openai/codex-sdk": "^0.98.0", "cookie-parser": "1.4.7", "cors": "2.8.5", "dotenv": "17.2.3", diff --git a/apps/server/src/providers/codex-models.ts b/apps/server/src/providers/codex-models.ts index 141d5355..7840888b 100644 --- a/apps/server/src/providers/codex-models.ts +++ b/apps/server/src/providers/codex-models.ts @@ -19,12 +19,11 @@ const MAX_OUTPUT_16K = 16000; export const CODEX_MODELS: ModelDefinition[] = [ // ========== Recommended Codex Models ========== { - id: CODEX_MODEL_MAP.gpt52Codex, - name: 'GPT-5.2-Codex', - modelString: CODEX_MODEL_MAP.gpt52Codex, + id: CODEX_MODEL_MAP.gpt53Codex, + name: 'GPT-5.3-Codex', + modelString: CODEX_MODEL_MAP.gpt53Codex, provider: 'openai', - description: - 'Most advanced agentic coding model for complex software engineering (default for ChatGPT users).', + description: 'Latest frontier agentic coding model.', contextWindow: CONTEXT_WINDOW_256K, maxOutputTokens: MAX_OUTPUT_32K, supportsVision: true, @@ -33,12 +32,25 @@ export const CODEX_MODELS: ModelDefinition[] = [ default: true, hasReasoning: true, }, + { + id: CODEX_MODEL_MAP.gpt52Codex, + name: 'GPT-5.2-Codex', + modelString: CODEX_MODEL_MAP.gpt52Codex, + provider: 'openai', + description: 'Frontier agentic coding model.', + 
contextWindow: CONTEXT_WINDOW_256K, + maxOutputTokens: MAX_OUTPUT_32K, + supportsVision: true, + supportsTools: true, + tier: 'premium' as const, + hasReasoning: true, + }, { id: CODEX_MODEL_MAP.gpt51CodexMax, name: 'GPT-5.1-Codex-Max', modelString: CODEX_MODEL_MAP.gpt51CodexMax, provider: 'openai', - description: 'Optimized for long-horizon, agentic coding tasks in Codex.', + description: 'Codex-optimized flagship for deep and fast reasoning.', contextWindow: CONTEXT_WINDOW_256K, maxOutputTokens: MAX_OUTPUT_32K, supportsVision: true, @@ -51,7 +63,7 @@ export const CODEX_MODELS: ModelDefinition[] = [ name: 'GPT-5.1-Codex-Mini', modelString: CODEX_MODEL_MAP.gpt51CodexMini, provider: 'openai', - description: 'Smaller, more cost-effective version for faster workflows.', + description: 'Optimized for codex. Cheaper, faster, but less capable.', contextWindow: CONTEXT_WINDOW_128K, maxOutputTokens: MAX_OUTPUT_16K, supportsVision: true, @@ -66,7 +78,7 @@ export const CODEX_MODELS: ModelDefinition[] = [ name: 'GPT-5.2', modelString: CODEX_MODEL_MAP.gpt52, provider: 'openai', - description: 'Best general agentic model for tasks across industries and domains.', + description: 'Latest frontier model with improvements across knowledge, reasoning and coding.', contextWindow: CONTEXT_WINDOW_256K, maxOutputTokens: MAX_OUTPUT_32K, supportsVision: true, diff --git a/apps/ui/src/components/views/settings-view/providers/codex-model-configuration.tsx b/apps/ui/src/components/views/settings-view/providers/codex-model-configuration.tsx index a9d8c06e..de1d9555 100644 --- a/apps/ui/src/components/views/settings-view/providers/codex-model-configuration.tsx +++ b/apps/ui/src/components/views/settings-view/providers/codex-model-configuration.tsx @@ -27,25 +27,30 @@ interface CodexModelInfo { } const CODEX_MODEL_INFO: Record = { + 'codex-gpt-5.3-codex': { + id: 'codex-gpt-5.3-codex', + label: 'GPT-5.3-Codex', + description: 'Latest frontier agentic coding model', + }, 'codex-gpt-5.2-codex': { 
id: 'codex-gpt-5.2-codex', label: 'GPT-5.2-Codex', - description: 'Most advanced agentic coding model for complex software engineering', + description: 'Frontier agentic coding model', }, 'codex-gpt-5.1-codex-max': { id: 'codex-gpt-5.1-codex-max', label: 'GPT-5.1-Codex-Max', - description: 'Optimized for long-horizon, agentic coding tasks in Codex', + description: 'Codex-optimized flagship for deep and fast reasoning', }, 'codex-gpt-5.1-codex-mini': { id: 'codex-gpt-5.1-codex-mini', label: 'GPT-5.1-Codex-Mini', - description: 'Smaller, more cost-effective version for faster workflows', + description: 'Optimized for codex. Cheaper, faster, but less capable', }, 'codex-gpt-5.2': { id: 'codex-gpt-5.2', label: 'GPT-5.2', - description: 'Best general agentic model for tasks across industries and domains', + description: 'Latest frontier model with improvements across knowledge, reasoning and coding', }, 'codex-gpt-5.1': { id: 'codex-gpt-5.1', @@ -160,6 +165,7 @@ export function CodexModelConfiguration({ function supportsReasoningEffort(modelId: string): boolean { const reasoningModels = [ + 'codex-gpt-5.3-codex', 'codex-gpt-5.2-codex', 'codex-gpt-5.1-codex-max', 'codex-gpt-5.2', diff --git a/libs/platform/src/system-paths.ts b/libs/platform/src/system-paths.ts index 0d900dfa..ce1246eb 100644 --- a/libs/platform/src/system-paths.ts +++ b/libs/platform/src/system-paths.ts @@ -54,13 +54,19 @@ export function getClaudeCliPaths(): string[] { if (isWindows) { const appData = process.env.APPDATA || path.join(os.homedir(), 'AppData', 'Roaming'); - return [ + const nvmSymlink = process.env.NVM_SYMLINK; + const paths = [ path.join(os.homedir(), '.local', 'bin', 'claude.exe'), path.join(appData, 'npm', 'claude.cmd'), path.join(appData, 'npm', 'claude'), path.join(appData, '.npm-global', 'bin', 'claude.cmd'), path.join(appData, '.npm-global', 'bin', 'claude'), ]; + // nvm4w (NVM for Windows) symlink path + if (nvmSymlink) { + paths.push(path.join(nvmSymlink, 'claude.cmd'), 
path.join(nvmSymlink, 'claude')); + } + return paths; } return [ @@ -130,7 +136,8 @@ export function getCodexCliPaths(): string[] { if (isWindows) { const appData = process.env.APPDATA || path.join(homeDir, 'AppData', 'Roaming'); const localAppData = process.env.LOCALAPPDATA || path.join(homeDir, 'AppData', 'Local'); - return [ + const nvmSymlink = process.env.NVM_SYMLINK; + const paths = [ path.join(homeDir, '.local', 'bin', 'codex.exe'), path.join(appData, 'npm', 'codex.cmd'), path.join(appData, 'npm', 'codex'), @@ -142,6 +149,11 @@ export function getCodexCliPaths(): string[] { path.join(localAppData, 'pnpm', 'codex.cmd'), path.join(localAppData, 'pnpm', 'codex'), ]; + // nvm4w (NVM for Windows) symlink path + if (nvmSymlink) { + paths.push(path.join(nvmSymlink, 'codex.cmd'), path.join(nvmSymlink, 'codex')); + } + return paths; } // Include NVM bin paths for codex installed via npm global under NVM @@ -1126,7 +1138,8 @@ export function getOpenCodeCliPaths(): string[] { if (isWindows) { const appData = process.env.APPDATA || path.join(homeDir, 'AppData', 'Roaming'); const localAppData = process.env.LOCALAPPDATA || path.join(homeDir, 'AppData', 'Local'); - return [ + const nvmSymlink = process.env.NVM_SYMLINK; + const paths = [ // OpenCode's default installation directory path.join(homeDir, '.opencode', 'bin', 'opencode.exe'), path.join(homeDir, '.local', 'bin', 'opencode.exe'), @@ -1143,6 +1156,11 @@ export function getOpenCodeCliPaths(): string[] { path.join(homeDir, 'go', 'bin', 'opencode.exe'), path.join(process.env.GOPATH || path.join(homeDir, 'go'), 'bin', 'opencode.exe'), ]; + // nvm4w (NVM for Windows) symlink path + if (nvmSymlink) { + paths.push(path.join(nvmSymlink, 'opencode.cmd'), path.join(nvmSymlink, 'opencode')); + } + return paths; } // Include NVM bin paths for opencode installed via npm global under NVM diff --git a/libs/types/src/codex-models.ts b/libs/types/src/codex-models.ts index cf4db0ea..934218ea 100644 --- 
a/libs/types/src/codex-models.ts +++ b/libs/types/src/codex-models.ts @@ -6,6 +6,7 @@ * IMPORTANT: All Codex models use 'codex-' prefix to distinguish from Cursor CLI models */ export type CodexModelId = + | 'codex-gpt-5.3-codex' | 'codex-gpt-5.2-codex' | 'codex-gpt-5.1-codex-max' | 'codex-gpt-5.1-codex-mini' @@ -29,31 +30,38 @@ export interface CodexModelConfig { * All keys use 'codex-' prefix to distinguish from Cursor CLI models */ export const CODEX_MODEL_CONFIG_MAP: Record = { + 'codex-gpt-5.3-codex': { + id: 'codex-gpt-5.3-codex', + label: 'GPT-5.3-Codex', + description: 'Latest frontier agentic coding model', + hasThinking: true, + supportsVision: true, + }, 'codex-gpt-5.2-codex': { id: 'codex-gpt-5.2-codex', label: 'GPT-5.2-Codex', - description: 'Most advanced agentic coding model for complex software engineering', + description: 'Frontier agentic coding model', hasThinking: true, supportsVision: true, }, 'codex-gpt-5.1-codex-max': { id: 'codex-gpt-5.1-codex-max', label: 'GPT-5.1-Codex-Max', - description: 'Optimized for long-horizon, agentic coding tasks in Codex', + description: 'Codex-optimized flagship for deep and fast reasoning', hasThinking: true, supportsVision: true, }, 'codex-gpt-5.1-codex-mini': { id: 'codex-gpt-5.1-codex-mini', label: 'GPT-5.1-Codex-Mini', - description: 'Smaller, more cost-effective version for faster workflows', + description: 'Optimized for codex. 
Cheaper, faster, but less capable', hasThinking: false, supportsVision: true, }, 'codex-gpt-5.2': { id: 'codex-gpt-5.2', label: 'GPT-5.2 (Codex)', - description: 'Best general agentic model for tasks across industries and domains via Codex', + description: 'Latest frontier model with improvements across knowledge, reasoning and coding', hasThinking: true, supportsVision: true, }, diff --git a/libs/types/src/model-display.ts b/libs/types/src/model-display.ts index 28670328..08eaf208 100644 --- a/libs/types/src/model-display.ts +++ b/libs/types/src/model-display.ts @@ -72,10 +72,18 @@ export const CLAUDE_MODELS: ModelOption[] = [ * Official models from https://developers.openai.com/codex/models/ */ export const CODEX_MODELS: (ModelOption & { hasReasoning?: boolean })[] = [ + { + id: CODEX_MODEL_MAP.gpt53Codex, + label: 'GPT-5.3-Codex', + description: 'Latest frontier agentic coding model.', + badge: 'Premium', + provider: 'codex', + hasReasoning: true, + }, { id: CODEX_MODEL_MAP.gpt52Codex, label: 'GPT-5.2-Codex', - description: 'Most advanced agentic coding model for complex software engineering.', + description: 'Frontier agentic coding model.', badge: 'Premium', provider: 'codex', hasReasoning: true, @@ -83,7 +91,7 @@ export const CODEX_MODELS: (ModelOption & { hasReasoning?: boolean })[] = [ { id: CODEX_MODEL_MAP.gpt51CodexMax, label: 'GPT-5.1-Codex-Max', - description: 'Optimized for long-horizon, agentic coding tasks in Codex.', + description: 'Codex-optimized flagship for deep and fast reasoning.', badge: 'Premium', provider: 'codex', hasReasoning: true, @@ -91,7 +99,7 @@ export const CODEX_MODELS: (ModelOption & { hasReasoning?: boolean })[] = [ { id: CODEX_MODEL_MAP.gpt51CodexMini, label: 'GPT-5.1-Codex-Mini', - description: 'Smaller, more cost-effective version for faster workflows.', + description: 'Optimized for codex. 
Cheaper, faster, but less capable.', badge: 'Speed', provider: 'codex', hasReasoning: false, @@ -99,7 +107,7 @@ export const CODEX_MODELS: (ModelOption & { hasReasoning?: boolean })[] = [ { id: CODEX_MODEL_MAP.gpt52, label: 'GPT-5.2', - description: 'Best general agentic model for tasks across industries and domains.', + description: 'Latest frontier model with improvements across knowledge, reasoning and coding.', badge: 'Balanced', provider: 'codex', hasReasoning: true, @@ -211,6 +219,7 @@ export function getModelDisplayName(model: ModelAlias | string): string { haiku: 'Claude Haiku', sonnet: 'Claude Sonnet', opus: 'Claude Opus', + [CODEX_MODEL_MAP.gpt53Codex]: 'GPT-5.3-Codex', [CODEX_MODEL_MAP.gpt52Codex]: 'GPT-5.2-Codex', [CODEX_MODEL_MAP.gpt51CodexMax]: 'GPT-5.1-Codex-Max', [CODEX_MODEL_MAP.gpt51CodexMini]: 'GPT-5.1-Codex-Mini', diff --git a/libs/types/src/model.ts b/libs/types/src/model.ts index 5538989e..b6b90da9 100644 --- a/libs/types/src/model.ts +++ b/libs/types/src/model.ts @@ -50,15 +50,17 @@ export const LEGACY_CLAUDE_ALIAS_MAP: Record = { */ export const CODEX_MODEL_MAP = { // Recommended Codex-specific models - /** Most advanced agentic coding model for complex software engineering (default for ChatGPT users) */ + /** Latest frontier agentic coding model */ + gpt53Codex: 'codex-gpt-5.3-codex', + /** Frontier agentic coding model */ gpt52Codex: 'codex-gpt-5.2-codex', - /** Optimized for long-horizon, agentic coding tasks in Codex */ + /** Codex-optimized flagship for deep and fast reasoning */ gpt51CodexMax: 'codex-gpt-5.1-codex-max', - /** Smaller, more cost-effective version for faster workflows */ + /** Optimized for codex. 
Cheaper, faster, but less capable */ gpt51CodexMini: 'codex-gpt-5.1-codex-mini', // General-purpose GPT models (also available in Codex) - /** Best general agentic model for tasks across industries and domains */ + /** Latest frontier model with improvements across knowledge, reasoning and coding */ gpt52: 'codex-gpt-5.2', /** Great for coding and agentic tasks across domains */ gpt51: 'codex-gpt-5.1', @@ -71,6 +73,7 @@ export const CODEX_MODEL_IDS = Object.values(CODEX_MODEL_MAP); * These models can use reasoning.effort parameter */ export const REASONING_CAPABLE_MODELS = new Set([ + CODEX_MODEL_MAP.gpt53Codex, CODEX_MODEL_MAP.gpt52Codex, CODEX_MODEL_MAP.gpt51CodexMax, CODEX_MODEL_MAP.gpt52, diff --git a/package-lock.json b/package-lock.json index 9f4f4d28..0649982d 100644 --- a/package-lock.json +++ b/package-lock.json @@ -45,7 +45,7 @@ "@automaker/utils": "1.0.0", "@github/copilot-sdk": "^0.1.16", "@modelcontextprotocol/sdk": "1.25.2", - "@openai/codex-sdk": "^0.77.0", + "@openai/codex-sdk": "^0.98.0", "cookie-parser": "1.4.7", "cors": "2.8.5", "dotenv": "17.2.3", @@ -3949,9 +3949,9 @@ } }, "node_modules/@openai/codex-sdk": { - "version": "0.77.0", - "resolved": "https://registry.npmjs.org/@openai/codex-sdk/-/codex-sdk-0.77.0.tgz", - "integrity": "sha512-bvJQ4dASnZ7jgfxmseViQwdRupHxs0TwHSZFeYB0gpdOAXnWwDWdGJRCMyphLSHwExRp27JNOk7EBFVmZRBanQ==", + "version": "0.98.0", + "resolved": "https://registry.npmjs.org/@openai/codex-sdk/-/codex-sdk-0.98.0.tgz", + "integrity": "sha512-TbPgrBpuSNMJyOXys0HNsh6UoP5VIHu1fVh2KDdACi5XyB0vuPtzBZC+qOsxHz7WXEQPFlomPLyxS6JnE5Okmg==", "license": "Apache-2.0", "engines": { "node": ">=18" From 835ffe31853dde25d3676ca6dd4a100268f39570 Mon Sep 17 00:00:00 2001 From: Kacper Date: Thu, 5 Feb 2026 22:43:22 +0100 Subject: [PATCH 07/89] feat: update Claude model to Opus 4.6 and enhance adaptive thinking support - Changed model identifier from `claude-opus-4-5-20251101` to `claude-opus-4-6` across various files, including documentation and code 
references. - Updated the SDK to support adaptive thinking for Opus 4.6, allowing the model to determine its own reasoning depth. - Enhanced the thinking level options to include 'adaptive' and adjusted related components to reflect this change. - Updated tests to ensure compatibility with the new model and its features. These changes improve the model's capabilities and user experience by leveraging adaptive reasoning. --- CLAUDE.md | 2 +- apps/server/package.json | 2 +- apps/server/src/lib/sdk-options.ts | 18 ++++++++++- apps/server/src/providers/claude-provider.ts | 17 ++++++---- apps/server/src/providers/provider-factory.ts | 2 +- .../tests/unit/lib/model-resolver.test.ts | 4 +-- .../server/tests/unit/lib/sdk-options.test.ts | 24 ++++++++++++++ .../unit/providers/claude-provider.test.ts | 32 +++++++++---------- .../unit/providers/provider-factory.test.ts | 6 ++-- apps/ui/docs/AGENT_ARCHITECTURE.md | 2 +- .../board-view/dialogs/add-feature-dialog.tsx | 16 +++++++++- .../board-view/shared/model-constants.ts | 10 +++++- .../shared/thinking-level-selector.tsx | 14 ++++++-- .../model-defaults/phase-model-selector.tsx | 18 ++++++++--- apps/ui/src/lib/agent-context-parser.ts | 3 +- docs/llm-shared-packages.md | 2 +- docs/server/providers.md | 6 ++-- docs/server/utilities.md | 12 +++---- libs/model-resolver/README.md | 12 +++---- libs/model-resolver/tests/resolver.test.ts | 4 +-- libs/types/src/index.ts | 2 ++ libs/types/src/model-display.ts | 2 ++ libs/types/src/model.ts | 6 ++-- libs/types/src/settings.ts | 23 ++++++++++++- package-lock.json | 10 +++--- 25 files changed, 178 insertions(+), 71 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 128cd8d7..84dd1fbb 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -161,7 +161,7 @@ Use `resolveModelString()` from `@automaker/model-resolver` to convert model ali - `haiku` → `claude-haiku-4-5` - `sonnet` → `claude-sonnet-4-20250514` -- `opus` → `claude-opus-4-5-20251101` +- `opus` → `claude-opus-4-6` ## Environment Variables 
diff --git a/apps/server/package.json b/apps/server/package.json index 0b4deeac..ed005c54 100644 --- a/apps/server/package.json +++ b/apps/server/package.json @@ -24,7 +24,7 @@ "test:unit": "vitest run tests/unit" }, "dependencies": { - "@anthropic-ai/claude-agent-sdk": "0.1.76", + "@anthropic-ai/claude-agent-sdk": "0.2.32", "@automaker/dependency-resolver": "1.0.0", "@automaker/git-utils": "1.0.0", "@automaker/model-resolver": "1.0.0", diff --git a/apps/server/src/lib/sdk-options.ts b/apps/server/src/lib/sdk-options.ts index cc1df2f5..674350a5 100644 --- a/apps/server/src/lib/sdk-options.ts +++ b/apps/server/src/lib/sdk-options.ts @@ -253,11 +253,27 @@ function buildMcpOptions(config: CreateSdkOptionsConfig): McpOptions { /** * Build thinking options for SDK configuration. * Converts ThinkingLevel to maxThinkingTokens for the Claude SDK. + * For adaptive thinking (Opus 4.6), omits maxThinkingTokens to let the model + * decide its own reasoning depth. * * @param thinkingLevel - The thinking level to convert - * @returns Object with maxThinkingTokens if thinking is enabled + * @returns Object with maxThinkingTokens if thinking is enabled with a budget */ function buildThinkingOptions(thinkingLevel?: ThinkingLevel): Partial { + if (!thinkingLevel || thinkingLevel === 'none') { + return {}; + } + + // Adaptive thinking (Opus 4.6): don't set maxThinkingTokens + // The model will use adaptive thinking by default + if (thinkingLevel === 'adaptive') { + logger.debug( + `buildThinkingOptions: thinkingLevel="adaptive" -> no maxThinkingTokens (model decides)` + ); + return {}; + } + + // Manual budget-based thinking for Haiku/Sonnet const maxThinkingTokens = getThinkingTokenBudget(thinkingLevel); logger.debug( `buildThinkingOptions: thinkingLevel="${thinkingLevel}" -> maxThinkingTokens=${maxThinkingTokens}` diff --git a/apps/server/src/providers/claude-provider.ts b/apps/server/src/providers/claude-provider.ts index cfb59093..78a0a0c7 100644 --- 
a/apps/server/src/providers/claude-provider.ts +++ b/apps/server/src/providers/claude-provider.ts @@ -219,8 +219,11 @@ export class ClaudeProvider extends BaseProvider { // claudeCompatibleProvider takes precedence over claudeApiProfile const providerConfig = claudeCompatibleProvider || claudeApiProfile; - // Convert thinking level to token budget - const maxThinkingTokens = getThinkingTokenBudget(thinkingLevel); + // Build thinking configuration + // Adaptive thinking (Opus 4.6): don't set maxThinkingTokens, model uses adaptive by default + // Manual thinking (Haiku/Sonnet): use budget_tokens + const maxThinkingTokens = + thinkingLevel === 'adaptive' ? undefined : getThinkingTokenBudget(thinkingLevel); // Build Claude SDK options const sdkOptions: Options = { @@ -349,13 +352,13 @@ export class ClaudeProvider extends BaseProvider { getAvailableModels(): ModelDefinition[] { const models = [ { - id: 'claude-opus-4-5-20251101', - name: 'Claude Opus 4.5', - modelString: 'claude-opus-4-5-20251101', + id: 'claude-opus-4-6', + name: 'Claude Opus 4.6', + modelString: 'claude-opus-4-6', provider: 'anthropic', - description: 'Most capable Claude model', + description: 'Most capable Claude model with adaptive thinking', contextWindow: 200000, - maxOutputTokens: 16000, + maxOutputTokens: 128000, supportsVision: true, supportsTools: true, tier: 'premium' as const, diff --git a/apps/server/src/providers/provider-factory.ts b/apps/server/src/providers/provider-factory.ts index 1e91760f..a6dff69e 100644 --- a/apps/server/src/providers/provider-factory.ts +++ b/apps/server/src/providers/provider-factory.ts @@ -103,7 +103,7 @@ export class ProviderFactory { /** * Get the appropriate provider for a given model ID * - * @param modelId Model identifier (e.g., "claude-opus-4-5-20251101", "cursor-gpt-4o", "cursor-auto") + * @param modelId Model identifier (e.g., "claude-opus-4-6", "cursor-gpt-4o", "cursor-auto") * @param options Optional settings * @param options.throwOnDisconnected 
Throw error if provider is disconnected (default: true) * @returns Provider instance for the model diff --git a/apps/server/tests/unit/lib/model-resolver.test.ts b/apps/server/tests/unit/lib/model-resolver.test.ts index c1bff78d..65e3115d 100644 --- a/apps/server/tests/unit/lib/model-resolver.test.ts +++ b/apps/server/tests/unit/lib/model-resolver.test.ts @@ -35,7 +35,7 @@ describe('model-resolver.ts', () => { it("should resolve 'opus' alias to full model string", () => { const result = resolveModelString('opus'); - expect(result).toBe('claude-opus-4-5-20251101'); + expect(result).toBe('claude-opus-4-6'); expect(consoleSpy.log).toHaveBeenCalledWith( expect.stringContaining('Migrated legacy ID: "opus" -> "claude-opus"') ); @@ -117,7 +117,7 @@ describe('model-resolver.ts', () => { describe('getEffectiveModel', () => { it('should prioritize explicit model over session and default', () => { const result = getEffectiveModel('opus', 'haiku', 'gpt-5.2'); - expect(result).toBe('claude-opus-4-5-20251101'); + expect(result).toBe('claude-opus-4-6'); }); it('should use session model when explicit is not provided', () => { diff --git a/apps/server/tests/unit/lib/sdk-options.test.ts b/apps/server/tests/unit/lib/sdk-options.test.ts index 029cd8fa..69d69794 100644 --- a/apps/server/tests/unit/lib/sdk-options.test.ts +++ b/apps/server/tests/unit/lib/sdk-options.test.ts @@ -491,5 +491,29 @@ describe('sdk-options.ts', () => { expect(options.maxThinkingTokens).toBeUndefined(); }); }); + + describe('adaptive thinking for Opus 4.6', () => { + it('should not set maxThinkingTokens for adaptive thinking (model decides)', async () => { + const { createAutoModeOptions } = await import('@/lib/sdk-options.js'); + + const options = createAutoModeOptions({ + cwd: '/test/path', + thinkingLevel: 'adaptive', + }); + + expect(options.maxThinkingTokens).toBeUndefined(); + }); + + it('should not include maxThinkingTokens when thinkingLevel is "none"', async () => { + const { createAutoModeOptions } = 
await import('@/lib/sdk-options.js'); + + const options = createAutoModeOptions({ + cwd: '/test/path', + thinkingLevel: 'none', + }); + + expect(options.maxThinkingTokens).toBeUndefined(); + }); + }); }); }); diff --git a/apps/server/tests/unit/providers/claude-provider.test.ts b/apps/server/tests/unit/providers/claude-provider.test.ts index c3f83f8f..7df211ef 100644 --- a/apps/server/tests/unit/providers/claude-provider.test.ts +++ b/apps/server/tests/unit/providers/claude-provider.test.ts @@ -39,7 +39,7 @@ describe('claude-provider.ts', () => { const generator = provider.executeQuery({ prompt: 'Hello', - model: 'claude-opus-4-5-20251101', + model: 'claude-opus-4-6', cwd: '/test', }); @@ -59,7 +59,7 @@ describe('claude-provider.ts', () => { const generator = provider.executeQuery({ prompt: 'Test prompt', - model: 'claude-opus-4-5-20251101', + model: 'claude-opus-4-6', cwd: '/test/dir', systemPrompt: 'You are helpful', maxTurns: 10, @@ -71,7 +71,7 @@ describe('claude-provider.ts', () => { expect(sdk.query).toHaveBeenCalledWith({ prompt: 'Test prompt', options: expect.objectContaining({ - model: 'claude-opus-4-5-20251101', + model: 'claude-opus-4-6', systemPrompt: 'You are helpful', maxTurns: 10, cwd: '/test/dir', @@ -91,7 +91,7 @@ describe('claude-provider.ts', () => { const generator = provider.executeQuery({ prompt: 'Test', - model: 'claude-opus-4-5-20251101', + model: 'claude-opus-4-6', cwd: '/test', }); @@ -116,7 +116,7 @@ describe('claude-provider.ts', () => { const generator = provider.executeQuery({ prompt: 'Test', - model: 'claude-opus-4-5-20251101', + model: 'claude-opus-4-6', cwd: '/test', abortController, }); @@ -145,7 +145,7 @@ describe('claude-provider.ts', () => { const generator = provider.executeQuery({ prompt: 'Current message', - model: 'claude-opus-4-5-20251101', + model: 'claude-opus-4-6', cwd: '/test', conversationHistory, sdkSessionId: 'test-session-id', @@ -176,7 +176,7 @@ describe('claude-provider.ts', () => { const generator = 
provider.executeQuery({ prompt: arrayPrompt as any, - model: 'claude-opus-4-5-20251101', + model: 'claude-opus-4-6', cwd: '/test', }); @@ -196,7 +196,7 @@ describe('claude-provider.ts', () => { const generator = provider.executeQuery({ prompt: 'Test', - model: 'claude-opus-4-5-20251101', + model: 'claude-opus-4-6', cwd: '/test', }); @@ -222,7 +222,7 @@ describe('claude-provider.ts', () => { const generator = provider.executeQuery({ prompt: 'Test', - model: 'claude-opus-4-5-20251101', + model: 'claude-opus-4-6', cwd: '/test', }); @@ -286,7 +286,7 @@ describe('claude-provider.ts', () => { const generator = provider.executeQuery({ prompt: 'Test', - model: 'claude-opus-4-5-20251101', + model: 'claude-opus-4-6', cwd: '/test', }); @@ -313,7 +313,7 @@ describe('claude-provider.ts', () => { const generator = provider.executeQuery({ prompt: 'Test', - model: 'claude-opus-4-5-20251101', + model: 'claude-opus-4-6', cwd: '/test', }); @@ -341,7 +341,7 @@ describe('claude-provider.ts', () => { const generator = provider.executeQuery({ prompt: 'Test', - model: 'claude-opus-4-5-20251101', + model: 'claude-opus-4-6', cwd: '/test', }); @@ -366,12 +366,12 @@ describe('claude-provider.ts', () => { expect(models).toHaveLength(4); }); - it('should include Claude Opus 4.5', () => { + it('should include Claude Opus 4.6', () => { const models = provider.getAvailableModels(); - const opus = models.find((m) => m.id === 'claude-opus-4-5-20251101'); + const opus = models.find((m) => m.id === 'claude-opus-4-6'); expect(opus).toBeDefined(); - expect(opus?.name).toBe('Claude Opus 4.5'); + expect(opus?.name).toBe('Claude Opus 4.6'); expect(opus?.provider).toBe('anthropic'); }); @@ -400,7 +400,7 @@ describe('claude-provider.ts', () => { it('should mark Opus as default', () => { const models = provider.getAvailableModels(); - const opus = models.find((m) => m.id === 'claude-opus-4-5-20251101'); + const opus = models.find((m) => m.id === 'claude-opus-4-6'); expect(opus?.default).toBe(true); }); diff 
--git a/apps/server/tests/unit/providers/provider-factory.test.ts b/apps/server/tests/unit/providers/provider-factory.test.ts index fbf01e90..b9aef928 100644 --- a/apps/server/tests/unit/providers/provider-factory.test.ts +++ b/apps/server/tests/unit/providers/provider-factory.test.ts @@ -54,8 +54,8 @@ describe('provider-factory.ts', () => { describe('getProviderForModel', () => { describe('Claude models (claude-* prefix)', () => { - it('should return ClaudeProvider for claude-opus-4-5-20251101', () => { - const provider = ProviderFactory.getProviderForModel('claude-opus-4-5-20251101'); + it('should return ClaudeProvider for claude-opus-4-6', () => { + const provider = ProviderFactory.getProviderForModel('claude-opus-4-6'); expect(provider).toBeInstanceOf(ClaudeProvider); }); @@ -70,7 +70,7 @@ describe('provider-factory.ts', () => { }); it('should be case-insensitive for claude models', () => { - const provider = ProviderFactory.getProviderForModel('CLAUDE-OPUS-4-5-20251101'); + const provider = ProviderFactory.getProviderForModel('CLAUDE-OPUS-4-6'); expect(provider).toBeInstanceOf(ClaudeProvider); }); }); diff --git a/apps/ui/docs/AGENT_ARCHITECTURE.md b/apps/ui/docs/AGENT_ARCHITECTURE.md index 4c9f0d11..f5c374c4 100644 --- a/apps/ui/docs/AGENT_ARCHITECTURE.md +++ b/apps/ui/docs/AGENT_ARCHITECTURE.md @@ -199,7 +199,7 @@ The agent is configured with: ```javascript { - model: "claude-opus-4-5-20251101", + model: "claude-opus-4-6", maxTurns: 20, cwd: workingDirectory, allowedTools: [ diff --git a/apps/ui/src/components/views/board-view/dialogs/add-feature-dialog.tsx b/apps/ui/src/components/views/board-view/dialogs/add-feature-dialog.tsx index b8dd8776..2dbf0808 100644 --- a/apps/ui/src/components/views/board-view/dialogs/add-feature-dialog.tsx +++ b/apps/ui/src/components/views/board-view/dialogs/add-feature-dialog.tsx @@ -264,7 +264,21 @@ export function AddFeatureDialog({ }, [planningMode]); const handleModelChange = (entry: PhaseModelEntry) => { - 
setModelEntry(entry); + // Normalize thinking level when switching between adaptive and non-adaptive models + const isNewModelAdaptive = + entry.model === 'claude-opus' || + (typeof entry.model === 'string' && entry.model.includes('opus-4-6')); + const currentLevel = entry.thinkingLevel || 'none'; + + if (isNewModelAdaptive && currentLevel !== 'none' && currentLevel !== 'adaptive') { + // Switching TO Opus 4.6 with a manual level -> auto-switch to 'adaptive' + setModelEntry({ ...entry, thinkingLevel: 'adaptive' }); + } else if (!isNewModelAdaptive && currentLevel === 'adaptive') { + // Switching FROM Opus 4.6 with adaptive -> auto-switch to 'high' + setModelEntry({ ...entry, thinkingLevel: 'high' }); + } else { + setModelEntry(entry); + } }; const buildFeatureData = (): FeatureData | null => { diff --git a/apps/ui/src/components/views/board-view/shared/model-constants.ts b/apps/ui/src/components/views/board-view/shared/model-constants.ts index c56ad46a..2816e556 100644 --- a/apps/ui/src/components/views/board-view/shared/model-constants.ts +++ b/apps/ui/src/components/views/board-view/shared/model-constants.ts @@ -167,7 +167,14 @@ export const ALL_MODELS: ModelOption[] = [ ...COPILOT_MODELS, ]; -export const THINKING_LEVELS: ThinkingLevel[] = ['none', 'low', 'medium', 'high', 'ultrathink']; +export const THINKING_LEVELS: ThinkingLevel[] = [ + 'none', + 'low', + 'medium', + 'high', + 'ultrathink', + 'adaptive', +]; export const THINKING_LEVEL_LABELS: Record = { none: 'None', @@ -175,6 +182,7 @@ export const THINKING_LEVEL_LABELS: Record = { medium: 'Med', high: 'High', ultrathink: 'Ultra', + adaptive: 'Adaptive', }; /** diff --git a/apps/ui/src/components/views/board-view/shared/thinking-level-selector.tsx b/apps/ui/src/components/views/board-view/shared/thinking-level-selector.tsx index 74b791a3..3e111a31 100644 --- a/apps/ui/src/components/views/board-view/shared/thinking-level-selector.tsx +++ 
b/apps/ui/src/components/views/board-view/shared/thinking-level-selector.tsx @@ -2,19 +2,25 @@ import { Label } from '@/components/ui/label'; import { Brain } from 'lucide-react'; import { cn } from '@/lib/utils'; import { ThinkingLevel } from '@/store/app-store'; -import { THINKING_LEVELS, THINKING_LEVEL_LABELS } from './model-constants'; +import { THINKING_LEVEL_LABELS } from './model-constants'; +import { getThinkingLevelsForModel } from '@automaker/types'; interface ThinkingLevelSelectorProps { selectedLevel: ThinkingLevel; onLevelSelect: (level: ThinkingLevel) => void; testIdPrefix?: string; + /** Optional model ID to filter available thinking levels (e.g., Opus 4.6 only shows None/Adaptive) */ + model?: string; } export function ThinkingLevelSelector({ selectedLevel, onLevelSelect, testIdPrefix = 'thinking-level', + model, }: ThinkingLevelSelectorProps) { + const levels = model ? getThinkingLevelsForModel(model) : getThinkingLevelsForModel(''); + return (
- {THINKING_LEVELS.map((level) => ( + {levels.map((level) => (
); diff --git a/apps/ui/src/components/views/settings-view/model-defaults/phase-model-selector.tsx b/apps/ui/src/components/views/settings-view/model-defaults/phase-model-selector.tsx index 20420388..25424fa6 100644 --- a/apps/ui/src/components/views/settings-view/model-defaults/phase-model-selector.tsx +++ b/apps/ui/src/components/views/settings-view/model-defaults/phase-model-selector.tsx @@ -21,6 +21,7 @@ import { isGroupSelected, getSelectedVariant, codexModelHasThinking, + getThinkingLevelsForModel, } from '@automaker/types'; import { CLAUDE_MODELS, @@ -28,7 +29,6 @@ import { OPENCODE_MODELS, GEMINI_MODELS, COPILOT_MODELS, - THINKING_LEVELS, THINKING_LEVEL_LABELS, REASONING_EFFORT_LEVELS, REASONING_EFFORT_LABELS, @@ -1296,7 +1296,9 @@ export function PhaseModelSelector({
Thinking Level
- {THINKING_LEVELS.map((level) => ( + {getThinkingLevelsForModel( + model.mapsToClaudeModel === 'opus' ? 'claude-opus' : '' + ).map((level) => (
{isSelected && currentThinking === level && ( @@ -1402,7 +1405,9 @@ export function PhaseModelSelector({
Thinking Level
- {THINKING_LEVELS.map((level) => ( + {getThinkingLevelsForModel( + model.mapsToClaudeModel === 'opus' ? 'claude-opus' : '' + ).map((level) => ( - {feature.planSpec?.content && onViewPlan && ( + {!isCurrentAutoTask && + (feature.status === 'backlog' || + feature.status === 'interrupted' || + feature.status === 'ready') && ( + <> - )} - {onImplement && ( - - )} - - )} + {feature.planSpec?.content && onViewPlan && ( + + )} + {onImplement && ( + + )} + + )} ); }); diff --git a/apps/ui/src/components/views/board-view/components/kanban-card/card-header.tsx b/apps/ui/src/components/views/board-view/components/kanban-card/card-header.tsx index 793c3191..bdf028b9 100644 --- a/apps/ui/src/components/views/board-view/components/kanban-card/card-header.tsx +++ b/apps/ui/src/components/views/board-view/components/kanban-card/card-header.tsx @@ -126,35 +126,39 @@ export const CardHeaderSection = memo(function CardHeaderSection({ )} - {/* Backlog header */} - {!isCurrentAutoTask && !isSelectionMode && feature.status === 'backlog' && ( -
- - -
- )} + {/* Backlog header (also handles 'interrupted' and 'ready' statuses that display in backlog column) */} + {!isCurrentAutoTask && + !isSelectionMode && + (feature.status === 'backlog' || + feature.status === 'interrupted' || + feature.status === 'ready') && ( +
+ + +
+ )} {/* Waiting approval / Verified header */} {!isCurrentAutoTask && diff --git a/apps/ui/src/components/views/board-view/components/kanban-card/kanban-card.tsx b/apps/ui/src/components/views/board-view/components/kanban-card/kanban-card.tsx index a332f305..f6725a7d 100644 --- a/apps/ui/src/components/views/board-view/components/kanban-card/kanban-card.tsx +++ b/apps/ui/src/components/views/board-view/components/kanban-card/kanban-card.tsx @@ -121,6 +121,8 @@ export const KanbanCard = memo(function KanbanCard({ const isDraggable = !isSelectionMode && (feature.status === 'backlog' || + feature.status === 'interrupted' || + feature.status === 'ready' || feature.status === 'waiting_approval' || feature.status === 'verified' || feature.status.startsWith('pipeline_') || From 41014f6ab6ccc96ba13b8e15edbcfea5403bc75f Mon Sep 17 00:00:00 2001 From: gsxdsm Date: Sun, 15 Feb 2026 21:04:18 -0800 Subject: [PATCH 75/89] fix: resolve TypeScript errors after upstream merge Add missing 'adaptive' thinking level to kanban card labels and export TerminalPromptTheme type from @automaker/types package. 
Co-Authored-By: Claude Opus 4.6 --- .../board-view/components/kanban-card/agent-info-panel.tsx | 1 + libs/types/src/index.ts | 2 ++ 2 files changed, 3 insertions(+) diff --git a/apps/ui/src/components/views/board-view/components/kanban-card/agent-info-panel.tsx b/apps/ui/src/components/views/board-view/components/kanban-card/agent-info-panel.tsx index 03b2b0f5..a3540cd7 100644 --- a/apps/ui/src/components/views/board-view/components/kanban-card/agent-info-panel.tsx +++ b/apps/ui/src/components/views/board-view/components/kanban-card/agent-info-panel.tsx @@ -22,6 +22,7 @@ function formatThinkingLevel(level: ThinkingLevel | undefined): string { medium: 'Med', high: 'High', ultrathink: 'Ultra', + adaptive: 'Adaptive', }; return labels[level]; } diff --git a/libs/types/src/index.ts b/libs/types/src/index.ts index e9193327..e0d21470 100644 --- a/libs/types/src/index.ts +++ b/libs/types/src/index.ts @@ -183,6 +183,8 @@ export type { // Claude API profile types (deprecated) ClaudeApiProfile, ClaudeApiProfileTemplate, + // Terminal prompt theme type + TerminalPromptTheme, } from './settings.js'; export { DEFAULT_KEYBOARD_SHORTCUTS, From eed5e20438b66e7a4d81dfeac6c1dc035a08115e Mon Sep 17 00:00:00 2001 From: gsxdsm Date: Mon, 16 Feb 2026 10:47:52 -0800 Subject: [PATCH 76/89] fix(agent-service): fallback to effectiveModel when requestedModel is undefined --- apps/server/src/services/agent-service.ts | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/apps/server/src/services/agent-service.ts b/apps/server/src/services/agent-service.ts index e5458a98..0ecec44e 100644 --- a/apps/server/src/services/agent-service.ts +++ b/apps/server/src/services/agent-service.ts @@ -389,14 +389,16 @@ export class AgentService { // Get provider for this model (with prefix) // When using custom provider (GLM, MiniMax), requestedModel routes to Claude provider - const modelForProvider = claudeCompatibleProvider ? 
requestedModel : effectiveModel; + const modelForProvider = claudeCompatibleProvider + ? (requestedModel ?? effectiveModel) + : effectiveModel; const provider = ProviderFactory.getProviderForModel(modelForProvider); // Strip provider prefix - providers should receive bare model IDs // CRITICAL: For custom providers (GLM, MiniMax), pass the provider's model ID (e.g. "GLM-4.7") // to the API, NOT the resolved Claude model - otherwise we get "model not found" - const bareModel = claudeCompatibleProvider - ? requestedModel + const bareModel: string = claudeCompatibleProvider + ? (requestedModel ?? effectiveModel) : stripProviderPrefix(effectiveModel); // Build options for provider From 462dbf1522d91bc2e7610d7b6e19b172bc50f87e Mon Sep 17 00:00:00 2001 From: gsxdsm Date: Mon, 16 Feb 2026 11:53:09 -0800 Subject: [PATCH 77/89] fix: Address code review comments --- .../routes/auto-mode/routes/approve-plan.ts | 12 +++- .../server/src/routes/features/routes/list.ts | 23 ++++--- .../routes/github/routes/validate-issue.ts | 5 +- apps/server/src/services/agent-executor.ts | 2 + .../src/services/auto-loop-coordinator.ts | 20 ++++-- apps/server/src/services/auto-mode/facade.ts | 67 +++++++++++-------- apps/server/src/services/execution-service.ts | 9 ++- .../src/services/feature-state-manager.ts | 7 ++ apps/server/src/services/merge-service.ts | 44 +++++++----- .../src/services/pipeline-orchestrator.ts | 3 +- .../src/services/plan-approval-service.ts | 13 +++- .../src/components/views/graph-view-page.tsx | 16 +++-- 12 files changed, 147 insertions(+), 74 deletions(-) diff --git a/apps/server/src/routes/auto-mode/routes/approve-plan.ts b/apps/server/src/routes/auto-mode/routes/approve-plan.ts index 277b50e2..14673e31 100644 --- a/apps/server/src/routes/auto-mode/routes/approve-plan.ts +++ b/apps/server/src/routes/auto-mode/routes/approve-plan.ts @@ -17,7 +17,7 @@ export function createApprovePlanHandler(autoModeService: AutoModeServiceCompat) approved: boolean; editedPlan?: string; 
feedback?: string; - projectPath?: string; + projectPath: string; }; if (!featureId) { @@ -36,6 +36,14 @@ export function createApprovePlanHandler(autoModeService: AutoModeServiceCompat) return; } + if (!projectPath) { + res.status(400).json({ + success: false, + error: 'projectPath is required', + }); + return; + } + // Note: We no longer check hasPendingApproval here because resolvePlanApproval // can handle recovery when pending approval is not in Map but feature has planSpec.status='generated' // This supports cases where the server restarted while waiting for approval @@ -48,7 +56,7 @@ export function createApprovePlanHandler(autoModeService: AutoModeServiceCompat) // Resolve the pending approval (with recovery support) const result = await autoModeService.resolvePlanApproval( - projectPath || '', + projectPath, featureId, approved, editedPlan, diff --git a/apps/server/src/routes/features/routes/list.ts b/apps/server/src/routes/features/routes/list.ts index 766e625c..c0f22d33 100644 --- a/apps/server/src/routes/features/routes/list.ts +++ b/apps/server/src/routes/features/routes/list.ts @@ -33,18 +33,23 @@ export function createListHandler( // We don't await this to keep the list response fast // Note: detectOrphanedFeatures handles errors internally and always resolves if (autoModeService) { - autoModeService.detectOrphanedFeatures(projectPath).then((orphanedFeatures) => { - if (orphanedFeatures.length > 0) { - logger.info( - `[ProjectLoad] Detected ${orphanedFeatures.length} orphaned feature(s) in ${projectPath}` - ); - for (const { feature, missingBranch } of orphanedFeatures) { + autoModeService + .detectOrphanedFeatures(projectPath) + .then((orphanedFeatures) => { + if (orphanedFeatures.length > 0) { logger.info( - `[ProjectLoad] Orphaned: ${feature.title || feature.id} - branch "${missingBranch}" no longer exists` + `[ProjectLoad] Detected ${orphanedFeatures.length} orphaned feature(s) in ${projectPath}` ); + for (const { feature, missingBranch } of 
orphanedFeatures) { + logger.info( + `[ProjectLoad] Orphaned: ${feature.title || feature.id} - branch "${missingBranch}" no longer exists` + ); + } } - } - }); + }) + .catch((error) => { + logger.warn(`[ProjectLoad] Orphan detection failed for ${projectPath}:`, error); + }); } res.json({ success: true, features }); diff --git a/apps/server/src/routes/github/routes/validate-issue.ts b/apps/server/src/routes/github/routes/validate-issue.ts index 9f3af5cf..38220f6d 100644 --- a/apps/server/src/routes/github/routes/validate-issue.ts +++ b/apps/server/src/routes/github/routes/validate-issue.ts @@ -25,7 +25,7 @@ import { isOpencodeModel, supportsStructuredOutput, } from '@automaker/types'; -import { resolvePhaseModel } from '@automaker/model-resolver'; +import { resolvePhaseModel, resolveModelString } from '@automaker/model-resolver'; import { extractJson } from '../../../lib/json-extractor.js'; import { writeValidation } from '../../../lib/validation-storage.js'; import { streamingQuery } from '../../../providers/simple-query-service.js'; @@ -190,9 +190,10 @@ ${basePrompt}`; // CRITICAL: For custom providers (GLM, MiniMax), pass the provider's model ID (e.g. "GLM-4.7") // to the API, NOT the resolved Claude model - otherwise we get "model not found" + // For standard Claude models, resolve aliases (e.g., 'opus' -> 'claude-opus-4-20250514') const effectiveModel = claudeCompatibleProvider ? 
(model as string) - : providerResolvedModel || (model as string); + : providerResolvedModel || resolveModelString(model as string); logger.info(`Using model: ${effectiveModel}`); // Use streamingQuery with event callbacks diff --git a/apps/server/src/services/agent-executor.ts b/apps/server/src/services/agent-executor.ts index 1ccb7497..dd1c179c 100644 --- a/apps/server/src/services/agent-executor.ts +++ b/apps/server/src/services/agent-executor.ts @@ -560,6 +560,7 @@ export class AgentExecutor { revText += b.text || ''; this.eventBus.emitAutoModeEvent('auto_mode_progress', { featureId, + branchName, content: b.text, }); } @@ -638,6 +639,7 @@ export class AgentExecutor { cwd: o.workDir, allowedTools: o.sdkOptions?.allowedTools as string[] | undefined, abortController: o.abortController, + thinkingLevel: o.thinkingLevel, mcpServers: o.mcpServers && Object.keys(o.mcpServers).length > 0 ? (o.mcpServers as Record) diff --git a/apps/server/src/services/auto-loop-coordinator.ts b/apps/server/src/services/auto-loop-coordinator.ts index 0b03e5f8..3e63cff1 100644 --- a/apps/server/src/services/auto-loop-coordinator.ts +++ b/apps/server/src/services/auto-loop-coordinator.ts @@ -31,8 +31,16 @@ export interface ProjectAutoLoopState { branchName: string | null; } +/** + * Generate a unique key for a worktree auto-loop instance. + * + * When branchName is null, this represents the main worktree (uses '__main__' sentinel). + * Named branches always use their exact name — the caller is responsible for passing + * null for the primary branch (main/master/etc.) so key matching stays consistent + * with ConcurrencyManager's dynamic primary branch resolution. + */ export function getWorktreeAutoLoopKey(projectPath: string, branchName: string | null): string { - return `${projectPath}::${(branchName === 'main' ? null : branchName) ?? '__main__'}`; + return `${projectPath}::${branchName ?? 
'__main__'}`; } export type ExecuteFeatureFn = ( @@ -404,11 +412,15 @@ export class AutoLoopCoordinator { reject(new Error('Aborted')); return; } - const timeout = setTimeout(resolve, ms); - signal?.addEventListener('abort', () => { + const onAbort = () => { clearTimeout(timeout); reject(new Error('Aborted')); - }); + }; + const timeout = setTimeout(() => { + signal?.removeEventListener('abort', onAbort); + resolve(); + }, ms); + signal?.addEventListener('abort', onAbort); }); } } diff --git a/apps/server/src/services/auto-mode/facade.ts b/apps/server/src/services/auto-mode/facade.ts index 83acf678..af909d30 100644 --- a/apps/server/src/services/auto-mode/facade.ts +++ b/apps/server/src/services/auto-mode/facade.ts @@ -17,7 +17,7 @@ import { promisify } from 'util'; import type { Feature, PlanningMode, ThinkingLevel } from '@automaker/types'; import { DEFAULT_MAX_CONCURRENCY, stripProviderPrefix } from '@automaker/types'; import { createLogger, loadContextFiles, classifyError } from '@automaker/utils'; -import { getFeatureDir } from '@automaker/platform'; +import { getFeatureDir, spawnProcess } from '@automaker/platform'; import * as secureFs from '../../lib/secure-fs.js'; import { validateWorkingDirectory } from '../../lib/sdk-options.js'; import { getPromptCustomization, getProviderByModelId } from '../../lib/settings-helpers.js'; @@ -48,6 +48,24 @@ import type { const execAsync = promisify(exec); const logger = createLogger('AutoModeServiceFacade'); +/** + * Execute git command with array arguments to prevent command injection. + */ +async function execGitCommand(args: string[], cwd: string): Promise { + const result = await spawnProcess({ + command: 'git', + args, + cwd, + }); + + if (result.exitCode === 0) { + return result.stdout; + } else { + const errorMessage = result.stderr || `Git command failed with code ${result.exitCode}`; + throw new Error(errorMessage); + } +} + /** * AutoModeServiceFacade provides a clean interface for auto-mode functionality. 
* @@ -589,19 +607,8 @@ ${prompt} Address the follow-up instructions above. Review the previous work and make the requested changes or fixes.`; try { - this.eventBus.emitAutoModeEvent('auto_mode_feature_start', { - featureId, - projectPath: this.projectPath, - branchName: feature?.branchName ?? null, - feature: { - id: featureId, - title: feature?.title || 'Follow-up', - description: feature?.description || 'Following up on feature', - }, - }); - // NOTE: Facade does not have runAgent - this method requires AutoModeService - // For now, throw to indicate routes should use AutoModeService.followUpFeature + // Do NOT emit start events before throwing to prevent false start events throw new Error( 'followUpFeature not fully implemented in facade - use AutoModeService.followUpFeature instead' ); @@ -691,18 +698,22 @@ Address the follow-up instructions above. Review the previous work and make the // Use project path } } else { - const sanitizedFeatureId = featureId.replace(/[^a-zA-Z0-9_-]/g, '-'); - const legacyWorktreePath = path.join(this.projectPath, '.worktrees', sanitizedFeatureId); - try { - await secureFs.access(legacyWorktreePath); - workDir = legacyWorktreePath; - } catch { - // Use project path + // Use worktreeResolver instead of manual .worktrees lookup + const feature = await this.featureStateManager.loadFeature(this.projectPath, featureId); + const branchName = feature?.branchName; + if (branchName) { + const resolved = await this.worktreeResolver.findWorktreeForBranch( + this.projectPath, + branchName + ); + if (resolved) { + workDir = resolved; + } } } try { - const { stdout: status } = await execAsync('git status --porcelain', { cwd: workDir }); + const status = await execGitCommand(['status', '--porcelain'], workDir); if (!status.trim()) { return null; } @@ -712,9 +723,9 @@ Address the follow-up instructions above. 
Review the previous work and make the feature?.description?.split('\n')[0]?.substring(0, 60) || `Feature ${featureId}`; const commitMessage = `feat: ${title}\n\nImplemented by Automaker auto-mode`; - await execAsync('git add -A', { cwd: workDir }); - await execAsync(`git commit -m "${commitMessage.replace(/"/g, '\\"')}"`, { cwd: workDir }); - const { stdout: hash } = await execAsync('git rev-parse HEAD', { cwd: workDir }); + await execGitCommand(['add', '-A'], workDir); + await execGitCommand(['commit', '-m', commitMessage], workDir); + const hash = await execGitCommand(['rev-parse', 'HEAD'], workDir); this.eventBus.emitAutoModeEvent('auto_mode_feature_complete', { featureId, @@ -975,10 +986,10 @@ Address the follow-up instructions above. Review the previous work and make the return orphanedFeatures; } - // Get existing branches - const { stdout } = await execAsync( - 'git for-each-ref --format="%(refname:short)" refs/heads/', - { cwd: this.projectPath } + // Get existing branches (using safe array-based command) + const stdout = await execGitCommand( + ['for-each-ref', '--format=%(refname:short)', 'refs/heads/'], + this.projectPath ); const existingBranches = new Set( stdout diff --git a/apps/server/src/services/execution-service.ts b/apps/server/src/services/execution-service.ts index 54e8edd6..2af35fe7 100644 --- a/apps/server/src/services/execution-service.ts +++ b/apps/server/src/services/execution-service.ts @@ -190,9 +190,9 @@ ${feature.spec} } } - let worktreePath: string | null = null; + let worktreePath: string | null = providedWorktreePath ?? 
null; const branchName = feature.branchName; - if (useWorktrees && branchName) { + if (!worktreePath && useWorktrees && branchName) { worktreePath = await this.worktreeResolver.findWorktreeForBranch(projectPath, branchName); if (worktreePath) logger.info(`Using worktree for branch "${branchName}": ${worktreePath}`); } @@ -289,6 +289,11 @@ ${feature.spec} testAttempts: 0, maxTestAttempts: 5, }); + // Check if pipeline set a terminal status (e.g., merge_conflict) — don't overwrite it + const refreshed = await this.loadFeatureFn(projectPath, featureId); + if (refreshed?.status === 'merge_conflict') { + return; + } } const finalStatus = feature.skipTests ? 'waiting_approval' : 'verified'; diff --git a/apps/server/src/services/feature-state-manager.ts b/apps/server/src/services/feature-state-manager.ts index e7f37962..c74ad88b 100644 --- a/apps/server/src/services/feature-state-manager.ts +++ b/apps/server/src/services/feature-state-manager.ts @@ -115,6 +115,13 @@ export class FeatureStateManager { // PERSIST BEFORE EMIT (Pitfall 2) await atomicWriteJson(featurePath, feature, { backupCount: DEFAULT_BACKUP_COUNT }); + // Emit status change event so UI can react without polling + this.emitAutoModeEvent('feature_status_changed', { + featureId, + projectPath, + status, + }); + // Create notifications for important status changes const notificationService = getNotificationService(); if (status === 'waiting_approval') { diff --git a/apps/server/src/services/merge-service.ts b/apps/server/src/services/merge-service.ts index 837fbfbb..087aa801 100644 --- a/apps/server/src/services/merge-service.ts +++ b/apps/server/src/services/merge-service.ts @@ -4,12 +4,8 @@ * Extracted from worktree merge route to allow internal service calls. 
*/ -import { exec } from 'child_process'; -import { promisify } from 'util'; import { createLogger } from '@automaker/utils'; import { spawnProcess } from '@automaker/platform'; - -const execAsync = promisify(exec); const logger = createLogger('MergeService'); export interface MergeOptions { @@ -80,9 +76,23 @@ export async function performMerge( const mergeTo = targetBranch || 'main'; - // Validate source branch exists + // Validate branch names early to reject invalid input before any git operations + if (!isValidBranchName(branchName)) { + return { + success: false, + error: `Invalid source branch name: "${branchName}"`, + }; + } + if (!isValidBranchName(mergeTo)) { + return { + success: false, + error: `Invalid target branch name: "${mergeTo}"`, + }; + } + + // Validate source branch exists (using safe array-based command) try { - await execAsync(`git rev-parse --verify ${branchName}`, { cwd: projectPath }); + await execGitCommand(['rev-parse', '--verify', branchName], projectPath); } catch { return { success: false, @@ -90,9 +100,9 @@ export async function performMerge( }; } - // Validate target branch exists + // Validate target branch exists (using safe array-based command) try { - await execAsync(`git rev-parse --verify ${mergeTo}`, { cwd: projectPath }); + await execGitCommand(['rev-parse', '--verify', mergeTo], projectPath); } catch { return { success: false, @@ -100,13 +110,14 @@ export async function performMerge( }; } - // Merge the feature branch into the target branch - const mergeCmd = options?.squash - ? `git merge --squash ${branchName}` - : `git merge ${branchName} -m "${options?.message || `Merge ${branchName} into ${mergeTo}`}"`; + // Merge the feature branch into the target branch (using safe array-based commands) + const mergeMessage = options?.message || `Merge ${branchName} into ${mergeTo}`; + const mergeArgs = options?.squash + ? 
['merge', '--squash', branchName] + : ['merge', branchName, '-m', mergeMessage]; try { - await execAsync(mergeCmd, { cwd: projectPath }); + await execGitCommand(mergeArgs, projectPath); } catch (mergeError: unknown) { // Check if this is a merge conflict const err = mergeError as { stdout?: string; stderr?: string; message?: string }; @@ -125,11 +136,10 @@ export async function performMerge( throw mergeError; } - // If squash merge, need to commit + // If squash merge, need to commit (using safe array-based command) if (options?.squash) { - await execAsync(`git commit -m "${options?.message || `Merge ${branchName} (squash)`}"`, { - cwd: projectPath, - }); + const squashMessage = options?.message || `Merge ${branchName} (squash)`; + await execGitCommand(['commit', '-m', squashMessage], projectPath); } // Optionally delete the worktree and branch after merging diff --git a/apps/server/src/services/pipeline-orchestrator.ts b/apps/server/src/services/pipeline-orchestrator.ts index 08be4092..ea2bf69e 100644 --- a/apps/server/src/services/pipeline-orchestrator.ts +++ b/apps/server/src/services/pipeline-orchestrator.ts @@ -460,6 +460,7 @@ export class PipelineOrchestrator { const session = this.testRunnerService.getSession(sessionId); if (session && session.status !== 'running' && session.status !== 'pending') { clearInterval(checkInterval); + clearTimeout(timeoutId); resolve({ status: session.status, exitCode: session.exitCode, @@ -469,7 +470,7 @@ export class PipelineOrchestrator { }); } }, 1000); - setTimeout(() => { + const timeoutId = setTimeout(() => { clearInterval(checkInterval); resolve({ status: 'failed', exitCode: null, duration: 600000 }); }, 600000); diff --git a/apps/server/src/services/plan-approval-service.ts b/apps/server/src/services/plan-approval-service.ts index 836d999f..3a677d49 100644 --- a/apps/server/src/services/plan-approval-service.ts +++ b/apps/server/src/services/plan-approval-service.ts @@ -83,6 +83,13 @@ export class PlanApprovalService { 
); return new Promise((resolve, reject) => { + // Prevent duplicate registrations for the same key — reject and clean up existing entry + const existing = this.pendingApprovals.get(key); + if (existing) { + existing.reject(new Error('Superseded by a new waitForApproval call')); + this.pendingApprovals.delete(key); + } + // Set up timeout to prevent indefinite waiting and memory leaks // timeoutId stored in closure, NOT in PendingApproval object const timeoutId = setTimeout(() => { @@ -226,11 +233,11 @@ export class PlanApprovalService { status: approved ? 'approved' : 'rejected', approvedAt: approved ? new Date().toISOString() : undefined, reviewedByUser: true, - content: editedPlan, // Update content if user provided an edited version + ...(editedPlan !== undefined && { content: editedPlan }), // Only update content if user provided an edited version }); - // If rejected with feedback, emit event so client knows the rejection reason - if (!approved && feedback) { + // If rejected, emit event so client knows the rejection reason (even without feedback) + if (!approved) { this.eventBus.emitAutoModeEvent('plan_rejected', { featureId, projectPath, diff --git a/apps/ui/src/components/views/graph-view-page.tsx b/apps/ui/src/components/views/graph-view-page.tsx index 306b8eaa..3167647f 100644 --- a/apps/ui/src/components/views/graph-view-page.tsx +++ b/apps/ui/src/components/views/graph-view-page.tsx @@ -313,14 +313,18 @@ export function GraphViewPage() { // Handle add and start feature const handleAddAndStartFeature = useCallback( async (featureData: Parameters[0]) => { - const featuresBeforeIds = new Set(useAppStore.getState().features.map((f) => f.id)); - await handleAddFeature(featureData); + try { + const featuresBeforeIds = new Set(useAppStore.getState().features.map((f) => f.id)); + await handleAddFeature(featureData); - const latestFeatures = useAppStore.getState().features; - const newFeature = latestFeatures.find((f) => !featuresBeforeIds.has(f.id)); + const 
latestFeatures = useAppStore.getState().features; + const newFeature = latestFeatures.find((f) => !featuresBeforeIds.has(f.id)); - if (newFeature) { - await handleStartImplementation(newFeature); + if (newFeature) { + await handleStartImplementation(newFeature); + } + } catch (error) { + logger.error('Failed to add and start feature:', error); } }, [handleAddFeature, handleStartImplementation] From 434792a2eff8ad35145bb6f1d50165257e2ea8d9 Mon Sep 17 00:00:00 2001 From: gsxdsm Date: Mon, 16 Feb 2026 12:07:05 -0800 Subject: [PATCH 78/89] fix: Normalize 'main' branch to __main__ in auto-loop key generation --- apps/server/src/services/auto-loop-coordinator.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/apps/server/src/services/auto-loop-coordinator.ts b/apps/server/src/services/auto-loop-coordinator.ts index 3e63cff1..ddc666d5 100644 --- a/apps/server/src/services/auto-loop-coordinator.ts +++ b/apps/server/src/services/auto-loop-coordinator.ts @@ -35,12 +35,12 @@ export interface ProjectAutoLoopState { * Generate a unique key for a worktree auto-loop instance. * * When branchName is null, this represents the main worktree (uses '__main__' sentinel). - * Named branches always use their exact name — the caller is responsible for passing - * null for the primary branch (main/master/etc.) so key matching stays consistent - * with ConcurrencyManager's dynamic primary branch resolution. + * The string 'main' is also normalized to '__main__' for consistency. + * Named branches always use their exact name. */ export function getWorktreeAutoLoopKey(projectPath: string, branchName: string | null): string { - return `${projectPath}::${branchName ?? '__main__'}`; + const normalizedBranch = branchName === 'main' ? null : branchName; + return `${projectPath}::${normalizedBranch ?? 
'__main__'}`; } export type ExecuteFeatureFn = ( From 0b03e70f1d54a9138eb0f2d64bd9688fdd3e883b Mon Sep 17 00:00:00 2001 From: gsxdsm Date: Mon, 16 Feb 2026 12:27:56 -0800 Subject: [PATCH 79/89] fix: Resolve null coalescing, feature verification, and test abort handling issues --- apps/server/src/services/agent-executor.ts | 2 +- apps/server/src/services/auto-mode/facade.ts | 24 ++++-- apps/server/src/services/execution-service.ts | 1 + .../src/services/feature-state-manager.ts | 15 ++++ .../src/services/pipeline-orchestrator.ts | 73 ++++++++++++++----- .../src/services/plan-approval-service.ts | 36 ++++----- .../src/components/views/graph-view-page.tsx | 3 + 7 files changed, 109 insertions(+), 45 deletions(-) diff --git a/apps/server/src/services/agent-executor.ts b/apps/server/src/services/agent-executor.ts index dd1c179c..9a8772ef 100644 --- a/apps/server/src/services/agent-executor.ts +++ b/apps/server/src/services/agent-executor.ts @@ -333,7 +333,7 @@ export class AgentExecutor { userFeedback ); const taskStream = provider.executeQuery( - this.buildExecOpts(options, taskPrompt, Math.min(sdkOptions?.maxTurns || 100, 50)) + this.buildExecOpts(options, taskPrompt, Math.min(sdkOptions?.maxTurns ?? 
50, 50)) ); let taskOutput = '', taskStartDetected = false, diff --git a/apps/server/src/services/auto-mode/facade.ts b/apps/server/src/services/auto-mode/facade.ts index af909d30..01985081 100644 --- a/apps/server/src/services/auto-mode/facade.ts +++ b/apps/server/src/services/auto-mode/facade.ts @@ -227,7 +227,7 @@ export class AutoModeServiceFacade { .replace(/\{\{taskName\}\}/g, task.description) .replace(/\{\{taskIndex\}\}/g, String(taskIndex + 1)) .replace(/\{\{totalTasks\}\}/g, String(allTasks.length)) - .replace(/\{\{taskDescription\}\}/g, task.description || task.description); + .replace(/\{\{taskDescription\}\}/g, task.description || task.name); if (feedback) { taskPrompt = taskPrompt.replace(/\{\{userFeedback\}\}/g, feedback); } @@ -636,15 +636,23 @@ Address the follow-up instructions above. Review the previous work and make the */ async verifyFeature(featureId: string): Promise { const feature = await this.featureStateManager.loadFeature(this.projectPath, featureId); - const sanitizedFeatureId = featureId.replace(/[^a-zA-Z0-9_-]/g, '-'); - const worktreePath = path.join(this.projectPath, '.worktrees', sanitizedFeatureId); let workDir = this.projectPath; - try { - await secureFs.access(worktreePath); - workDir = worktreePath; - } catch { - // No worktree + // Use worktreeResolver to find worktree path (consistent with commitFeature) + const branchName = feature?.branchName; + if (branchName) { + const resolved = await this.worktreeResolver.findWorktreeForBranch( + this.projectPath, + branchName + ); + if (resolved) { + try { + await secureFs.access(resolved); + workDir = resolved; + } catch { + // Fall back to project path + } + } } const verificationChecks = [ diff --git a/apps/server/src/services/execution-service.ts b/apps/server/src/services/execution-service.ts index 2af35fe7..75bb10bd 100644 --- a/apps/server/src/services/execution-service.ts +++ b/apps/server/src/services/execution-service.ts @@ -339,6 +339,7 @@ ${feature.spec} } catch (error) { 
const errorInfo = classifyError(error); if (errorInfo.isAbort) { + await this.updateFeatureStatusFn(projectPath, featureId, 'interrupted'); this.eventBus.emitAutoModeEvent('auto_mode_feature_complete', { featureId, featureName: feature?.title, diff --git a/apps/server/src/services/feature-state-manager.ts b/apps/server/src/services/feature-state-manager.ts index c74ad88b..3fcf69fc 100644 --- a/apps/server/src/services/feature-state-manager.ts +++ b/apps/server/src/services/feature-state-manager.ts @@ -211,6 +211,8 @@ export class FeatureStateManager { */ async resetStuckFeatures(projectPath: string): Promise { const featuresDir = getFeaturesDir(projectPath); + let featuresScanned = 0; + let featuresReset = 0; try { const entries = await secureFs.readdir(featuresDir, { withFileTypes: true }); @@ -218,6 +220,7 @@ export class FeatureStateManager { for (const entry of entries) { if (!entry.isDirectory()) continue; + featuresScanned++; const featurePath = path.join(featuresDir, entry.name, 'feature.json'); const result = await readJsonWithRecovery(featurePath, null, { maxBackups: DEFAULT_BACKUP_COUNT, @@ -271,8 +274,13 @@ export class FeatureStateManager { if (needsUpdate) { feature.updatedAt = new Date().toISOString(); await atomicWriteJson(featurePath, feature, { backupCount: DEFAULT_BACKUP_COUNT }); + featuresReset++; } } + + logger.info( + `[resetStuckFeatures] Scanned ${featuresScanned} features, reset ${featuresReset} features for ${projectPath}` + ); } catch (error) { // If features directory doesn't exist, that's fine if ((error as NodeJS.ErrnoException).code !== 'ENOENT') { @@ -334,6 +342,13 @@ export class FeatureStateManager { // PERSIST BEFORE EMIT await atomicWriteJson(featurePath, feature, { backupCount: DEFAULT_BACKUP_COUNT }); + + // Emit event for UI update + this.emitAutoModeEvent('plan_spec_updated', { + featureId, + projectPath, + planSpec: feature.planSpec, + }); } catch (error) { logger.error(`Failed to update planSpec for ${featureId}:`, error); 
} diff --git a/apps/server/src/services/pipeline-orchestrator.ts b/apps/server/src/services/pipeline-orchestrator.ts index ea2bf69e..3eb427c8 100644 --- a/apps/server/src/services/pipeline-orchestrator.ts +++ b/apps/server/src/services/pipeline-orchestrator.ts @@ -361,8 +361,14 @@ export class PipelineOrchestrator { await this.executePipeline(context); + // Re-fetch feature to check if executePipeline set a terminal status (e.g., merge_conflict) + const reloadedFeature = await this.featureLoader.getById(projectPath, featureId); const finalStatus = feature.skipTests ? 'waiting_approval' : 'verified'; - await this.updateFeatureStatusFn(projectPath, featureId, finalStatus); + + // Only update status if not already in a terminal state + if (reloadedFeature && reloadedFeature.status !== 'merge_conflict') { + await this.updateFeatureStatusFn(projectPath, featureId, finalStatus); + } logger.info(`Pipeline resume completed for feature ${featureId}`); this.eventBus.emitAutoModeEvent('auto_mode_feature_complete', { featureId, @@ -417,7 +423,10 @@ export class PipelineOrchestrator { message: testResult.error || 'Failed to start tests', }; - const completionResult = await this.waitForTestCompletion(testResult.result.sessionId); + const completionResult = await this.waitForTestCompletion( + testResult.result.sessionId, + abortController.signal + ); if (completionResult.status === 'passed') return { success: true, testsPassed: true }; const sessionOutput = this.testRunnerService.getSessionOutput(testResult.result.sessionId); @@ -453,10 +462,19 @@ export class PipelineOrchestrator { /** Wait for test completion */ private async waitForTestCompletion( - sessionId: string + sessionId: string, + signal: AbortSignal ): Promise<{ status: TestRunStatus; exitCode: number | null; duration: number }> { return new Promise((resolve) => { const checkInterval = setInterval(() => { + // Check for abort + if (signal.aborted) { + clearInterval(checkInterval); + clearTimeout(timeoutId); + 
resolve({ status: 'failed', exitCode: null, duration: 0 }); + return; + } + const session = this.testRunnerService.getSession(sessionId); if (session && session.status !== 'running' && session.status !== 'pending') { clearInterval(checkInterval); @@ -471,6 +489,12 @@ export class PipelineOrchestrator { } }, 1000); const timeoutId = setTimeout(() => { + // Check for abort before timeout resolution + if (signal.aborted) { + clearInterval(checkInterval); + resolve({ status: 'failed', exitCode: null, duration: 0 }); + return; + } clearInterval(checkInterval); resolve({ status: 'failed', exitCode: null, duration: 600000 }); }, 600000); @@ -484,12 +508,15 @@ export class PipelineOrchestrator { logger.info(`Attempting auto-merge for feature ${featureId} (branch: ${branchName})`); try { + // Get the primary branch dynamically instead of hardcoding 'main' + const targetBranch = await this.worktreeResolver.getCurrentBranch(projectPath); + // Call merge service directly instead of HTTP fetch const result = await performMerge( projectPath, branchName, worktreePath || projectPath, - 'main', + targetBranch, { deleteWorktreeAndBranch: false, } @@ -524,12 +551,16 @@ export class PipelineOrchestrator { } } - /** Build a concise test failure summary for the agent */ - buildTestFailureSummary(scrollback: string): string { + /** Shared helper to parse test output lines and extract failure information */ + private parseTestLines(scrollback: string): { + failedTests: string[]; + passCount: number; + failCount: number; + } { const lines = scrollback.split('\n'); const failedTests: string[] = []; - let passCount = 0, - failCount = 0; + let passCount = 0; + let failCount = 0; for (const line of lines) { const trimmed = line.trim(); @@ -537,30 +568,34 @@ export class PipelineOrchestrator { const match = trimmed.match(/(?:FAIL|FAILED)\s+(.+)/); if (match) failedTests.push(match[1].trim()); failCount++; - } else if (trimmed.includes('PASS') || trimmed.includes('PASSED')) passCount++; - if 
(trimmed.match(/^>\s+.*\.(test|spec)\./)) failedTests.push(trimmed.replace(/^>\s+/, '')); + } else if (trimmed.includes('PASS') || trimmed.includes('PASSED')) { + passCount++; + } + if (trimmed.match(/^>\s+.*\.(test|spec)\./)) { + failedTests.push(trimmed.replace(/^>\s+/, '')); + } if ( trimmed.includes('AssertionError') || trimmed.includes('toBe') || trimmed.includes('toEqual') - ) + ) { failedTests.push(trimmed); + } } + return { failedTests, passCount, failCount }; + } + + /** Build a concise test failure summary for the agent */ + buildTestFailureSummary(scrollback: string): string { + const { failedTests, passCount, failCount } = this.parseTestLines(scrollback); const unique = [...new Set(failedTests)].slice(0, 10); return `Test Results: ${passCount} passed, ${failCount} failed.\n\nFailed tests:\n${unique.map((t) => `- ${t}`).join('\n')}\n\nOutput (last 2000 chars):\n${scrollback.slice(-2000)}`; } /** Extract failed test names from scrollback */ private extractFailedTestNames(scrollback: string): string[] { - const failedTests: string[] = []; - for (const line of scrollback.split('\n')) { - const trimmed = line.trim(); - if (trimmed.includes('FAIL') || trimmed.includes('FAILED')) { - const match = trimmed.match(/(?:FAIL|FAILED)\s+(.+)/); - if (match) failedTests.push(match[1].trim()); - } - } + const { failedTests } = this.parseTestLines(scrollback); return [...new Set(failedTests)].slice(0, 20); } } diff --git a/apps/server/src/services/plan-approval-service.ts b/apps/server/src/services/plan-approval-service.ts index 3a677d49..ebd37767 100644 --- a/apps/server/src/services/plan-approval-service.ts +++ b/apps/server/src/services/plan-approval-service.ts @@ -90,25 +90,10 @@ export class PlanApprovalService { this.pendingApprovals.delete(key); } - // Set up timeout to prevent indefinite waiting and memory leaks - // timeoutId stored in closure, NOT in PendingApproval object - const timeoutId = setTimeout(() => { - const pending = this.pendingApprovals.get(key); 
- if (pending) { - logger.warn( - `Plan approval for feature ${featureId} timed out after ${timeoutMinutes} minutes` - ); - this.pendingApprovals.delete(key); - reject( - new Error( - `Plan approval timed out after ${timeoutMinutes} minutes - feature execution cancelled` - ) - ); - } - }, timeoutMs); - // Wrap resolve/reject to clear timeout when approval is resolved // This ensures timeout is ALWAYS cleared on any resolution path + // Define wrappers BEFORE setTimeout so they can be used in timeout callback + let timeoutId: NodeJS.Timeout; const wrappedResolve = (result: PlanApprovalResult) => { clearTimeout(timeoutId); resolve(result); @@ -119,6 +104,23 @@ export class PlanApprovalService { reject(error); }; + // Set up timeout to prevent indefinite waiting and memory leaks + // Now timeoutId assignment happens after wrappers are defined + timeoutId = setTimeout(() => { + const pending = this.pendingApprovals.get(key); + if (pending) { + logger.warn( + `Plan approval for feature ${featureId} timed out after ${timeoutMinutes} minutes` + ); + this.pendingApprovals.delete(key); + wrappedReject( + new Error( + `Plan approval timed out after ${timeoutMinutes} minutes - feature execution cancelled` + ) + ); + } + }, timeoutMs); + this.pendingApprovals.set(key, { resolve: wrappedResolve, reject: wrappedReject, diff --git a/apps/ui/src/components/views/graph-view-page.tsx b/apps/ui/src/components/views/graph-view-page.tsx index 3167647f..dc32b6c5 100644 --- a/apps/ui/src/components/views/graph-view-page.tsx +++ b/apps/ui/src/components/views/graph-view-page.tsx @@ -325,6 +325,9 @@ export function GraphViewPage() { } } catch (error) { logger.error('Failed to add and start feature:', error); + toast.error( + `Failed to add and start feature: ${error instanceof Error ? 
error.message : String(error)}` + ); } }, [handleAddFeature, handleStartImplementation] From ab5d6a0e54636433b8e673aaa4c921cdc46bb524 Mon Sep 17 00:00:00 2001 From: gsxdsm Date: Mon, 16 Feb 2026 13:14:55 -0800 Subject: [PATCH 80/89] feat: Improve callback safety and remove unnecessary formatting in auto-mode facade --- apps/server/src/services/agent-executor.ts | 6 +- apps/server/src/services/auto-mode/facade.ts | 101 ++++++------------ .../src/services/feature-state-manager.ts | 46 ++++---- .../src/services/pipeline-orchestrator.ts | 23 ++-- apps/ui/eslint.config.mjs | 1 + apps/ui/src/store/app-store.ts | 17 +-- apps/ui/src/store/types/state-types.ts | 4 +- 7 files changed, 80 insertions(+), 118 deletions(-) diff --git a/apps/server/src/services/agent-executor.ts b/apps/server/src/services/agent-executor.ts index 9a8772ef..0d9c2399 100644 --- a/apps/server/src/services/agent-executor.ts +++ b/apps/server/src/services/agent-executor.ts @@ -126,9 +126,7 @@ export class AgentExecutor { const appendRawEvent = (event: unknown): void => { if (!enableRawOutput) return; try { - rawOutputLines.push( - JSON.stringify({ timestamp: new Date().toISOString(), event }, null, 4) - ); + rawOutputLines.push(JSON.stringify({ timestamp: new Date().toISOString(), event })); if (rawWriteTimeout) clearTimeout(rawWriteTimeout); rawWriteTimeout = setTimeout(async () => { try { @@ -552,7 +550,7 @@ export class AgentExecutor { }); let revText = ''; for await (const msg of provider.executeQuery( - this.buildExecOpts(options, revPrompt, sdkOptions?.maxTurns || 100) + this.buildExecOpts(options, revPrompt, sdkOptions?.maxTurns ?? 
100) )) { if (msg.type === 'assistant' && msg.message?.content) for (const b of msg.message.content) diff --git a/apps/server/src/services/auto-mode/facade.ts b/apps/server/src/services/auto-mode/facade.ts index 01985081..5d8bde58 100644 --- a/apps/server/src/services/auto-mode/facade.ts +++ b/apps/server/src/services/auto-mode/facade.ts @@ -143,9 +143,20 @@ export class AutoModeServiceFacade { return prompt; }; - // Create placeholder callbacks - will be bound to facade methods after creation - // These use closures to capture the facade instance once created + // Create placeholder callbacks - will be bound to facade methods after creation. + // These use closures to capture the facade instance once created. + // INVARIANT: All callbacks passed to PipelineOrchestrator, AutoLoopCoordinator, + // and ExecutionService are invoked asynchronously (never during construction), + // so facadeInstance is guaranteed to be assigned before any callback runs. let facadeInstance: AutoModeServiceFacade | null = null; + const getFacade = (): AutoModeServiceFacade => { + if (!facadeInstance) { + throw new Error( + 'AutoModeServiceFacade not yet initialized — callback invoked during construction' + ); + } + return facadeInstance; + }; // PipelineOrchestrator - runAgentFn is a stub; routes use AutoModeService directly const pipelineOrchestrator = new PipelineOrchestrator( @@ -162,7 +173,7 @@ export class AutoModeServiceFacade { loadContextFiles, buildFeaturePrompt, (pPath, featureId, useWorktrees, _isAutoMode, _model, opts) => - facadeInstance!.executeFeature(featureId, useWorktrees, false, undefined, opts), + getFacade().executeFeature(featureId, useWorktrees, false, undefined, opts), // runAgentFn - delegates to AgentExecutor async ( workDir: string, @@ -227,7 +238,7 @@ export class AutoModeServiceFacade { .replace(/\{\{taskName\}\}/g, task.description) .replace(/\{\{taskIndex\}\}/g, String(taskIndex + 1)) .replace(/\{\{totalTasks\}\}/g, String(allTasks.length)) - 
.replace(/\{\{taskDescription\}\}/g, task.description || task.name); + .replace(/\{\{taskDescription\}\}/g, task.description || `Task ${task.id}`); if (feedback) { taskPrompt = taskPrompt.replace(/\{\{userFeedback\}\}/g, feedback); } @@ -248,7 +259,7 @@ export class AutoModeServiceFacade { settingsService, // Callbacks (pPath, featureId, useWorktrees, isAutoMode) => - facadeInstance!.executeFeature(featureId, useWorktrees, isAutoMode), + getFacade().executeFeature(featureId, useWorktrees, isAutoMode), async (pPath, branchName) => { const features = await featureLoader.getAll(pPath); // For main worktree (branchName === null), resolve the actual primary branch name @@ -266,8 +277,8 @@ export class AutoModeServiceFacade { ); }, (pPath, branchName, maxConcurrency) => - facadeInstance!.saveExecutionStateForProject(branchName, maxConcurrency), - (pPath, branchName) => facadeInstance!.clearExecutionState(branchName), + getFacade().saveExecutionStateForProject(branchName, maxConcurrency), + (pPath, branchName) => getFacade().clearExecutionState(branchName), (pPath) => featureStateManager.resetStuckFeatures(pPath), (feature) => feature.status === 'completed' || @@ -375,16 +386,16 @@ export class AutoModeServiceFacade { async () => { /* recordLearnings - stub */ }, - (pPath, featureId) => facadeInstance!.contextExists(featureId), + (pPath, featureId) => getFacade().contextExists(featureId), (pPath, featureId, useWorktrees, _calledInternally) => - facadeInstance!.resumeFeature(featureId, useWorktrees, _calledInternally), + getFacade().resumeFeature(featureId, useWorktrees, _calledInternally), (errorInfo) => autoLoopCoordinator.trackFailureAndCheckPauseForProject(projectPath, null, errorInfo), (errorInfo) => autoLoopCoordinator.signalShouldPauseForProject(projectPath, null, errorInfo), () => { /* recordSuccess - no-op */ }, - (_pPath) => facadeInstance!.saveExecutionState(), + (_pPath) => getFacade().saveExecutionState(), loadContextFiles ); @@ -395,13 +406,7 @@ export class 
AutoModeServiceFacade { settingsService, // Callbacks (pPath, featureId, useWorktrees, isAutoMode, providedWorktreePath, opts) => - facadeInstance!.executeFeature( - featureId, - useWorktrees, - isAutoMode, - providedWorktreePath, - opts - ), + getFacade().executeFeature(featureId, useWorktrees, isAutoMode, providedWorktreePath, opts), (pPath, featureId) => featureStateManager.loadFeature(pPath, featureId), (pPath, featureId, status) => pipelineOrchestrator.detectPipelineStatus(pPath, featureId, status), @@ -547,7 +552,9 @@ export class AutoModeServiceFacade { imagePaths?: string[], useWorktrees = true ): Promise { - // This method contains substantial logic - delegates most work to AgentExecutor + // Stub: acquire concurrency slot then immediately throw. + // Heavy I/O (loadFeature, worktree resolution, context reading, prompt building) + // is deferred to the real AutoModeService.followUpFeature implementation. validateWorkingDirectory(this.projectPath); const runningEntry = this.concurrencyManager.acquire({ @@ -555,56 +562,6 @@ export class AutoModeServiceFacade { projectPath: this.projectPath, isAutoMode: false, }); - const abortController = runningEntry.abortController; - - const feature = await this.featureStateManager.loadFeature(this.projectPath, featureId); - let workDir = path.resolve(this.projectPath); - let worktreePath: string | null = null; - const branchName = feature?.branchName || `feature/${featureId}`; - - if (useWorktrees && branchName) { - worktreePath = await this.worktreeResolver.findWorktreeForBranch( - this.projectPath, - branchName - ); - if (worktreePath) { - workDir = worktreePath; - } - } - - // Load previous context - const featureDir = getFeatureDir(this.projectPath, featureId); - const contextPath = path.join(featureDir, 'agent-output.md'); - let previousContext = ''; - try { - previousContext = (await secureFs.readFile(contextPath, 'utf-8')) as string; - } catch { - // No previous context - } - - const prompts = await 
getPromptCustomization(this.settingsService, '[Facade]'); - - // Build follow-up prompt inline (no template in TaskExecutionPrompts) - let fullPrompt = `## Follow-up on Feature Implementation - -${feature ? `**Feature ID:** ${feature.id}\n**Title:** ${feature.title || 'Untitled'}\n**Description:** ${feature.description}` : `**Feature ID:** ${featureId}`} -`; - - if (previousContext) { - fullPrompt += ` -## Previous Agent Work -The following is the output from the previous implementation attempt: - -${previousContext} -`; - } - - fullPrompt += ` -## Follow-up Instructions -${prompt} - -## Task -Address the follow-up instructions above. Review the previous work and make the requested changes or fixes.`; try { // NOTE: Facade does not have runAgent - this method requires AutoModeService @@ -617,8 +574,8 @@ Address the follow-up instructions above. Review the previous work and make the if (!errorInfo.isAbort) { this.eventBus.emitAutoModeEvent('auto_mode_error', { featureId, - featureName: feature?.title, - branchName: feature?.branchName ?? null, + featureName: undefined, + branchName: null, error: errorInfo.message, errorType: errorInfo.type, projectPath: this.projectPath, @@ -854,7 +811,9 @@ Address the follow-up instructions above. Review the previous work and make the async checkWorktreeCapacity(featureId: string): Promise { const feature = await this.featureStateManager.loadFeature(this.projectPath, featureId); const rawBranchName = feature?.branchName ?? null; - const branchName = rawBranchName === 'main' ? null : rawBranchName; + // Normalize primary branch to null (works for main, master, or any default branch) + const primaryBranch = await this.worktreeResolver.getCurrentBranch(this.projectPath); + const branchName = rawBranchName === primaryBranch ? 
null : rawBranchName; const maxAgents = await this.autoLoopCoordinator.resolveMaxConcurrency( this.projectPath, diff --git a/apps/server/src/services/feature-state-manager.ts b/apps/server/src/services/feature-state-manager.ts index 3fcf69fc..cd35859e 100644 --- a/apps/server/src/services/feature-state-manager.ts +++ b/apps/server/src/services/feature-state-manager.ts @@ -123,23 +123,28 @@ export class FeatureStateManager { }); // Create notifications for important status changes - const notificationService = getNotificationService(); - if (status === 'waiting_approval') { - await notificationService.createNotification({ - type: 'feature_waiting_approval', - title: 'Feature Ready for Review', - message: `"${feature.name || featureId}" is ready for your review and approval.`, - featureId, - projectPath, - }); - } else if (status === 'verified') { - await notificationService.createNotification({ - type: 'feature_verified', - title: 'Feature Verified', - message: `"${feature.name || featureId}" has been verified and is complete.`, - featureId, - projectPath, - }); + // Wrapped in try-catch so failures don't block syncFeatureToAppSpec below + try { + const notificationService = getNotificationService(); + if (status === 'waiting_approval') { + await notificationService.createNotification({ + type: 'feature_waiting_approval', + title: 'Feature Ready for Review', + message: `"${feature.name || featureId}" is ready for your review and approval.`, + featureId, + projectPath, + }); + } else if (status === 'verified') { + await notificationService.createNotification({ + type: 'feature_verified', + title: 'Feature Verified', + message: `"${feature.name || featureId}" has been verified and is complete.`, + featureId, + projectPath, + }); + } + } catch (notificationError) { + logger.warn(`Failed to create notification for feature ${featureId}:`, notificationError); } // Sync completed/verified features to app_spec.txt @@ -334,7 +339,7 @@ export class FeatureStateManager { 
Object.assign(feature.planSpec, updates); // If content is being updated and it's different from old content, increment version - if (updates.content && updates.content !== oldContent) { + if (updates.content !== undefined && updates.content !== oldContent) { feature.planSpec.version = (feature.planSpec.version || 0) + 1; } @@ -446,6 +451,11 @@ export class FeatureStateManager { status, tasks: feature.planSpec.tasks, }); + } else { + const availableIds = feature.planSpec.tasks.map((t) => t.id).join(', '); + logger.warn( + `[updateTaskStatus] Task ${taskId} not found in feature ${featureId} (${projectPath}). Available task IDs: [${availableIds}]` + ); } } catch (error) { logger.error(`Failed to update task ${taskId} status for ${featureId}:`, error); diff --git a/apps/server/src/services/pipeline-orchestrator.ts b/apps/server/src/services/pipeline-orchestrator.ts index 3eb427c8..4308825b 100644 --- a/apps/server/src/services/pipeline-orchestrator.ts +++ b/apps/server/src/services/pipeline-orchestrator.ts @@ -362,7 +362,7 @@ export class PipelineOrchestrator { await this.executePipeline(context); // Re-fetch feature to check if executePipeline set a terminal status (e.g., merge_conflict) - const reloadedFeature = await this.featureLoader.getById(projectPath, featureId); + const reloadedFeature = await this.featureStateManager.loadFeature(projectPath, featureId); const finalStatus = feature.skipTests ? 
'waiting_approval' : 'verified'; // Only update status if not already in a terminal state @@ -516,7 +516,7 @@ export class PipelineOrchestrator { projectPath, branchName, worktreePath || projectPath, - targetBranch, + targetBranch || 'main', { deleteWorktreeAndBranch: false, } @@ -562,22 +562,33 @@ export class PipelineOrchestrator { let passCount = 0; let failCount = 0; + let inFailureContext = false; for (const line of lines) { const trimmed = line.trim(); if (trimmed.includes('FAIL') || trimmed.includes('FAILED')) { const match = trimmed.match(/(?:FAIL|FAILED)\s+(.+)/); if (match) failedTests.push(match[1].trim()); failCount++; + inFailureContext = true; } else if (trimmed.includes('PASS') || trimmed.includes('PASSED')) { passCount++; + inFailureContext = false; } if (trimmed.match(/^>\s+.*\.(test|spec)\./)) { failedTests.push(trimmed.replace(/^>\s+/, '')); } - if ( - trimmed.includes('AssertionError') || - trimmed.includes('toBe') || - trimmed.includes('toEqual') + // Only capture assertion details when they appear in failure context + // or match explicit assertion error / expect patterns + if (trimmed.includes('AssertionError') || trimmed.includes('AssertionError')) { + failedTests.push(trimmed); + } else if ( + inFailureContext && + /expect\(.+\)\.(toBe|toEqual|toMatch|toThrow|toContain)\s*\(/.test(trimmed) + ) { + failedTests.push(trimmed); + } else if ( + inFailureContext && + (trimmed.startsWith('Expected') || trimmed.startsWith('Received')) ) { failedTests.push(trimmed); } diff --git a/apps/ui/eslint.config.mjs b/apps/ui/eslint.config.mjs index 6cf025de..3ad4d79d 100644 --- a/apps/ui/eslint.config.mjs +++ b/apps/ui/eslint.config.mjs @@ -96,6 +96,7 @@ const eslintConfig = defineConfig([ setInterval: 'readonly', clearTimeout: 'readonly', clearInterval: 'readonly', + queueMicrotask: 'readonly', // Node.js (for scripts and Electron) process: 'readonly', require: 'readonly', diff --git a/apps/ui/src/store/app-store.ts b/apps/ui/src/store/app-store.ts index 
c0735355..7cc77907 100644 --- a/apps/ui/src/store/app-store.ts +++ b/apps/ui/src/store/app-store.ts @@ -7,7 +7,6 @@ import { createLogger } from '@automaker/utils/logger'; // Note: setItem/getItem moved to ./utils/theme-utils.ts import { UI_SANS_FONT_OPTIONS, UI_MONO_FONT_OPTIONS } from '@/config/ui-font-options'; import type { - Feature as BaseFeature, FeatureImagePath, FeatureTextFilePath, ModelAlias, @@ -15,25 +14,11 @@ import type { ThinkingLevel, ReasoningEffort, ModelProvider, - CursorModelId, - CodexModelId, - OpencodeModelId, - GeminiModelId, - CopilotModelId, - PhaseModelConfig, PhaseModelKey, PhaseModelEntry, - MCPServerConfig, - FeatureStatusWithPipeline, - PipelineConfig, PipelineStep, - PromptCustomization, ModelDefinition, ServerLogLevel, - EventHook, - ClaudeApiProfile, - ClaudeCompatibleProvider, - SidebarStyle, ParsedTask, PlanSpec, } from '@automaker/types'; @@ -2131,7 +2116,7 @@ export const useAppStore = create()((set, get) => ({ const updateSizes = (layout: TerminalPanelContent): TerminalPanelContent => { if (layout.type === 'split') { // Find matching panels and update sizes - const updatedPanels = layout.panels.map((panel, index) => { + const updatedPanels = layout.panels.map((panel, _index) => { // Generate key for this panel const panelKey = panel.type === 'split' diff --git a/apps/ui/src/store/types/state-types.ts b/apps/ui/src/store/types/state-types.ts index 4febb1ca..e06bb618 100644 --- a/apps/ui/src/store/types/state-types.ts +++ b/apps/ui/src/store/types/state-types.ts @@ -2,8 +2,6 @@ import type { Project, TrashedProject } from '@/lib/electron'; import type { ModelAlias, PlanningMode, - ThinkingLevel, - ReasoningEffort, ModelProvider, CursorModelId, CodexModelId, @@ -33,7 +31,7 @@ import type { BackgroundSettings, } from './ui-types'; import type { ApiKeys } from './settings-types'; -import type { ChatMessage, ChatSession, FeatureImage } from './chat-types'; +import type { ChatMessage, ChatSession } from './chat-types'; import type { 
TerminalState, TerminalPanelContent, PersistedTerminalState } from './terminal-types'; import type { Feature, ProjectAnalysis } from './project-types'; import type { ClaudeUsage, CodexUsage } from './usage-types'; From 67dd6281158c6dd11045d7f5680076156b20b5b3 Mon Sep 17 00:00:00 2001 From: gsxdsm Date: Mon, 16 Feb 2026 13:35:49 -0800 Subject: [PATCH 81/89] test: Add mock for getCurrentBranch in pipeline orchestrator tests --- apps/server/tests/unit/services/pipeline-orchestrator.test.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/apps/server/tests/unit/services/pipeline-orchestrator.test.ts b/apps/server/tests/unit/services/pipeline-orchestrator.test.ts index 44bf8d9a..aa543afb 100644 --- a/apps/server/tests/unit/services/pipeline-orchestrator.test.ts +++ b/apps/server/tests/unit/services/pipeline-orchestrator.test.ts @@ -165,6 +165,7 @@ describe('PipelineOrchestrator', () => { mockWorktreeResolver = { findWorktreeForBranch: vi.fn().mockResolvedValue('/test/worktree'), + getCurrentBranch: vi.fn().mockResolvedValue('main'), } as unknown as WorktreeResolver; mockConcurrencyManager = { From 727a7a5b9db12bad8ad6484c2432bcb36db98518 Mon Sep 17 00:00:00 2001 From: gsxdsm Date: Mon, 16 Feb 2026 14:14:17 -0800 Subject: [PATCH 82/89] feat: Exclude waiting_approval cards from active running state display --- .../components/kanban-card/kanban-card.tsx | 15 +++++++++------ .../board-view/components/list-view/list-row.tsx | 8 ++++++-- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/apps/ui/src/components/views/board-view/components/kanban-card/kanban-card.tsx b/apps/ui/src/components/views/board-view/components/kanban-card/kanban-card.tsx index f6725a7d..59bf8d8e 100644 --- a/apps/ui/src/components/views/board-view/components/kanban-card/kanban-card.tsx +++ b/apps/ui/src/components/views/board-view/components/kanban-card/kanban-card.tsx @@ -108,6 +108,9 @@ export const KanbanCard = memo(function KanbanCard({ currentProject: state.currentProject, })) ); + // 
A card in waiting_approval should not display as "actively running" even if + // it's still in the runningAutoTasks list. The waiting_approval UI takes precedence. + const isActivelyRunning = !!isCurrentAutoTask && feature.status !== 'waiting_approval'; const [isLifted, setIsLifted] = useState(false); useLayoutEffect(() => { @@ -186,10 +189,10 @@ export const KanbanCard = memo(function KanbanCard({ // Disable hover translate for in-progress cards to prevent gap showing gradient isInteractive && !reduceEffects && - !isCurrentAutoTask && + !isActivelyRunning && 'hover:-translate-y-0.5 hover:shadow-md hover:shadow-black/10 bg-transparent', !glassmorphism && 'backdrop-blur-[0px]!', - !isCurrentAutoTask && + !isActivelyRunning && cardBorderEnabled && (cardBorderOpacity === 100 ? 'border-border/50' : 'border'), hasError && 'border-[var(--status-error)] border-2 shadow-[var(--status-error-bg)] shadow-lg', @@ -206,7 +209,7 @@ export const KanbanCard = memo(function KanbanCard({ const renderCardContent = () => ( - {isCurrentAutoTask ? ( + {isActivelyRunning ? (
{renderCardContent()}
) : ( renderCardContent() diff --git a/apps/ui/src/components/views/board-view/components/list-view/list-row.tsx b/apps/ui/src/components/views/board-view/components/list-view/list-row.tsx index 2c5474f9..6d14c269 100644 --- a/apps/ui/src/components/views/board-view/components/list-view/list-row.tsx +++ b/apps/ui/src/components/views/board-view/components/list-view/list-row.tsx @@ -209,6 +209,10 @@ export const ListRow = memo(function ListRow({ blockingDependencies = [], className, }: ListRowProps) { + // A card in waiting_approval should not display as "actively running" even if + // it's still in the runningAutoTasks list. The waiting_approval UI takes precedence. + const isActivelyRunning = isCurrentAutoTask && feature.status !== 'waiting_approval'; + const handleRowClick = useCallback( (e: React.MouseEvent) => { // Don't trigger row click if clicking on checkbox or actions @@ -349,13 +353,13 @@ export const ListRow = memo(function ListRow({ {/* Actions column */}
- +
); // Wrap with animated border for currently running auto task - if (isCurrentAutoTask) { + if (isActivelyRunning) { return
{rowContent}
; } From 416ef3a3940c7cdd4d8bcc4c9ad80e8313bd1b12 Mon Sep 17 00:00:00 2001 From: gsxdsm Date: Mon, 16 Feb 2026 18:58:42 -0800 Subject: [PATCH 83/89] feat: Add error handling to auto-mode facade and implement followUp feature. Fix Claude weekly usage indicator. Fix mobile card drag --- apps/server/src/services/auto-mode/compat.ts | 5 +- apps/server/src/services/auto-mode/facade.ts | 155 ++++++++++++++---- apps/server/src/services/auto-mode/index.ts | 1 + apps/server/src/services/auto-mode/types.ts | 20 +++ .../src/services/claude-usage-service.ts | 37 ++++- .../src/services/feature-state-manager.ts | 41 +++++ .../services/claude-usage-service.test.ts | 60 +++++++ .../services/feature-state-manager.test.ts | 90 ++++++++++ .../src/components/ui/task-progress-panel.tsx | 55 ++++--- apps/ui/src/components/usage-popover.tsx | 51 +++++- .../kanban-card/agent-info-panel.tsx | 12 ++ .../components/kanban-card/card-header.tsx | 9 +- .../components/kanban-card/kanban-card.tsx | 8 +- .../views/board-view/mobile-usage-bar.tsx | 25 ++- apps/ui/src/store/utils/usage-utils.ts | 59 +++++++ 15 files changed, 552 insertions(+), 76 deletions(-) diff --git a/apps/server/src/services/auto-mode/compat.ts b/apps/server/src/services/auto-mode/compat.ts index cece2475..2c713c01 100644 --- a/apps/server/src/services/auto-mode/compat.ts +++ b/apps/server/src/services/auto-mode/compat.ts @@ -13,6 +13,7 @@ import { GlobalAutoModeService } from './global-service.js'; import { AutoModeServiceFacade } from './facade.js'; import type { SettingsService } from '../settings-service.js'; import type { FeatureLoader } from '../feature-loader.js'; +import type { ClaudeUsageService } from '../claude-usage-service.js'; import type { FacadeOptions, AutoModeStatus, RunningAgentInfo } from './types.js'; /** @@ -27,7 +28,8 @@ export class AutoModeServiceCompat { constructor( events: EventEmitter, settingsService: SettingsService | null, - featureLoader: FeatureLoader + featureLoader: FeatureLoader, + 
claudeUsageService?: ClaudeUsageService | null ) { this.globalService = new GlobalAutoModeService(events, settingsService, featureLoader); const sharedServices = this.globalService.getSharedServices(); @@ -37,6 +39,7 @@ export class AutoModeServiceCompat { settingsService, featureLoader, sharedServices, + claudeUsageService: claudeUsageService ?? null, }; } diff --git a/apps/server/src/services/auto-mode/facade.ts b/apps/server/src/services/auto-mode/facade.ts index 5d8bde58..e31543b4 100644 --- a/apps/server/src/services/auto-mode/facade.ts +++ b/apps/server/src/services/auto-mode/facade.ts @@ -38,6 +38,7 @@ import type { SettingsService } from '../settings-service.js'; import type { EventEmitter } from '../../lib/events.js'; import type { FacadeOptions, + FacadeError, AutoModeStatus, ProjectAutoModeStatus, WorktreeCapacityInfo, @@ -89,6 +90,45 @@ export class AutoModeServiceFacade { private readonly settingsService: SettingsService | null ) {} + /** + * Classify and log an error at the facade boundary. + * Emits an error event to the UI so failures are surfaced to the user. + * + * @param error - The caught error + * @param method - The facade method name where the error occurred + * @param featureId - Optional feature ID for context + * @returns The classified FacadeError for structured consumption + */ + private handleFacadeError(error: unknown, method: string, featureId?: string): FacadeError { + const errorInfo = classifyError(error); + + // Log at the facade boundary for debugging + logger.error( + `[${method}] ${featureId ? `Feature ${featureId}: ` : ''}${errorInfo.message}`, + error + ); + + // Emit error event to UI unless it's an abort/cancellation + if (!errorInfo.isAbort && !errorInfo.isCancellation) { + this.eventBus.emitAutoModeEvent('auto_mode_error', { + featureId: featureId ?? 
null, + featureName: undefined, + branchName: null, + error: errorInfo.message, + errorType: errorInfo.type, + projectPath: this.projectPath, + }); + } + + return { + method, + errorType: errorInfo.type, + message: errorInfo.message, + featureId, + projectPath: this.projectPath, + }; + } + /** * Create a new AutoModeServiceFacade instance for a specific project. * @@ -447,11 +487,16 @@ export class AutoModeServiceFacade { * @param maxConcurrency - Maximum concurrent features */ async startAutoLoop(branchName: string | null = null, maxConcurrency?: number): Promise { - return this.autoLoopCoordinator.startAutoLoopForProject( - this.projectPath, - branchName, - maxConcurrency - ); + try { + return await this.autoLoopCoordinator.startAutoLoopForProject( + this.projectPath, + branchName, + maxConcurrency + ); + } catch (error) { + this.handleFacadeError(error, 'startAutoLoop'); + throw error; + } } /** @@ -459,7 +504,12 @@ export class AutoModeServiceFacade { * @param branchName - The branch name, or null for main worktree */ async stopAutoLoop(branchName: string | null = null): Promise { - return this.autoLoopCoordinator.stopAutoLoopForProject(this.projectPath, branchName); + try { + return await this.autoLoopCoordinator.stopAutoLoopForProject(this.projectPath, branchName); + } catch (error) { + this.handleFacadeError(error, 'stopAutoLoop'); + throw error; + } } /** @@ -500,14 +550,19 @@ export class AutoModeServiceFacade { _calledInternally?: boolean; } ): Promise { - return this.executionService.executeFeature( - this.projectPath, - featureId, - useWorktrees, - isAutoMode, - providedWorktreePath, - options - ); + try { + return await this.executionService.executeFeature( + this.projectPath, + featureId, + useWorktrees, + isAutoMode, + providedWorktreePath, + options + ); + } catch (error) { + this.handleFacadeError(error, 'executeFeature', featureId); + throw error; + } } /** @@ -515,9 +570,14 @@ export class AutoModeServiceFacade { * @param featureId - ID of the 
feature to stop */ async stopFeature(featureId: string): Promise { - // Cancel any pending plan approval for this feature - this.cancelPlanApproval(featureId); - return this.executionService.stopFeature(featureId); + try { + // Cancel any pending plan approval for this feature + this.cancelPlanApproval(featureId); + return await this.executionService.stopFeature(featureId); + } catch (error) { + this.handleFacadeError(error, 'stopFeature', featureId); + throw error; + } } /** @@ -552,23 +612,54 @@ export class AutoModeServiceFacade { imagePaths?: string[], useWorktrees = true ): Promise { - // Stub: acquire concurrency slot then immediately throw. - // Heavy I/O (loadFeature, worktree resolution, context reading, prompt building) - // is deferred to the real AutoModeService.followUpFeature implementation. validateWorkingDirectory(this.projectPath); - const runningEntry = this.concurrencyManager.acquire({ - featureId, - projectPath: this.projectPath, - isAutoMode: false, - }); - try { - // NOTE: Facade does not have runAgent - this method requires AutoModeService - // Do NOT emit start events before throwing to prevent false start events - throw new Error( - 'followUpFeature not fully implemented in facade - use AutoModeService.followUpFeature instead' - ); + // Load feature to build the prompt context + const feature = await this.featureStateManager.loadFeature(this.projectPath, featureId); + if (!feature) throw new Error(`Feature ${featureId} not found`); + + // Read previous agent output as context + const featureDir = getFeatureDir(this.projectPath, featureId); + let previousContext = ''; + try { + previousContext = (await secureFs.readFile( + path.join(featureDir, 'agent-output.md'), + 'utf-8' + )) as string; + } catch { + // No previous context available - that's OK + } + + // Build the feature prompt section + const featurePrompt = `## Feature Implementation Task\n\n**Feature ID:** ${feature.id}\n**Title:** ${feature.title || 'Untitled 
Feature'}\n**Description:** ${feature.description}\n`; + + // Get the follow-up prompt template and build the continuation prompt + const prompts = await getPromptCustomization(this.settingsService, '[Facade]'); + let continuationPrompt = prompts.autoMode.followUpPromptTemplate; + continuationPrompt = continuationPrompt + .replace(/\{\{featurePrompt\}\}/g, featurePrompt) + .replace(/\{\{previousContext\}\}/g, previousContext) + .replace(/\{\{followUpInstructions\}\}/g, prompt); + + // Store image paths on the feature so executeFeature can pick them up + if (imagePaths && imagePaths.length > 0) { + feature.imagePaths = imagePaths.map((p) => ({ + path: p, + filename: p.split('/').pop() || p, + mimeType: 'image/*', + })); + await this.featureStateManager.updateFeatureStatus( + this.projectPath, + featureId, + feature.status || 'in_progress' + ); + } + + // Delegate to executeFeature with the built continuation prompt + await this.executeFeature(featureId, useWorktrees, false, undefined, { + continuationPrompt, + }); } catch (error) { const errorInfo = classifyError(error); if (!errorInfo.isAbort) { @@ -582,8 +673,6 @@ export class AutoModeServiceFacade { }); } throw error; - } finally { - this.concurrencyManager.release(featureId); } } diff --git a/apps/server/src/services/auto-mode/index.ts b/apps/server/src/services/auto-mode/index.ts index 9e150ad2..40e0ee84 100644 --- a/apps/server/src/services/auto-mode/index.ts +++ b/apps/server/src/services/auto-mode/index.ts @@ -58,6 +58,7 @@ export type { WorktreeCapacityInfo, RunningAgentInfo, OrphanedFeatureInfo, + FacadeError, GlobalAutoModeOperations, } from './types.js'; diff --git a/apps/server/src/services/auto-mode/types.ts b/apps/server/src/services/auto-mode/types.ts index b831daba..fc82cb13 100644 --- a/apps/server/src/services/auto-mode/types.ts +++ b/apps/server/src/services/auto-mode/types.ts @@ -15,6 +15,7 @@ import type { ConcurrencyManager } from '../concurrency-manager.js'; import type { AutoLoopCoordinator 
} from '../auto-loop-coordinator.js'; import type { WorktreeResolver } from '../worktree-resolver.js'; import type { TypedEventBus } from '../typed-event-bus.js'; +import type { ClaudeUsageService } from '../claude-usage-service.js'; // Re-export types from extracted services for route consumption export type { AutoModeConfig, ProjectAutoLoopState } from '../auto-loop-coordinator.js'; @@ -55,6 +56,8 @@ export interface FacadeOptions { featureLoader?: FeatureLoader; /** Shared services for state sharing across facades (optional) */ sharedServices?: SharedServices; + /** ClaudeUsageService for checking usage limits before picking up features (optional) */ + claudeUsageService?: ClaudeUsageService | null; } /** @@ -110,6 +113,23 @@ export interface OrphanedFeatureInfo { missingBranch: string; } +/** + * Structured error object returned/emitted by facade methods. + * Provides consistent error information for callers and UI consumers. + */ +export interface FacadeError { + /** The facade method where the error originated */ + method: string; + /** Classified error type from the error handler */ + errorType: import('@automaker/types').ErrorType; + /** Human-readable error message */ + message: string; + /** Feature ID if the error is associated with a specific feature */ + featureId?: string; + /** Project path where the error occurred */ + projectPath: string; +} + /** * Interface describing global auto-mode operations (not project-specific). * Used by routes that need global state access. 
diff --git a/apps/server/src/services/claude-usage-service.ts b/apps/server/src/services/claude-usage-service.ts index aa8afc1c..6438b5dc 100644 --- a/apps/server/src/services/claude-usage-service.ts +++ b/apps/server/src/services/claude-usage-service.ts @@ -294,7 +294,16 @@ export class ClaudeUsageService { this.killPtyProcess(ptyProcess); } // Don't fail if we have data - return it instead - if (output.includes('Current session')) { + // Check cleaned output since raw output has ANSI codes between words + // eslint-disable-next-line no-control-regex + const cleanedForCheck = output + .replace(/\x1B\[(\d+)C/g, (_m: string, n: string) => ' '.repeat(parseInt(n, 10))) + .replace(/\x1B\[[0-9;?]*[A-Za-z@]/g, ''); + if ( + cleanedForCheck.includes('Current session') || + cleanedForCheck.includes('% used') || + cleanedForCheck.includes('% left') + ) { resolve(output); } else if (hasSeenTrustPrompt) { // Trust prompt was shown but we couldn't auto-approve it @@ -320,8 +329,13 @@ export class ClaudeUsageService { output += data; // Strip ANSI codes for easier matching + // Convert cursor forward (ESC[nC) to spaces first to preserve word boundaries, + // then strip remaining ANSI sequences. Without this, the Claude CLI TUI output + // like "Current week (all models)" becomes "Currentweek(allmodels)". 
// eslint-disable-next-line no-control-regex - const cleanOutput = output.replace(/\x1B\[[0-9;]*[A-Za-z]/g, ''); + const cleanOutput = output + .replace(/\x1B\[(\d+)C/g, (_match: string, n: string) => ' '.repeat(parseInt(n, 10))) + .replace(/\x1B\[[0-9;?]*[A-Za-z@]/g, ''); // Check for specific authentication/permission errors // Must be very specific to avoid false positives from garbled terminal encoding @@ -356,7 +370,8 @@ export class ClaudeUsageService { const hasUsageIndicators = cleanOutput.includes('Current session') || (cleanOutput.includes('Usage') && cleanOutput.includes('% left')) || - // Additional patterns for winpty - look for percentage patterns + // Look for percentage patterns - allow optional whitespace between % and left/used + // since cursor movement codes may or may not create spaces after stripping /\d+%\s*(left|used|remaining)/i.test(cleanOutput) || cleanOutput.includes('Resets in') || cleanOutput.includes('Current week'); @@ -382,12 +397,15 @@ export class ClaudeUsageService { // Handle Trust Dialog - multiple variants: // - "Do you want to work in this folder?" // - "Ready to code here?" / "I'll need permission to work with your files" + // - "Quick safety check" / "Yes, I trust this folder" // Since we are running in cwd (project dir), it is safe to approve. 
if ( !hasApprovedTrust && (cleanOutput.includes('Do you want to work in this folder?') || cleanOutput.includes('Ready to code here') || - cleanOutput.includes('permission to work with your files')) + cleanOutput.includes('permission to work with your files') || + cleanOutput.includes('trust this folder') || + cleanOutput.includes('safety check')) ) { hasApprovedTrust = true; hasSeenTrustPrompt = true; @@ -471,10 +489,17 @@ export class ClaudeUsageService { * Handles CSI, OSC, and other common ANSI sequences */ private stripAnsiCodes(text: string): string { - // First strip ANSI sequences (colors, etc) and handle CR + // First, convert cursor movement sequences to whitespace to preserve word boundaries. + // The Claude CLI TUI uses ESC[nC (cursor forward) instead of actual spaces between words. + // Without this, "Current week (all models)" becomes "Currentweek(allmodels)" after stripping. // eslint-disable-next-line no-control-regex let clean = text - // CSI sequences: ESC [ ... (letter or @) + // Cursor forward (CSI n C): replace with n spaces to preserve word separation + .replace(/\x1B\[(\d+)C/g, (_match, n) => ' '.repeat(parseInt(n, 10))) + // Cursor movement (up/down/back/position): replace with newline or nothing + .replace(/\x1B\[\d*[ABD]/g, '') // cursor up (A), down (B), back (D) + .replace(/\x1B\[\d+;\d+[Hf]/g, '\n') // cursor position (H/f) + // Now strip remaining CSI sequences (colors, modes, etc.) .replace(/\x1B\[[0-9;?]*[A-Za-z@]/g, '') // OSC sequences: ESC ] ... 
terminated by BEL, ST, or another ESC .replace(/\x1B\][^\x07\x1B]*(?:\x07|\x1B\\)?/g, '') diff --git a/apps/server/src/services/feature-state-manager.ts b/apps/server/src/services/feature-state-manager.ts index cd35859e..e8afe0b9 100644 --- a/apps/server/src/services/feature-state-manager.ts +++ b/apps/server/src/services/feature-state-manager.ts @@ -107,6 +107,47 @@ export class FeatureStateManager { // Badge will show for 2 minutes after this timestamp if (status === 'waiting_approval') { feature.justFinishedAt = new Date().toISOString(); + + // Finalize task statuses when feature is done: + // - Mark any in_progress tasks as completed (agent finished but didn't explicitly complete them) + // - Do NOT mark pending tasks as completed (they were never started) + // - Clear currentTaskId since no task is actively running + // This prevents cards in "waiting for review" from appearing to still have running tasks + if (feature.planSpec?.tasks) { + let tasksFinalized = 0; + for (const task of feature.planSpec.tasks) { + if (task.status === 'in_progress') { + task.status = 'completed'; + tasksFinalized++; + } + } + if (tasksFinalized > 0) { + logger.info( + `[updateFeatureStatus] Finalized ${tasksFinalized} in_progress tasks for feature ${featureId} moving to waiting_approval` + ); + } + // Update tasksCompleted count to reflect actual completed tasks + feature.planSpec.tasksCompleted = feature.planSpec.tasks.filter( + (t) => t.status === 'completed' + ).length; + feature.planSpec.currentTaskId = undefined; + } + } else if (status === 'verified') { + // Also finalize in_progress tasks when moving directly to verified (skipTests=false) + // Do NOT mark pending tasks as completed - they were never started + if (feature.planSpec?.tasks) { + for (const task of feature.planSpec.tasks) { + if (task.status === 'in_progress') { + task.status = 'completed'; + } + } + feature.planSpec.tasksCompleted = feature.planSpec.tasks.filter( + (t) => t.status === 'completed' + ).length; + 
feature.planSpec.currentTaskId = undefined; + } + // Clear the timestamp when moving to other statuses + feature.justFinishedAt = undefined; } else { // Clear the timestamp when moving to other statuses feature.justFinishedAt = undefined; diff --git a/apps/server/tests/unit/services/claude-usage-service.test.ts b/apps/server/tests/unit/services/claude-usage-service.test.ts index 7901192c..bb88381e 100644 --- a/apps/server/tests/unit/services/claude-usage-service.test.ts +++ b/apps/server/tests/unit/services/claude-usage-service.test.ts @@ -177,6 +177,66 @@ describe('claude-usage-service.ts', () => { // BEL is stripped, newlines and tabs preserved expect(result).toBe('Line 1\nLine 2\tTabbed with bell'); }); + + it('should convert cursor forward (ESC[nC) to spaces', () => { + const service = new ClaudeUsageService(); + // Claude CLI TUI uses ESC[1C instead of space between words + const input = 'Current\x1B[1Csession'; + // @ts-expect-error - accessing private method for testing + const result = service.stripAnsiCodes(input); + + expect(result).toBe('Current session'); + }); + + it('should handle multi-character cursor forward sequences', () => { + const service = new ClaudeUsageService(); + // ESC[3C = move cursor forward 3 positions = 3 spaces + const input = 'Hello\x1B[3Cworld'; + // @ts-expect-error - accessing private method for testing + const result = service.stripAnsiCodes(input); + + expect(result).toBe('Hello world'); + }); + + it('should handle real Claude CLI TUI output with cursor movement codes', () => { + const service = new ClaudeUsageService(); + // Simulates actual Claude CLI /usage output where words are separated by ESC[1C + const input = + 'Current\x1B[1Cweek\x1B[1C(all\x1B[1Cmodels)\n' + + '\x1B[32m█████████████████████████▌\x1B[0m\x1B[1C51%\x1B[1Cused\n' + + 'Resets\x1B[1CFeb\x1B[1C19\x1B[1Cat\x1B[1C3pm\x1B[1C(America/Los_Angeles)'; + // @ts-expect-error - accessing private method for testing + const result = service.stripAnsiCodes(input); + + 
expect(result).toContain('Current week (all models)'); + expect(result).toContain('51% used'); + expect(result).toContain('Resets Feb 19 at 3pm (America/Los_Angeles)'); + }); + + it('should parse usage output with cursor movement codes between words', () => { + const service = new ClaudeUsageService(); + // Simulates the full /usage TUI output with ESC[1C between every word + const output = + 'Current\x1B[1Csession\n' + + '\x1B[32m█████████████▌\x1B[0m\x1B[1C27%\x1B[1Cused\n' + + 'Resets\x1B[1C9pm\x1B[1C(America/Los_Angeles)\n' + + '\n' + + 'Current\x1B[1Cweek\x1B[1C(all\x1B[1Cmodels)\n' + + '\x1B[32m█████████████████████████▌\x1B[0m\x1B[1C51%\x1B[1Cused\n' + + 'Resets\x1B[1CFeb\x1B[1C19\x1B[1Cat\x1B[1C3pm\x1B[1C(America/Los_Angeles)\n' + + '\n' + + 'Current\x1B[1Cweek\x1B[1C(Sonnet\x1B[1Conly)\n' + + '\x1B[32m██▌\x1B[0m\x1B[1C5%\x1B[1Cused\n' + + 'Resets\x1B[1CFeb\x1B[1C19\x1B[1Cat\x1B[1C11pm\x1B[1C(America/Los_Angeles)'; + // @ts-expect-error - accessing private method for testing + const result = service.parseUsageOutput(output); + + expect(result.sessionPercentage).toBe(27); + expect(result.weeklyPercentage).toBe(51); + expect(result.sonnetWeeklyPercentage).toBe(5); + expect(result.weeklyResetText).toContain('Resets Feb 19 at 3pm'); + expect(result.weeklyResetText).not.toContain('America/Los_Angeles'); + }); }); describe('parseResetTime', () => { diff --git a/apps/server/tests/unit/services/feature-state-manager.test.ts b/apps/server/tests/unit/services/feature-state-manager.test.ts index d53c40b9..bff51d78 100644 --- a/apps/server/tests/unit/services/feature-state-manager.test.ts +++ b/apps/server/tests/unit/services/feature-state-manager.test.ts @@ -151,6 +151,96 @@ describe('FeatureStateManager', () => { expect(savedFeature.justFinishedAt).toBeUndefined(); }); + it('should finalize in_progress and pending tasks when moving to waiting_approval', async () => { + const featureWithTasks: Feature = { + ...mockFeature, + status: 'in_progress', + planSpec: { + 
status: 'approved', + version: 1, + reviewedByUser: true, + currentTaskId: 'task-2', + tasksCompleted: 1, + tasks: [ + { id: 'task-1', title: 'Task 1', status: 'completed', description: 'First task' }, + { id: 'task-2', title: 'Task 2', status: 'in_progress', description: 'Second task' }, + { id: 'task-3', title: 'Task 3', status: 'pending', description: 'Third task' }, + ], + }, + }; + + (readJsonWithRecovery as Mock).mockResolvedValue({ + data: featureWithTasks, + recovered: false, + source: 'main', + }); + + await manager.updateFeatureStatus('/project', 'feature-123', 'waiting_approval'); + + const savedFeature = (atomicWriteJson as Mock).mock.calls[0][1] as Feature; + // All tasks should be completed + expect(savedFeature.planSpec?.tasks?.[0].status).toBe('completed'); + expect(savedFeature.planSpec?.tasks?.[1].status).toBe('completed'); + expect(savedFeature.planSpec?.tasks?.[2].status).toBe('completed'); + // currentTaskId should be cleared + expect(savedFeature.planSpec?.currentTaskId).toBeUndefined(); + // tasksCompleted should equal total tasks + expect(savedFeature.planSpec?.tasksCompleted).toBe(3); + }); + + it('should finalize tasks when moving to verified status', async () => { + const featureWithTasks: Feature = { + ...mockFeature, + status: 'in_progress', + planSpec: { + status: 'approved', + version: 1, + reviewedByUser: true, + currentTaskId: 'task-2', + tasksCompleted: 1, + tasks: [ + { id: 'task-1', title: 'Task 1', status: 'completed', description: 'First task' }, + { id: 'task-2', title: 'Task 2', status: 'in_progress', description: 'Second task' }, + { id: 'task-3', title: 'Task 3', status: 'pending', description: 'Third task' }, + ], + }, + }; + + (readJsonWithRecovery as Mock).mockResolvedValue({ + data: featureWithTasks, + recovered: false, + source: 'main', + }); + + await manager.updateFeatureStatus('/project', 'feature-123', 'verified'); + + const savedFeature = (atomicWriteJson as Mock).mock.calls[0][1] as Feature; + // All tasks should 
be completed + expect(savedFeature.planSpec?.tasks?.[0].status).toBe('completed'); + expect(savedFeature.planSpec?.tasks?.[1].status).toBe('completed'); + expect(savedFeature.planSpec?.tasks?.[2].status).toBe('completed'); + // currentTaskId should be cleared + expect(savedFeature.planSpec?.currentTaskId).toBeUndefined(); + // tasksCompleted should equal total tasks + expect(savedFeature.planSpec?.tasksCompleted).toBe(3); + // justFinishedAt should be cleared for verified + expect(savedFeature.justFinishedAt).toBeUndefined(); + }); + + it('should handle waiting_approval without planSpec tasks gracefully', async () => { + (readJsonWithRecovery as Mock).mockResolvedValue({ + data: { ...mockFeature }, + recovered: false, + source: 'main', + }); + + await manager.updateFeatureStatus('/project', 'feature-123', 'waiting_approval'); + + const savedFeature = (atomicWriteJson as Mock).mock.calls[0][1] as Feature; + expect(savedFeature.status).toBe('waiting_approval'); + expect(savedFeature.justFinishedAt).toBeDefined(); + }); + it('should create notification for waiting_approval status', async () => { const mockNotificationService = { createNotification: vi.fn() }; (getNotificationService as Mock).mockReturnValue(mockNotificationService); diff --git a/apps/ui/src/components/ui/task-progress-panel.tsx b/apps/ui/src/components/ui/task-progress-panel.tsx index f72d6174..6ed40295 100644 --- a/apps/ui/src/components/ui/task-progress-panel.tsx +++ b/apps/ui/src/components/ui/task-progress-panel.tsx @@ -59,24 +59,19 @@ export function TaskProgressPanel({ const planSpec = feature.planSpec; const planTasks = planSpec.tasks; // Already guarded by the if condition above const currentId = planSpec.currentTaskId; - const completedCount = planSpec.tasksCompleted || 0; - // Convert planSpec tasks to TaskInfo with proper status + // Convert planSpec tasks to TaskInfo using their persisted status // planTasks is guaranteed to be defined due to the if condition check - const initialTasks: 
TaskInfo[] = (planTasks as ParsedTask[]).map( - (t: ParsedTask, index: number) => ({ - id: t.id, - description: t.description, - filePath: t.filePath, - phase: t.phase, - status: - index < completedCount - ? ('completed' as const) - : t.id === currentId - ? ('in_progress' as const) - : ('pending' as const), - }) - ); + const initialTasks: TaskInfo[] = (planTasks as ParsedTask[]).map((t: ParsedTask) => ({ + id: t.id, + description: t.description, + filePath: t.filePath, + phase: t.phase, + status: + t.id === currentId + ? ('in_progress' as const) + : (t.status as TaskInfo['status']) || ('pending' as const), + })); setTasks(initialTasks); setCurrentTaskId(currentId || null); @@ -113,16 +108,12 @@ export function TaskProgressPanel({ const existingIndex = prev.findIndex((t) => t.id === taskEvent.taskId); if (existingIndex !== -1) { - // Update status to in_progress and mark previous as completed - return prev.map((t, idx) => { + // Update only the started task to in_progress + // Do NOT assume previous tasks are completed - rely on actual task_complete events + return prev.map((t) => { if (t.id === taskEvent.taskId) { return { ...t, status: 'in_progress' as const }; } - // If we are moving to a task that is further down the list, assume previous ones are completed - // This is a heuristic, but usually correct for sequential execution - if (idx < existingIndex && t.status !== 'completed') { - return { ...t, status: 'completed' as const }; - } return t; }); } @@ -151,6 +142,24 @@ export function TaskProgressPanel({ setCurrentTaskId(null); } break; + + case 'auto_mode_task_status': + if ('taskId' in event && 'status' in event) { + const taskEvent = event as Extract; + setTasks((prev) => + prev.map((t) => + t.id === taskEvent.taskId + ? 
{ ...t, status: taskEvent.status as TaskInfo['status'] } + : t + ) + ); + if (taskEvent.status === 'in_progress') { + setCurrentTaskId(taskEvent.taskId); + } else if (taskEvent.status === 'completed') { + setCurrentTaskId((current) => (current === taskEvent.taskId ? null : current)); + } + } + break; } }); diff --git a/apps/ui/src/components/usage-popover.tsx b/apps/ui/src/components/usage-popover.tsx index 5d8acb0b..b3f4347f 100644 --- a/apps/ui/src/components/usage-popover.tsx +++ b/apps/ui/src/components/usage-popover.tsx @@ -8,6 +8,7 @@ import { cn } from '@/lib/utils'; import { useSetupStore } from '@/store/setup-store'; import { AnthropicIcon, OpenAIIcon } from '@/components/ui/provider-icon'; import { useClaudeUsage, useCodexUsage } from '@/hooks/queries'; +import { getExpectedWeeklyPacePercentage, getPaceStatusLabel } from '@/store/utils/usage-utils'; // Error codes for distinguishing failure modes const ERROR_CODES = { @@ -146,13 +147,28 @@ export function UsagePopover() { return { color: 'text-green-500', icon: CheckCircle, bg: 'bg-green-500' }; }; - // Helper component for the progress bar - const ProgressBar = ({ percentage, colorClass }: { percentage: number; colorClass: string }) => ( -
+ // Helper component for the progress bar with optional pace indicator + const ProgressBar = ({ + percentage, + colorClass, + pacePercentage, + }: { + percentage: number; + colorClass: string; + pacePercentage?: number | null; + }) => ( +
+ {pacePercentage != null && pacePercentage > 0 && pacePercentage < 100 && ( +
+ )}
); @@ -163,6 +179,7 @@ export function UsagePopover() { resetText, isPrimary = false, stale = false, + pacePercentage, }: { title: string; subtitle: string; @@ -170,6 +187,7 @@ export function UsagePopover() { resetText?: string; isPrimary?: boolean; stale?: boolean; + pacePercentage?: number | null; }) => { const isValidPercentage = typeof percentage === 'number' && !isNaN(percentage) && isFinite(percentage); @@ -177,6 +195,10 @@ export function UsagePopover() { const status = getStatusInfo(safePercentage); const StatusIcon = status.icon; + const paceLabel = + isValidPercentage && pacePercentage != null + ? getPaceStatusLabel(safePercentage, pacePercentage) + : null; return (
- {resetText && ( -
+
+ {paceLabel ? ( +

(pacePercentage ?? 0) ? 'text-orange-500' : 'text-green-500' + )} + > + {paceLabel} +

+ ) : ( +
+ )} + {resetText && (

{resetText}

-
- )} + )} +
); }; @@ -384,6 +419,7 @@ export function UsagePopover() { percentage={claudeUsage.sonnetWeeklyPercentage} resetText={claudeUsage.sonnetResetText} stale={isClaudeStale} + pacePercentage={getExpectedWeeklyPacePercentage(claudeUsage.weeklyResetTime)} />
diff --git a/apps/ui/src/components/views/board-view/components/kanban-card/agent-info-panel.tsx b/apps/ui/src/components/views/board-view/components/kanban-card/agent-info-panel.tsx index a3540cd7..80e42371 100644 --- a/apps/ui/src/components/views/board-view/components/kanban-card/agent-info-panel.tsx +++ b/apps/ui/src/components/views/board-view/components/kanban-card/agent-info-panel.tsx @@ -153,6 +153,7 @@ export const AgentInfoPanel = memo(function AgentInfoPanel({ // Derive effective todos from planSpec.tasks when available, fallback to agentInfo.todos // Uses freshPlanSpec (from API) for accurate progress, with taskStatusMap for real-time updates + const isFeatureFinished = feature.status === 'waiting_approval' || feature.status === 'verified'; const effectiveTodos = useMemo(() => { // Use freshPlanSpec if available (fetched from API), fallback to store's feature.planSpec const planSpec = freshPlanSpec?.tasks?.length ? freshPlanSpec : feature.planSpec; @@ -163,6 +164,16 @@ export const AgentInfoPanel = memo(function AgentInfoPanel({ const currentTaskId = planSpec.currentTaskId; return planSpec.tasks.map((task: ParsedTask, index: number) => { + // If the feature is done (waiting_approval/verified), all tasks are completed + // This is a defensive UI-side check: the server should have already finalized + // task statuses, but stale data from before the fix could still show spinners + if (isFeatureFinished) { + return { + content: task.description, + status: 'completed' as const, + }; + } + // Use real-time status from WebSocket events if available const realtimeStatus = taskStatusMap.get(task.id); @@ -199,6 +210,7 @@ export const AgentInfoPanel = memo(function AgentInfoPanel({ feature.planSpec?.currentTaskId, agentInfo?.todos, taskStatusMap, + isFeatureFinished, ]); // Listen to WebSocket events for real-time task status updates diff --git a/apps/ui/src/components/views/board-view/components/kanban-card/card-header.tsx 
b/apps/ui/src/components/views/board-view/components/kanban-card/card-header.tsx index bdf028b9..cc97b202 100644 --- a/apps/ui/src/components/views/board-view/components/kanban-card/card-header.tsx +++ b/apps/ui/src/components/views/board-view/components/kanban-card/card-header.tsx @@ -1,5 +1,6 @@ // @ts-nocheck - header component props with optional handlers and status variants import { memo, useState } from 'react'; +import type { DraggableAttributes, DraggableSyntheticListeners } from '@dnd-kit/core'; import { Feature } from '@/store/app-store'; import { cn } from '@/lib/utils'; import { CardDescription, CardHeader, CardTitle } from '@/components/ui/card'; @@ -35,6 +36,8 @@ interface CardHeaderProps { onDelete: () => void; onViewOutput?: () => void; onSpawnTask?: () => void; + dragHandleListeners?: DraggableSyntheticListeners; + dragHandleAttributes?: DraggableAttributes; } export const CardHeaderSection = memo(function CardHeaderSection({ @@ -46,6 +49,8 @@ export const CardHeaderSection = memo(function CardHeaderSection({ onDelete, onViewOutput, onSpawnTask, + dragHandleListeners, + dragHandleAttributes, }: CardHeaderProps) { const [isDescriptionExpanded, setIsDescriptionExpanded] = useState(false); const [isDeleteDialogOpen, setIsDeleteDialogOpen] = useState(false); @@ -319,8 +324,10 @@ export const CardHeaderSection = memo(function CardHeaderSection({
{isDraggable && (
diff --git a/apps/ui/src/components/views/board-view/components/kanban-card/kanban-card.tsx b/apps/ui/src/components/views/board-view/components/kanban-card/kanban-card.tsx index 59bf8d8e..eb44c49b 100644 --- a/apps/ui/src/components/views/board-view/components/kanban-card/kanban-card.tsx +++ b/apps/ui/src/components/views/board-view/components/kanban-card/kanban-card.tsx @@ -32,7 +32,7 @@ function getCursorClass( ): string { if (isSelectionMode) return 'cursor-pointer'; if (isOverlay) return 'cursor-grabbing'; - if (isDraggable) return 'cursor-grab active:cursor-grabbing'; + // Drag cursor is now only on the drag handle, not the full card return 'cursor-default'; } @@ -172,7 +172,7 @@ export const KanbanCard = memo(function KanbanCard({ const isSelectable = isSelectionMode && feature.status === selectionTarget; const wrapperClasses = cn( - 'relative select-none outline-none touch-none transition-transform duration-200 ease-out', + 'relative select-none outline-none transition-transform duration-200 ease-out', getCursorClass(isOverlay, isDraggable, isSelectable), isOverlay && isLifted && 'scale-105 rotate-1 z-50', // Visual feedback when another card is being dragged over this one @@ -254,6 +254,8 @@ export const KanbanCard = memo(function KanbanCard({ onDelete={onDelete} onViewOutput={onViewOutput} onSpawnTask={onSpawnTask} + dragHandleListeners={isDraggable ? listeners : undefined} + dragHandleAttributes={isDraggable ? attributes : undefined} /> @@ -296,8 +298,6 @@ export const KanbanCard = memo(function KanbanCard({
diff --git a/apps/ui/src/components/views/board-view/mobile-usage-bar.tsx b/apps/ui/src/components/views/board-view/mobile-usage-bar.tsx index 918988e9..687de785 100644 --- a/apps/ui/src/components/views/board-view/mobile-usage-bar.tsx +++ b/apps/ui/src/components/views/board-view/mobile-usage-bar.tsx @@ -5,6 +5,7 @@ import { Spinner } from '@/components/ui/spinner'; import { getElectronAPI } from '@/lib/electron'; import { useAppStore } from '@/store/app-store'; import { AnthropicIcon, OpenAIIcon } from '@/components/ui/provider-icon'; +import { getExpectedWeeklyPacePercentage, getPaceStatusLabel } from '@/store/utils/usage-utils'; interface MobileUsageBarProps { showClaudeUsage: boolean; @@ -23,11 +24,15 @@ function UsageBar({ label, percentage, isStale, + pacePercentage, }: { label: string; percentage: number; isStale: boolean; + pacePercentage?: number | null; }) { + const paceLabel = pacePercentage != null ? getPaceStatusLabel(percentage, pacePercentage) : null; + return (
@@ -49,7 +54,7 @@ function UsageBar({
@@ -57,7 +62,24 @@ function UsageBar({ className={cn('h-full transition-all duration-500', getProgressBarColor(percentage))} style={{ width: `${Math.min(percentage, 100)}%` }} /> + {pacePercentage != null && pacePercentage > 0 && pacePercentage < 100 && ( +
+ )}
+ {paceLabel && ( +

(pacePercentage ?? 0) ? 'text-orange-500' : 'text-green-500' + )} + > + {paceLabel} +

+ )}
); } @@ -190,6 +212,7 @@ export function MobileUsageBar({ showClaudeUsage, showCodexUsage }: MobileUsageB label="Weekly" percentage={claudeUsage.weeklyPercentage} isStale={isClaudeStale} + pacePercentage={getExpectedWeeklyPacePercentage(claudeUsage.weeklyResetTime)} /> ) : (
diff --git a/apps/ui/src/store/utils/usage-utils.ts b/apps/ui/src/store/utils/usage-utils.ts
index 7b82fb12..1c363f21 100644
--- a/apps/ui/src/store/utils/usage-utils.ts
+++ b/apps/ui/src/store/utils/usage-utils.ts
@@ -1,5 +1,68 @@
 import type { ClaudeUsage } from '../types/usage-types';
 
+/**
+ * Calculate the expected weekly usage percentage based on how far through the week we are.
+ * The reset time is when the NEXT weekly reset occurs, so the current window is taken to be the
+ * 7 days leading up to it; the elapsed fraction of that window is the percentage of the budget
+ * that should have been used if usage were evenly distributed. The calculation does not depend
+ * on which weekday the reset falls on.
+ *
+ * @param weeklyResetTime - ISO date string for when the weekly usage next resets
+ * @param now - Reference time for the calculation (defaults to the current time)
+ * @returns The expected usage percentage (0-100), or null if the reset time is missing/invalid
+ */
+export function getExpectedWeeklyPacePercentage(
+  weeklyResetTime: string | undefined,
+  now: Date = new Date()
+): number | null {
+  if (!weeklyResetTime) return null;
+
+  const resetMs = new Date(weeklyResetTime).getTime();
+  const nowMs = now.getTime();
+  // An unparseable date yields NaN from getTime() rather than throwing
+  if (Number.isNaN(resetMs) || Number.isNaN(nowMs)) return null;
+
+  const WEEK_MS = 7 * 24 * 60 * 60 * 1000;
+
+  // The week started 7 days before the reset
+  const weekStartMs = resetMs - WEEK_MS;
+
+  // How far through the week are we?
+  const fractionElapsed = (nowMs - weekStartMs) / WEEK_MS;
+
+  // Clamp to 0-1 range: a reset already in the past gives 1, one over a week away gives 0
+  const clamped = Math.max(0, Math.min(1, fractionElapsed));
+
+  return clamped * 100;
+}
+
+/**
+ * Get a human-readable label for the pace status (ahead or behind expected usage).
+ * The difference is rounded to a whole percent before it is classified.
+ *
+ * @param actualPercentage - The actual usage percentage (0-100)
+ * @param expectedPercentage - The expected usage percentage (0-100)
+ * @returns "On pace", or a string like "5% ahead of pace" or "10% behind pace"; null when
+ *   no expected percentage is available or the difference is not a finite number
+ */
+export function getPaceStatusLabel(
+  actualPercentage: number,
+  expectedPercentage: number | null | undefined
+): string | null {
+  // Loose check so that undefined is handled the same way as null
+  if (expectedPercentage == null) return null;
+
+  const diff = Math.round(actualPercentage - expectedPercentage);
+  // Never render "NaN% ..." when either input is not a usable number
+  if (!Number.isFinite(diff)) return null;
+
+  if (diff === 0) return 'On pace';
+  // Using more than expected = behind pace (bad)
+  if (diff > 0) return `${Math.abs(diff)}% behind pace`;
+  // Using less than expected = ahead of pace (good)
+  return `${Math.abs(diff)}% ahead of pace`;
+}
+
 /**
  * Check if Claude usage is at its limit (any of: session >= 100%, weekly >= 100%, OR cost >= limit)
  * Returns true if any limit is reached, meaning auto mode should pause feature pickup.
From 4a8c6b0eba0a3c32961f8a9edf2555842b6b1517 Mon Sep 17 00:00:00 2001 From: gsxdsm Date: Mon, 16 Feb 2026 20:47:38 -0800 Subject: [PATCH 84/89] Update feature-state-manager.test.ts Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- .../tests/unit/services/feature-state-manager.test.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/apps/server/tests/unit/services/feature-state-manager.test.ts b/apps/server/tests/unit/services/feature-state-manager.test.ts index bff51d78..65998ce1 100644 --- a/apps/server/tests/unit/services/feature-state-manager.test.ts +++ b/apps/server/tests/unit/services/feature-state-manager.test.ts @@ -178,14 +178,14 @@ describe('FeatureStateManager', () => { await manager.updateFeatureStatus('/project', 'feature-123', 'waiting_approval'); const savedFeature = (atomicWriteJson as Mock).mock.calls[0][1] as Feature; - // All tasks should be completed + // Only in_progress tasks should be completed expect(savedFeature.planSpec?.tasks?.[0].status).toBe('completed');
expect(savedFeature.planSpec?.tasks?.[1].status).toBe('completed'); - expect(savedFeature.planSpec?.tasks?.[2].status).toBe('completed'); + expect(savedFeature.planSpec?.tasks?.[2].status).toBe('pending'); // currentTaskId should be cleared expect(savedFeature.planSpec?.currentTaskId).toBeUndefined(); - // tasksCompleted should equal total tasks - expect(savedFeature.planSpec?.tasksCompleted).toBe(3); + // tasksCompleted should be 2, not 3 + expect(savedFeature.planSpec?.tasksCompleted).toBe(2); }); it('should finalize tasks when moving to verified status', async () => { From 30fce3f7469bbd9ab04d2ae85be2dabb769df23e Mon Sep 17 00:00:00 2001 From: gsxdsm Date: Mon, 16 Feb 2026 21:24:24 -0800 Subject: [PATCH 85/89] test: Update task finalization behavior to keep pending tasks in review states --- .../services/feature-state-manager.test.ts | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/apps/server/tests/unit/services/feature-state-manager.test.ts b/apps/server/tests/unit/services/feature-state-manager.test.ts index 65998ce1..6abd4764 100644 --- a/apps/server/tests/unit/services/feature-state-manager.test.ts +++ b/apps/server/tests/unit/services/feature-state-manager.test.ts @@ -151,7 +151,7 @@ describe('FeatureStateManager', () => { expect(savedFeature.justFinishedAt).toBeUndefined(); }); - it('should finalize in_progress and pending tasks when moving to waiting_approval', async () => { + it('should finalize in_progress tasks but keep pending tasks when moving to waiting_approval', async () => { const featureWithTasks: Feature = { ...mockFeature, status: 'in_progress', @@ -178,13 +178,15 @@ describe('FeatureStateManager', () => { await manager.updateFeatureStatus('/project', 'feature-123', 'waiting_approval'); const savedFeature = (atomicWriteJson as Mock).mock.calls[0][1] as Feature; - // Only in_progress tasks should be completed + // Already completed tasks stay completed 
expect(savedFeature.planSpec?.tasks?.[0].status).toBe('completed'); + // in_progress tasks should be finalized to completed expect(savedFeature.planSpec?.tasks?.[1].status).toBe('completed'); + // pending tasks should remain pending (never started) expect(savedFeature.planSpec?.tasks?.[2].status).toBe('pending'); // currentTaskId should be cleared expect(savedFeature.planSpec?.currentTaskId).toBeUndefined(); - // tasksCompleted should be 2, not 3 + // tasksCompleted should equal actual completed tasks count expect(savedFeature.planSpec?.tasksCompleted).toBe(2); }); @@ -215,14 +217,16 @@ describe('FeatureStateManager', () => { await manager.updateFeatureStatus('/project', 'feature-123', 'verified'); const savedFeature = (atomicWriteJson as Mock).mock.calls[0][1] as Feature; - // All tasks should be completed + // Already completed tasks stay completed expect(savedFeature.planSpec?.tasks?.[0].status).toBe('completed'); + // in_progress tasks should be finalized to completed expect(savedFeature.planSpec?.tasks?.[1].status).toBe('completed'); - expect(savedFeature.planSpec?.tasks?.[2].status).toBe('completed'); + // pending tasks should remain pending (never started) + expect(savedFeature.planSpec?.tasks?.[2].status).toBe('pending'); // currentTaskId should be cleared expect(savedFeature.planSpec?.currentTaskId).toBeUndefined(); - // tasksCompleted should equal total tasks - expect(savedFeature.planSpec?.tasksCompleted).toBe(3); + // tasksCompleted should equal actual completed tasks count + expect(savedFeature.planSpec?.tasksCompleted).toBe(2); // justFinishedAt should be cleared for verified expect(savedFeature.justFinishedAt).toBeUndefined(); }); From aa940d44ff0692357633f5d1f6bcb2cf5071520f Mon Sep 17 00:00:00 2001 From: gsxdsm Date: Mon, 16 Feb 2026 22:10:50 -0800 Subject: [PATCH 86/89] feat: Add task retry logic and improve max turns limit --- apps/server/src/services/agent-executor.ts | 2 +- apps/server/src/services/execution-service.ts | 93 +++++- 
.../src/services/feature-state-manager.ts | 23 ++ .../unit/services/execution-service.test.ts | 296 ++++++++++++++++++ .../kanban-card/agent-info-panel.tsx | 11 +- 5 files changed, 419 insertions(+), 6 deletions(-) diff --git a/apps/server/src/services/agent-executor.ts b/apps/server/src/services/agent-executor.ts index 0d9c2399..cbf9e401 100644 --- a/apps/server/src/services/agent-executor.ts +++ b/apps/server/src/services/agent-executor.ts @@ -331,7 +331,7 @@ export class AgentExecutor { userFeedback ); const taskStream = provider.executeQuery( - this.buildExecOpts(options, taskPrompt, Math.min(sdkOptions?.maxTurns ?? 50, 50)) + this.buildExecOpts(options, taskPrompt, Math.min(sdkOptions?.maxTurns ?? 50, 150)) ); let taskOutput = '', taskStartDetected = false, diff --git a/apps/server/src/services/execution-service.ts b/apps/server/src/services/execution-service.ts index 75bb10bd..f7a51ace 100644 --- a/apps/server/src/services/execution-service.ts +++ b/apps/server/src/services/execution-service.ts @@ -270,6 +270,84 @@ ${feature.spec} } ); + // Check for incomplete tasks after agent execution. + // The agent may have finished early (hit max turns, decided it was done, etc.) + // while tasks are still pending. If so, re-run the agent to complete remaining tasks. 
+ const MAX_TASK_RETRY_ATTEMPTS = 3; + let taskRetryAttempts = 0; + while (!abortController.signal.aborted && taskRetryAttempts < MAX_TASK_RETRY_ATTEMPTS) { + const currentFeature = await this.loadFeatureFn(projectPath, featureId); + if (!currentFeature?.planSpec?.tasks) break; + + const pendingTasks = currentFeature.planSpec.tasks.filter( + (t) => t.status === 'pending' || t.status === 'in_progress' + ); + if (pendingTasks.length === 0) break; + + taskRetryAttempts++; + const totalTasks = currentFeature.planSpec.tasks.length; + const completedTasks = currentFeature.planSpec.tasks.filter( + (t) => t.status === 'completed' + ).length; + logger.info( + `[executeFeature] Feature ${featureId} has ${pendingTasks.length} incomplete tasks (${completedTasks}/${totalTasks} completed). Re-running agent (attempt ${taskRetryAttempts}/${MAX_TASK_RETRY_ATTEMPTS})` + ); + + this.eventBus.emitAutoModeEvent('auto_mode_progress', { + featureId, + branchName: feature.branchName ?? null, + content: `Agent finished with ${pendingTasks.length} tasks remaining. Re-running to complete tasks (attempt ${taskRetryAttempts}/${MAX_TASK_RETRY_ATTEMPTS})...`, + projectPath, + }); + + // Build a continuation prompt that tells the agent to finish remaining tasks + const remainingTasksList = pendingTasks + .map((t) => `- ${t.id}: ${t.description} (${t.status})`) + .join('\n'); + + const continuationPrompt = `## Continue Implementation - Incomplete Tasks + +The previous agent session ended before all tasks were completed. Please continue implementing the remaining tasks. + +**Completed:** ${completedTasks}/${totalTasks} tasks +**Remaining tasks:** +${remainingTasksList} + +Please continue from where you left off and complete all remaining tasks. 
Use the same [TASK_START:ID] and [TASK_COMPLETE:ID] markers for each task.`; + + await this.runAgentFn( + workDir, + featureId, + continuationPrompt, + abortController, + projectPath, + undefined, + model, + { + projectPath, + planningMode: 'skip', + requirePlanApproval: false, + systemPrompt: combinedSystemPrompt || undefined, + autoLoadClaudeMd, + thinkingLevel: feature.thinkingLevel, + branchName: feature.branchName ?? null, + } + ); + } + + // Log if tasks are still incomplete after retry attempts + if (taskRetryAttempts >= MAX_TASK_RETRY_ATTEMPTS) { + const finalFeature = await this.loadFeatureFn(projectPath, featureId); + const stillPending = finalFeature?.planSpec?.tasks?.filter( + (t) => t.status === 'pending' || t.status === 'in_progress' + ); + if (stillPending && stillPending.length > 0) { + logger.warn( + `[executeFeature] Feature ${featureId} still has ${stillPending.length} incomplete tasks after ${MAX_TASK_RETRY_ATTEMPTS} retry attempts. Moving to final status.` + ); + } + } + const pipelineConfig = await pipelineService.getPipelineConfig(projectPath); const excludedStepIds = new Set(feature.excludedPipelineSteps || []); const sortedSteps = [...(pipelineConfig?.steps || [])] @@ -300,6 +378,13 @@ ${feature.spec} await this.updateFeatureStatusFn(projectPath, featureId, finalStatus); this.recordSuccessFn(); + // Check final task completion state for accurate reporting + const completedFeature = await this.loadFeatureFn(projectPath, featureId); + const totalTasks = completedFeature?.planSpec?.tasks?.length ?? 0; + const completedTasks = + completedFeature?.planSpec?.tasks?.filter((t) => t.status === 'completed').length ?? 
0; + const hasIncompleteTasks = totalTasks > 0 && completedTasks < totalTasks; + try { const outputPath = path.join(getFeatureDir(projectPath, featureId), 'agent-output.md'); let agentOutput = ''; @@ -326,12 +411,18 @@ ${feature.spec} /* learnings recording failed */ } + const elapsedSeconds = Math.round((Date.now() - tempRunningFeature.startTime) / 1000); + let completionMessage = `Feature completed in ${elapsedSeconds}s`; + if (finalStatus === 'verified') completionMessage += ' - auto-verified'; + if (hasIncompleteTasks) + completionMessage += ` (${completedTasks}/${totalTasks} tasks completed)`; + this.eventBus.emitAutoModeEvent('auto_mode_feature_complete', { featureId, featureName: feature.title, branchName: feature.branchName ?? null, passes: true, - message: `Feature completed in ${Math.round((Date.now() - tempRunningFeature.startTime) / 1000)}s${finalStatus === 'verified' ? ' - auto-verified' : ''}`, + message: completionMessage, projectPath, model: tempRunningFeature.model, provider: tempRunningFeature.provider, diff --git a/apps/server/src/services/feature-state-manager.ts b/apps/server/src/services/feature-state-manager.ts index e8afe0b9..b33f6df6 100644 --- a/apps/server/src/services/feature-state-manager.ts +++ b/apps/server/src/services/feature-state-manager.ts @@ -115,10 +115,13 @@ export class FeatureStateManager { // This prevents cards in "waiting for review" from appearing to still have running tasks if (feature.planSpec?.tasks) { let tasksFinalized = 0; + let tasksPending = 0; for (const task of feature.planSpec.tasks) { if (task.status === 'in_progress') { task.status = 'completed'; tasksFinalized++; + } else if (task.status === 'pending') { + tasksPending++; } } if (tasksFinalized > 0) { @@ -126,6 +129,11 @@ export class FeatureStateManager { `[updateFeatureStatus] Finalized ${tasksFinalized} in_progress tasks for feature ${featureId} moving to waiting_approval` ); } + if (tasksPending > 0) { + logger.warn( + `[updateFeatureStatus] Feature 
${featureId} moving to waiting_approval with ${tasksPending} pending (never started) tasks out of ${feature.planSpec.tasks.length} total` + ); + } // Update tasksCompleted count to reflect actual completed tasks feature.planSpec.tasksCompleted = feature.planSpec.tasks.filter( (t) => t.status === 'completed' @@ -136,11 +144,26 @@ export class FeatureStateManager { // Also finalize in_progress tasks when moving directly to verified (skipTests=false) // Do NOT mark pending tasks as completed - they were never started if (feature.planSpec?.tasks) { + let tasksFinalized = 0; + let tasksPending = 0; for (const task of feature.planSpec.tasks) { if (task.status === 'in_progress') { task.status = 'completed'; + tasksFinalized++; + } else if (task.status === 'pending') { + tasksPending++; } } + if (tasksFinalized > 0) { + logger.info( + `[updateFeatureStatus] Finalized ${tasksFinalized} in_progress tasks for feature ${featureId} moving to verified` + ); + } + if (tasksPending > 0) { + logger.warn( + `[updateFeatureStatus] Feature ${featureId} moving to verified with ${tasksPending} pending (never started) tasks out of ${feature.planSpec.tasks.length} total` + ); + } feature.planSpec.tasksCompleted = feature.planSpec.tasks.filter( (t) => t.status === 'completed' ).length; diff --git a/apps/server/tests/unit/services/execution-service.test.ts b/apps/server/tests/unit/services/execution-service.test.ts index 0a0ca57d..497c6abd 100644 --- a/apps/server/tests/unit/services/execution-service.test.ts +++ b/apps/server/tests/unit/services/execution-service.test.ts @@ -677,6 +677,302 @@ describe('execution-service.ts', () => { }); }); + describe('executeFeature - incomplete task retry', () => { + const createServiceWithMocks = () => { + return new ExecutionService( + mockEventBus, + mockConcurrencyManager, + mockWorktreeResolver, + mockSettingsService, + mockRunAgentFn, + mockExecutePipelineFn, + mockUpdateFeatureStatusFn, + mockLoadFeatureFn, + mockGetPlanningPromptPrefixFn, + 
mockSaveFeatureSummaryFn, + mockRecordLearningsFn, + mockContextExistsFn, + mockResumeFeatureFn, + mockTrackFailureFn, + mockSignalPauseFn, + mockRecordSuccessFn, + mockSaveExecutionStateFn, + mockLoadContextFilesFn + ); + }; + + it('does not re-run agent when feature has no tasks', async () => { + // Feature with no planSpec/tasks - should complete normally with 1 agent call + mockLoadFeatureFn = vi.fn().mockResolvedValue(testFeature); + const svc = createServiceWithMocks(); + + await svc.executeFeature('/test/project', 'feature-1'); + + expect(mockRunAgentFn).toHaveBeenCalledTimes(1); + }); + + it('does not re-run agent when all tasks are completed', async () => { + const featureWithCompletedTasks: Feature = { + ...testFeature, + planSpec: { + status: 'approved', + content: 'Plan', + tasks: [ + { id: 'T001', title: 'Task 1', status: 'completed', description: 'First task' }, + { id: 'T002', title: 'Task 2', status: 'completed', description: 'Second task' }, + ], + tasksCompleted: 2, + }, + }; + mockLoadFeatureFn = vi.fn().mockResolvedValue(featureWithCompletedTasks); + const svc = createServiceWithMocks(); + + await svc.executeFeature('/test/project', 'feature-1'); + + // Only the initial agent call + the approved-plan recursive call + // The approved plan triggers recursive executeFeature, so runAgentFn is called once in the inner call + expect(mockRunAgentFn).toHaveBeenCalledTimes(1); + }); + + it('re-runs agent when there are pending tasks after initial execution', async () => { + const featureWithPendingTasks: Feature = { + ...testFeature, + planSpec: { + status: 'approved', + content: 'Plan', + tasks: [ + { id: 'T001', title: 'Task 1', status: 'completed', description: 'First task' }, + { id: 'T002', title: 'Task 2', status: 'pending', description: 'Second task' }, + { id: 'T003', title: 'Task 3', status: 'pending', description: 'Third task' }, + ], + tasksCompleted: 1, + }, + }; + + // After first agent run, loadFeature returns feature with pending tasks + 
// After second agent run, loadFeature returns feature with all tasks completed + const featureAllDone: Feature = { + ...testFeature, + planSpec: { + status: 'approved', + content: 'Plan', + tasks: [ + { id: 'T001', title: 'Task 1', status: 'completed', description: 'First task' }, + { id: 'T002', title: 'Task 2', status: 'completed', description: 'Second task' }, + { id: 'T003', title: 'Task 3', status: 'completed', description: 'Third task' }, + ], + tasksCompleted: 3, + }, + }; + + let loadCallCount = 0; + mockLoadFeatureFn = vi.fn().mockImplementation(() => { + loadCallCount++; + // First call: initial feature load at the top of executeFeature + // Second call: after first agent run (check for incomplete tasks) - has pending tasks + // Third call: after second agent run (check for incomplete tasks) - all done + if (loadCallCount <= 2) return featureWithPendingTasks; + return featureAllDone; + }); + + const svc = createServiceWithMocks(); + await svc.executeFeature('/test/project', 'feature-1', false, false, undefined, { + continuationPrompt: 'Continue', + _calledInternally: true, + }); + + // Should have called runAgentFn twice: initial + one retry + expect(mockRunAgentFn).toHaveBeenCalledTimes(2); + + // The retry call should contain continuation prompt about incomplete tasks + const retryCallArgs = mockRunAgentFn.mock.calls[1]; + expect(retryCallArgs[2]).toContain('Continue Implementation - Incomplete Tasks'); + expect(retryCallArgs[2]).toContain('T002'); + expect(retryCallArgs[2]).toContain('T003'); + + // Should have emitted a progress event about retrying + expect(mockEventBus.emitAutoModeEvent).toHaveBeenCalledWith( + 'auto_mode_progress', + expect.objectContaining({ + featureId: 'feature-1', + content: expect.stringContaining('Re-running to complete tasks'), + }) + ); + }); + + it('respects maximum retry attempts', async () => { + const featureAlwaysPending: Feature = { + ...testFeature, + planSpec: { + status: 'approved', + content: 'Plan', + tasks: [ + 
{ id: 'T001', title: 'Task 1', status: 'completed', description: 'First task' }, + { id: 'T002', title: 'Task 2', status: 'pending', description: 'Second task' }, + ], + tasksCompleted: 1, + }, + }; + + // Always return feature with pending tasks (agent never completes T002) + mockLoadFeatureFn = vi.fn().mockResolvedValue(featureAlwaysPending); + + const svc = createServiceWithMocks(); + await svc.executeFeature('/test/project', 'feature-1', false, false, undefined, { + continuationPrompt: 'Continue', + _calledInternally: true, + }); + + // Initial run + 3 retry attempts = 4 total + expect(mockRunAgentFn).toHaveBeenCalledTimes(4); + + // Should still set final status even with incomplete tasks + expect(mockUpdateFeatureStatusFn).toHaveBeenCalledWith( + '/test/project', + 'feature-1', + 'verified' + ); + }); + + it('stops retrying when abort signal is triggered', async () => { + const featureWithPendingTasks: Feature = { + ...testFeature, + planSpec: { + status: 'approved', + content: 'Plan', + tasks: [ + { id: 'T001', title: 'Task 1', status: 'completed', description: 'First task' }, + { id: 'T002', title: 'Task 2', status: 'pending', description: 'Second task' }, + ], + tasksCompleted: 1, + }, + }; + + mockLoadFeatureFn = vi.fn().mockResolvedValue(featureWithPendingTasks); + + // Simulate abort after first agent run + let runCount = 0; + const capturedAbortController = { current: null as AbortController | null }; + mockRunAgentFn = vi.fn().mockImplementation((_wd, _fid, _prompt, abortCtrl) => { + capturedAbortController.current = abortCtrl; + runCount++; + if (runCount >= 1) { + // Abort after first run + abortCtrl.abort(); + } + return Promise.resolve(); + }); + + const svc = createServiceWithMocks(); + await svc.executeFeature('/test/project', 'feature-1', false, false, undefined, { + continuationPrompt: 'Continue', + _calledInternally: true, + }); + + // Should only have the initial run, then abort prevents retries + 
expect(mockRunAgentFn).toHaveBeenCalledTimes(1); + }); + + it('re-runs agent for in_progress tasks (not just pending)', async () => { + const featureWithInProgressTask: Feature = { + ...testFeature, + planSpec: { + status: 'approved', + content: 'Plan', + tasks: [ + { id: 'T001', title: 'Task 1', status: 'completed', description: 'First task' }, + { id: 'T002', title: 'Task 2', status: 'in_progress', description: 'Second task' }, + ], + tasksCompleted: 1, + currentTaskId: 'T002', + }, + }; + + const featureAllDone: Feature = { + ...testFeature, + planSpec: { + status: 'approved', + content: 'Plan', + tasks: [ + { id: 'T001', title: 'Task 1', status: 'completed', description: 'First task' }, + { id: 'T002', title: 'Task 2', status: 'completed', description: 'Second task' }, + ], + tasksCompleted: 2, + }, + }; + + let loadCallCount = 0; + mockLoadFeatureFn = vi.fn().mockImplementation(() => { + loadCallCount++; + if (loadCallCount <= 2) return featureWithInProgressTask; + return featureAllDone; + }); + + const svc = createServiceWithMocks(); + await svc.executeFeature('/test/project', 'feature-1', false, false, undefined, { + continuationPrompt: 'Continue', + _calledInternally: true, + }); + + // Should have retried for the in_progress task + expect(mockRunAgentFn).toHaveBeenCalledTimes(2); + + // The retry prompt should mention the in_progress task + const retryCallArgs = mockRunAgentFn.mock.calls[1]; + expect(retryCallArgs[2]).toContain('T002'); + expect(retryCallArgs[2]).toContain('in_progress'); + }); + + it('uses planningMode skip and no plan approval for retry runs', async () => { + const featureWithPendingTasks: Feature = { + ...testFeature, + planningMode: 'full', + requirePlanApproval: true, + planSpec: { + status: 'approved', + content: 'Plan', + tasks: [ + { id: 'T001', title: 'Task 1', status: 'completed', description: 'First task' }, + { id: 'T002', title: 'Task 2', status: 'pending', description: 'Second task' }, + ], + tasksCompleted: 1, + }, + }; + + 
const featureAllDone: Feature = { + ...testFeature, + planSpec: { + status: 'approved', + content: 'Plan', + tasks: [ + { id: 'T001', title: 'Task 1', status: 'completed', description: 'First task' }, + { id: 'T002', title: 'Task 2', status: 'completed', description: 'Second task' }, + ], + tasksCompleted: 2, + }, + }; + + let loadCallCount = 0; + mockLoadFeatureFn = vi.fn().mockImplementation(() => { + loadCallCount++; + if (loadCallCount <= 2) return featureWithPendingTasks; + return featureAllDone; + }); + + const svc = createServiceWithMocks(); + await svc.executeFeature('/test/project', 'feature-1', false, false, undefined, { + continuationPrompt: 'Continue', + _calledInternally: true, + }); + + // The retry agent call should use planningMode: 'skip' and requirePlanApproval: false + const retryCallArgs = mockRunAgentFn.mock.calls[1]; + const retryOptions = retryCallArgs[7]; // options object + expect(retryOptions.planningMode).toBe('skip'); + expect(retryOptions.requirePlanApproval).toBe(false); + }); + }); + describe('executeFeature - error handling', () => { it('classifies and emits error event', async () => { const testError = new Error('Test error'); diff --git a/apps/ui/src/components/views/board-view/components/kanban-card/agent-info-panel.tsx b/apps/ui/src/components/views/board-view/components/kanban-card/agent-info-panel.tsx index 80e42371..0e59e3bd 100644 --- a/apps/ui/src/components/views/board-view/components/kanban-card/agent-info-panel.tsx +++ b/apps/ui/src/components/views/board-view/components/kanban-card/agent-info-panel.tsx @@ -164,13 +164,16 @@ export const AgentInfoPanel = memo(function AgentInfoPanel({ const currentTaskId = planSpec.currentTaskId; return planSpec.tasks.map((task: ParsedTask, index: number) => { - // If the feature is done (waiting_approval/verified), all tasks are completed - // This is a defensive UI-side check: the server should have already finalized - // task statuses, but stale data from before the fix could still 
show spinners + // When feature is finished (waiting_approval/verified), finalize task display: + // - in_progress tasks → completed (agent was working on them when it finished) + // - pending tasks stay pending (they were never started) + // - completed tasks stay completed + // This matches server-side behavior in feature-state-manager.ts if (isFeatureFinished) { + const finalStatus = task.status === 'in_progress' ? 'completed' : task.status; return { content: task.description, - status: 'completed' as const, + status: (finalStatus || 'completed') as 'pending' | 'in_progress' | 'completed', }; } From d5340fd1a40426924d93832ccffb89aacae317e7 Mon Sep 17 00:00:00 2001 From: gsxdsm Date: Mon, 16 Feb 2026 22:19:26 -0800 Subject: [PATCH 87/89] Update apps/ui/src/components/views/board-view/components/kanban-card/agent-info-panel.tsx Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- .../board-view/components/kanban-card/agent-info-panel.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/ui/src/components/views/board-view/components/kanban-card/agent-info-panel.tsx b/apps/ui/src/components/views/board-view/components/kanban-card/agent-info-panel.tsx index 0e59e3bd..1fae2128 100644 --- a/apps/ui/src/components/views/board-view/components/kanban-card/agent-info-panel.tsx +++ b/apps/ui/src/components/views/board-view/components/kanban-card/agent-info-panel.tsx @@ -170,7 +170,7 @@ export const AgentInfoPanel = memo(function AgentInfoPanel({ // - completed tasks stay completed // This matches server-side behavior in feature-state-manager.ts if (isFeatureFinished) { - const finalStatus = task.status === 'in_progress' ? 'completed' : task.status; + const finalStatus = task.status === 'in_progress' || task.status === 'failed' ? 
'completed' : task.status; return { content: task.description, status: (finalStatus || 'completed') as 'pending' | 'in_progress' | 'completed', From 8af1b8bd0807ab5f11394cf7d9851d8af4150005 Mon Sep 17 00:00:00 2001 From: gsxdsm Date: Mon, 16 Feb 2026 22:38:59 -0800 Subject: [PATCH 88/89] chore: Increase default max turns for agent execution from 20/50 to 100 --- apps/server/src/providers/claude-provider.ts | 2 +- apps/server/src/services/agent-executor.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/server/src/providers/claude-provider.ts b/apps/server/src/providers/claude-provider.ts index 78a0a0c7..3d4f88cd 100644 --- a/apps/server/src/providers/claude-provider.ts +++ b/apps/server/src/providers/claude-provider.ts @@ -204,7 +204,7 @@ export class ClaudeProvider extends BaseProvider { model, cwd, systemPrompt, - maxTurns = 20, + maxTurns = 100, allowedTools, abortController, conversationHistory, diff --git a/apps/server/src/services/agent-executor.ts b/apps/server/src/services/agent-executor.ts index cbf9e401..5d049804 100644 --- a/apps/server/src/services/agent-executor.ts +++ b/apps/server/src/services/agent-executor.ts @@ -331,7 +331,7 @@ export class AgentExecutor { userFeedback ); const taskStream = provider.executeQuery( - this.buildExecOpts(options, taskPrompt, Math.min(sdkOptions?.maxTurns ?? 50, 150)) + this.buildExecOpts(options, taskPrompt, Math.min(sdkOptions?.maxTurns ?? 
100, 100)) ); let taskOutput = '', taskStartDetected = false, From f06088a062122869ae1750d77b31d8b92e88af47 Mon Sep 17 00:00:00 2001 From: gsxdsm Date: Mon, 16 Feb 2026 22:47:30 -0800 Subject: [PATCH 89/89] feat: Update maxTurns default from 20 to 100 and format code --- apps/server/tests/unit/providers/claude-provider.test.ts | 4 ++-- .../board-view/components/kanban-card/agent-info-panel.tsx | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/apps/server/tests/unit/providers/claude-provider.test.ts b/apps/server/tests/unit/providers/claude-provider.test.ts index 7df211ef..a2ebd72a 100644 --- a/apps/server/tests/unit/providers/claude-provider.test.ts +++ b/apps/server/tests/unit/providers/claude-provider.test.ts @@ -187,7 +187,7 @@ describe('claude-provider.ts', () => { expect(typeof callArgs.prompt).not.toBe('string'); }); - it('should use maxTurns default of 20', async () => { + it('should use maxTurns default of 100', async () => { vi.mocked(sdk.query).mockReturnValue( (async function* () { yield { type: 'text', text: 'test' }; @@ -205,7 +205,7 @@ describe('claude-provider.ts', () => { expect(sdk.query).toHaveBeenCalledWith({ prompt: 'Test', options: expect.objectContaining({ - maxTurns: 20, + maxTurns: 100, }), }); }); diff --git a/apps/ui/src/components/views/board-view/components/kanban-card/agent-info-panel.tsx b/apps/ui/src/components/views/board-view/components/kanban-card/agent-info-panel.tsx index 1fae2128..2d215252 100644 --- a/apps/ui/src/components/views/board-view/components/kanban-card/agent-info-panel.tsx +++ b/apps/ui/src/components/views/board-view/components/kanban-card/agent-info-panel.tsx @@ -170,7 +170,8 @@ export const AgentInfoPanel = memo(function AgentInfoPanel({ // - completed tasks stay completed // This matches server-side behavior in feature-state-manager.ts if (isFeatureFinished) { - const finalStatus = task.status === 'in_progress' || task.status === 'failed' ? 
'completed' : task.status; + const finalStatus = + task.status === 'in_progress' || task.status === 'failed' ? 'completed' : task.status; return { content: task.description, status: (finalStatus || 'completed') as 'pending' | 'in_progress' | 'completed',