diff --git a/apps/server/src/providers/cli-provider.ts b/apps/server/src/providers/cli-provider.ts index 8683f841..ea636cb8 100644 --- a/apps/server/src/providers/cli-provider.ts +++ b/apps/server/src/providers/cli-provider.ts @@ -35,6 +35,7 @@ import { type SubprocessOptions, type WslCliResult, } from '@automaker/platform'; +import { calculateReasoningTimeout } from '@automaker/types'; import { createLogger, isAbortError } from '@automaker/utils'; import { execSync } from 'child_process'; import * as fs from 'fs'; @@ -107,6 +108,15 @@ export interface CliDetectionResult { // Create logger for CLI operations const cliLogger = createLogger('CliProvider'); +/** + * Base timeout for CLI operations in milliseconds. + * CLI tools have longer startup and processing times compared to direct API calls, + * so we use a higher base timeout (120s) than the default provider timeout (30s). + * This is multiplied by reasoning effort multipliers when applicable. + * @see calculateReasoningTimeout from @automaker/types + */ +const CLI_BASE_TIMEOUT_MS = 120000; + /** * Abstract base class for CLI-based providers * @@ -450,6 +460,10 @@ export abstract class CliProvider extends BaseProvider { } } + // Calculate dynamic timeout based on reasoning effort. + // This addresses GitHub issue #530 where reasoning models with 'xhigh' effort would timeout. + const timeout = calculateReasoningTimeout(options.reasoningEffort, CLI_BASE_TIMEOUT_MS); + // WSL strategy if (this.useWsl && this.wslCliPath) { const wslCwd = windowsToWslPath(cwd); @@ -473,7 +487,7 @@ export abstract class CliProvider extends BaseProvider { cwd, // Windows cwd for spawn env: filteredEnv, abortController: options.abortController, - timeout: 120000, // CLI operations may take longer + timeout, }; } @@ -488,7 +502,7 @@ export abstract class CliProvider extends BaseProvider { cwd, env: filteredEnv, abortController: options.abortController, - timeout: 120000, + timeout, }; } @@ -501,7 +515,7 @@ export abstract class CliProvider extends BaseProvider { cwd, env: filteredEnv, abortController: options.abortController, - timeout: 120000, + timeout, }; } diff --git a/apps/server/src/providers/codex-provider.ts b/apps/server/src/providers/codex-provider.ts index e0f38ee9..18838cb8 100644 --- a/apps/server/src/providers/codex-provider.ts +++ b/apps/server/src/providers/codex-provider.ts @@ -33,6 +33,8 @@ import { CODEX_MODEL_MAP, supportsReasoningEffort, validateBareModelId, + calculateReasoningTimeout, + DEFAULT_TIMEOUT_MS, type CodexApprovalPolicy, type CodexSandboxMode, type CodexAuthStatus, @@ -91,7 +93,14 @@ const CODEX_ITEM_TYPES = { const SYSTEM_PROMPT_LABEL = 'System instructions'; const HISTORY_HEADER = 'Current request:\n'; const TEXT_ENCODING = 'utf-8'; -const DEFAULT_TIMEOUT_MS = 30000; +/** + * Default timeout for Codex CLI operations in milliseconds. + * This is the "no output" timeout - if the CLI doesn't produce any JSONL output + * for this duration, the process is killed. For reasoning models with high + * reasoning effort, this timeout is dynamically extended via calculateReasoningTimeout(). + * @see calculateReasoningTimeout from @automaker/types + */ +const CODEX_CLI_TIMEOUT_MS = DEFAULT_TIMEOUT_MS; const CONTEXT_WINDOW_256K = 256000; const MAX_OUTPUT_32K = 32000; const MAX_OUTPUT_16K = 16000; @@ -814,13 +823,19 @@ export class CodexProvider extends BaseProvider { envOverrides[OPENAI_API_KEY_ENV] = executionPlan.openAiApiKey; } + // Calculate dynamic timeout based on reasoning effort. + // Higher reasoning effort (e.g., 'xhigh' for "xtra thinking" mode) requires more time + // for the model to generate reasoning tokens before producing output. + // This fixes GitHub issue #530 where features would get stuck with reasoning models. + const timeout = calculateReasoningTimeout(options.reasoningEffort, CODEX_CLI_TIMEOUT_MS); + const stream = spawnJSONLProcess({ command: commandPath, args, cwd: options.cwd, env: envOverrides, abortController: options.abortController, - timeout: DEFAULT_TIMEOUT_MS, + timeout, stdinData: promptText, // Pass prompt via stdin }); diff --git a/apps/server/tests/unit/providers/codex-provider.test.ts b/apps/server/tests/unit/providers/codex-provider.test.ts index 6ca69d86..ee9c7bad 100644 --- a/apps/server/tests/unit/providers/codex-provider.test.ts +++ b/apps/server/tests/unit/providers/codex-provider.test.ts @@ -11,6 +11,11 @@ import { getCodexConfigDir, getCodexAuthIndicators, } from '@automaker/platform'; +import { + calculateReasoningTimeout, + REASONING_TIMEOUT_MULTIPLIERS, + DEFAULT_TIMEOUT_MS, +} from '@automaker/types'; const OPENAI_API_KEY_ENV = 'OPENAI_API_KEY'; const originalOpenAIKey = process.env[OPENAI_API_KEY_ENV]; @@ -289,5 +294,121 @@ describe('codex-provider.ts', () => { expect(codexRunMock).not.toHaveBeenCalled(); expect(spawnJSONLProcess).toHaveBeenCalled(); }); + + it('passes extended timeout for high reasoning effort', async () => { + vi.mocked(spawnJSONLProcess).mockReturnValue((async function* () {})()); + + await collectAsyncGenerator( + provider.executeQuery({ + prompt: 'Complex reasoning task', + model: 'gpt-5.1-codex-max', + cwd: '/tmp', + reasoningEffort: 'high', + }) + ); + + const call = vi.mocked(spawnJSONLProcess).mock.calls[0][0]; + // High reasoning effort should have 3x the default timeout (90000ms) + expect(call.timeout).toBe(DEFAULT_TIMEOUT_MS * REASONING_TIMEOUT_MULTIPLIERS.high); + }); + + it('passes extended timeout for xhigh reasoning effort', async () => { + vi.mocked(spawnJSONLProcess).mockReturnValue((async function* () {})()); + + await collectAsyncGenerator( + provider.executeQuery({ + prompt: 'Very complex reasoning task', + model: 'gpt-5.1-codex-max', + cwd: '/tmp', + reasoningEffort: 'xhigh', + }) + ); + + const call = vi.mocked(spawnJSONLProcess).mock.calls[0][0]; + // xhigh reasoning effort should have 4x the default timeout (120000ms) + expect(call.timeout).toBe(DEFAULT_TIMEOUT_MS * REASONING_TIMEOUT_MULTIPLIERS.xhigh); + }); + + it('uses default timeout when no reasoning effort is specified', async () => { + vi.mocked(spawnJSONLProcess).mockReturnValue((async function* () {})()); + + await collectAsyncGenerator( + provider.executeQuery({ + prompt: 'Simple task', + model: 'gpt-5.2', + cwd: '/tmp', + }) + ); + + const call = vi.mocked(spawnJSONLProcess).mock.calls[0][0]; + // No reasoning effort should use the default timeout + expect(call.timeout).toBe(DEFAULT_TIMEOUT_MS); + }); + }); + + describe('calculateReasoningTimeout', () => { + it('returns default timeout when no reasoning effort is specified', () => { + expect(calculateReasoningTimeout()).toBe(DEFAULT_TIMEOUT_MS); + expect(calculateReasoningTimeout(undefined)).toBe(DEFAULT_TIMEOUT_MS); + }); + + it('returns default timeout for none reasoning effort', () => { + expect(calculateReasoningTimeout('none')).toBe(DEFAULT_TIMEOUT_MS); + }); + + it('applies correct multiplier for minimal reasoning effort', () => { + const expected = Math.round(DEFAULT_TIMEOUT_MS * REASONING_TIMEOUT_MULTIPLIERS.minimal); + expect(calculateReasoningTimeout('minimal')).toBe(expected); + }); + + it('applies correct multiplier for low reasoning effort', () => { + const expected = Math.round(DEFAULT_TIMEOUT_MS * REASONING_TIMEOUT_MULTIPLIERS.low); + expect(calculateReasoningTimeout('low')).toBe(expected); + }); + + it('applies correct multiplier for medium reasoning effort', () => { + const expected = Math.round(DEFAULT_TIMEOUT_MS * REASONING_TIMEOUT_MULTIPLIERS.medium); + expect(calculateReasoningTimeout('medium')).toBe(expected); + }); + + it('applies correct multiplier for high reasoning effort', () => { + const expected = Math.round(DEFAULT_TIMEOUT_MS * REASONING_TIMEOUT_MULTIPLIERS.high); + expect(calculateReasoningTimeout('high')).toBe(expected); + }); + + it('applies correct multiplier for xhigh reasoning effort', () => { + const expected = Math.round(DEFAULT_TIMEOUT_MS * REASONING_TIMEOUT_MULTIPLIERS.xhigh); + expect(calculateReasoningTimeout('xhigh')).toBe(expected); + }); + + it('uses custom base timeout when provided', () => { + const customBase = 60000; + expect(calculateReasoningTimeout('high', customBase)).toBe( + Math.round(customBase * REASONING_TIMEOUT_MULTIPLIERS.high) + ); + }); + + it('falls back to 1.0 multiplier for invalid reasoning effort', () => { + // Test that invalid values fallback gracefully to default multiplier + // This tests the defensive ?? 1.0 in calculateReasoningTimeout + const invalidEffort = 'invalid_effort' as never; + expect(calculateReasoningTimeout(invalidEffort)).toBe(DEFAULT_TIMEOUT_MS); + }); + + it('produces expected absolute timeout values', () => { + // Verify the actual timeout values that will be used: + // none: 30000ms (30s) + // minimal: 36000ms (36s) + // low: 45000ms (45s) + // medium: 60000ms (1m) + // high: 90000ms (1m 30s) + // xhigh: 120000ms (2m) + expect(calculateReasoningTimeout('none')).toBe(30000); + expect(calculateReasoningTimeout('minimal')).toBe(36000); + expect(calculateReasoningTimeout('low')).toBe(45000); + expect(calculateReasoningTimeout('medium')).toBe(60000); + expect(calculateReasoningTimeout('high')).toBe(90000); + expect(calculateReasoningTimeout('xhigh')).toBe(120000); + }); }); }); diff --git a/libs/types/src/index.ts b/libs/types/src/index.ts index 7f06a33b..a0145782 100644 --- a/libs/types/src/index.ts +++ b/libs/types/src/index.ts @@ -21,6 +21,13 @@ export type { ReasoningEffort, } from './provider.js'; +// Provider constants and utilities +export { + DEFAULT_TIMEOUT_MS, + REASONING_TIMEOUT_MULTIPLIERS, + calculateReasoningTimeout, +} from './provider.js'; + // Codex CLI types export type { CodexSandboxMode, diff --git a/libs/types/src/provider.ts b/libs/types/src/provider.ts index aa62b561..e934e999 100644 --- a/libs/types/src/provider.ts +++ b/libs/types/src/provider.ts @@ -18,6 +18,61 @@ import type { CodexSandboxMode, CodexApprovalPolicy } from './codex.js'; */ export type ReasoningEffort = 'none' | 'minimal' | 'low' | 'medium' | 'high' | 'xhigh'; +/** + * Default timeout in milliseconds for provider operations. + * Used as the baseline timeout for API calls and CLI operations. + */ +export const DEFAULT_TIMEOUT_MS = 30000; + +/** + * Timeout multipliers for reasoning effort levels. + * Higher reasoning effort requires more time for the model to generate reasoning tokens. + * These multipliers are applied to DEFAULT_TIMEOUT_MS. + */ +export const REASONING_TIMEOUT_MULTIPLIERS: Record = { + none: 1.0, // No reasoning, baseline timeout + minimal: 1.2, // Very quick reasoning, slight increase + low: 1.5, // Quick reasoning, moderate increase + medium: 2.0, // Balanced reasoning, double baseline + high: 3.0, // Extended reasoning, triple baseline + xhigh: 4.0, // Maximum reasoning, quadruple baseline +}; + +/** + * Calculate timeout for provider operations based on reasoning effort. + * Higher reasoning effort requires more time for the model to generate reasoning tokens. + * + * This function addresses GitHub issue #530 where Codex CLI with GPT-5.2 "xtra thinking" + * (xhigh reasoning effort) mode would get stuck because the 30-second "no output" timeout + * would trigger during extended reasoning phases. + * + * @param reasoningEffort - The reasoning effort level, defaults to 'none' if undefined. + * If an invalid value is provided, falls back to multiplier 1.0. + * @param baseTimeoutMs - Optional custom base timeout, defaults to DEFAULT_TIMEOUT_MS (30000ms) + * @returns The calculated timeout in milliseconds, rounded to the nearest integer + * + * @example + * // Using default base timeout (30000ms) + * calculateReasoningTimeout('high') // Returns 90000 (30000 * 3.0) + * + * @example + * // Using custom base timeout + * calculateReasoningTimeout('medium', 60000) // Returns 120000 (60000 * 2.0) + * + * @example + * // No reasoning effort (default) + * calculateReasoningTimeout() // Returns 30000 (default timeout) + * calculateReasoningTimeout(undefined) // Returns 30000 + */ +export function calculateReasoningTimeout( + reasoningEffort?: ReasoningEffort, + baseTimeoutMs: number = DEFAULT_TIMEOUT_MS +): number { + const effort = reasoningEffort ?? 'none'; + const multiplier = REASONING_TIMEOUT_MULTIPLIERS[effort] ?? 1.0; + return Math.round(baseTimeoutMs * multiplier); +} + /** * Configuration for a provider instance */