Merge pull request #533 from AutoMaker-Org/feature/v0.12.0rc-1768605477061-fhv5

fix: Codex freezes
This commit is contained in:
Shirone
2026-01-16 23:59:16 +00:00
committed by GitHub
5 changed files with 217 additions and 5 deletions

View File

@@ -35,6 +35,7 @@ import {
type SubprocessOptions,
type WslCliResult,
} from '@automaker/platform';
import { calculateReasoningTimeout } from '@automaker/types';
import { createLogger, isAbortError } from '@automaker/utils';
import { execSync } from 'child_process';
import * as fs from 'fs';
@@ -107,6 +108,15 @@ export interface CliDetectionResult {
// Create logger for CLI operations
const cliLogger = createLogger('CliProvider');
/**
* Base timeout for CLI operations in milliseconds.
* CLI tools have longer startup and processing times compared to direct API calls,
* so we use a higher base timeout (120s) than the default provider timeout (30s).
* This is multiplied by reasoning effort multipliers when applicable.
* @see calculateReasoningTimeout from @automaker/types
*/
const CLI_BASE_TIMEOUT_MS = 120000;
/**
* Abstract base class for CLI-based providers
*
@@ -450,6 +460,10 @@ export abstract class CliProvider extends BaseProvider {
}
}
// Calculate dynamic timeout based on reasoning effort.
// This addresses GitHub issue #530 where reasoning models with 'xhigh' effort would timeout.
const timeout = calculateReasoningTimeout(options.reasoningEffort, CLI_BASE_TIMEOUT_MS);
// WSL strategy
if (this.useWsl && this.wslCliPath) {
const wslCwd = windowsToWslPath(cwd);
@@ -473,7 +487,7 @@ export abstract class CliProvider extends BaseProvider {
cwd, // Windows cwd for spawn
env: filteredEnv,
abortController: options.abortController,
timeout: 120000, // CLI operations may take longer
timeout,
};
}
@@ -488,7 +502,7 @@ export abstract class CliProvider extends BaseProvider {
cwd,
env: filteredEnv,
abortController: options.abortController,
timeout: 120000,
timeout,
};
}
@@ -501,7 +515,7 @@ export abstract class CliProvider extends BaseProvider {
cwd,
env: filteredEnv,
abortController: options.abortController,
timeout: 120000,
timeout,
};
}

View File

@@ -33,6 +33,8 @@ import {
CODEX_MODEL_MAP,
supportsReasoningEffort,
validateBareModelId,
calculateReasoningTimeout,
DEFAULT_TIMEOUT_MS,
type CodexApprovalPolicy,
type CodexSandboxMode,
type CodexAuthStatus,
@@ -91,7 +93,14 @@ const CODEX_ITEM_TYPES = {
const SYSTEM_PROMPT_LABEL = 'System instructions';
const HISTORY_HEADER = 'Current request:\n';
const TEXT_ENCODING = 'utf-8';
const DEFAULT_TIMEOUT_MS = 30000;
/**
* Default timeout for Codex CLI operations in milliseconds.
* This is the "no output" timeout - if the CLI doesn't produce any JSONL output
* for this duration, the process is killed. For reasoning models with high
* reasoning effort, this timeout is dynamically extended via calculateReasoningTimeout().
* @see calculateReasoningTimeout from @automaker/types
*/
const CODEX_CLI_TIMEOUT_MS = DEFAULT_TIMEOUT_MS;
const CONTEXT_WINDOW_256K = 256000;
const MAX_OUTPUT_32K = 32000;
const MAX_OUTPUT_16K = 16000;
@@ -814,13 +823,19 @@ export class CodexProvider extends BaseProvider {
envOverrides[OPENAI_API_KEY_ENV] = executionPlan.openAiApiKey;
}
// Calculate dynamic timeout based on reasoning effort.
// Higher reasoning effort (e.g., 'xhigh' for "xtra thinking" mode) requires more time
// for the model to generate reasoning tokens before producing output.
// This fixes GitHub issue #530 where features would get stuck with reasoning models.
const timeout = calculateReasoningTimeout(options.reasoningEffort, CODEX_CLI_TIMEOUT_MS);
const stream = spawnJSONLProcess({
command: commandPath,
args,
cwd: options.cwd,
env: envOverrides,
abortController: options.abortController,
timeout: DEFAULT_TIMEOUT_MS,
timeout,
stdinData: promptText, // Pass prompt via stdin
});

View File

@@ -11,6 +11,11 @@ import {
getCodexConfigDir,
getCodexAuthIndicators,
} from '@automaker/platform';
import {
calculateReasoningTimeout,
REASONING_TIMEOUT_MULTIPLIERS,
DEFAULT_TIMEOUT_MS,
} from '@automaker/types';
const OPENAI_API_KEY_ENV = 'OPENAI_API_KEY';
const originalOpenAIKey = process.env[OPENAI_API_KEY_ENV];
@@ -289,5 +294,121 @@ describe('codex-provider.ts', () => {
expect(codexRunMock).not.toHaveBeenCalled();
expect(spawnJSONLProcess).toHaveBeenCalled();
});
it('passes extended timeout for high reasoning effort', async () => {
vi.mocked(spawnJSONLProcess).mockReturnValue((async function* () {})());
await collectAsyncGenerator(
provider.executeQuery({
prompt: 'Complex reasoning task',
model: 'gpt-5.1-codex-max',
cwd: '/tmp',
reasoningEffort: 'high',
})
);
const call = vi.mocked(spawnJSONLProcess).mock.calls[0][0];
// High reasoning effort should have 3x the default timeout (90000ms)
expect(call.timeout).toBe(DEFAULT_TIMEOUT_MS * REASONING_TIMEOUT_MULTIPLIERS.high);
});
it('passes extended timeout for xhigh reasoning effort', async () => {
vi.mocked(spawnJSONLProcess).mockReturnValue((async function* () {})());
await collectAsyncGenerator(
provider.executeQuery({
prompt: 'Very complex reasoning task',
model: 'gpt-5.1-codex-max',
cwd: '/tmp',
reasoningEffort: 'xhigh',
})
);
const call = vi.mocked(spawnJSONLProcess).mock.calls[0][0];
// xhigh reasoning effort should have 4x the default timeout (120000ms)
expect(call.timeout).toBe(DEFAULT_TIMEOUT_MS * REASONING_TIMEOUT_MULTIPLIERS.xhigh);
});
it('uses default timeout when no reasoning effort is specified', async () => {
vi.mocked(spawnJSONLProcess).mockReturnValue((async function* () {})());
await collectAsyncGenerator(
provider.executeQuery({
prompt: 'Simple task',
model: 'gpt-5.2',
cwd: '/tmp',
})
);
const call = vi.mocked(spawnJSONLProcess).mock.calls[0][0];
// No reasoning effort should use the default timeout
expect(call.timeout).toBe(DEFAULT_TIMEOUT_MS);
});
});
describe('calculateReasoningTimeout', () => {
it('returns default timeout when no reasoning effort is specified', () => {
expect(calculateReasoningTimeout()).toBe(DEFAULT_TIMEOUT_MS);
expect(calculateReasoningTimeout(undefined)).toBe(DEFAULT_TIMEOUT_MS);
});
it('returns default timeout for none reasoning effort', () => {
expect(calculateReasoningTimeout('none')).toBe(DEFAULT_TIMEOUT_MS);
});
it('applies correct multiplier for minimal reasoning effort', () => {
const expected = Math.round(DEFAULT_TIMEOUT_MS * REASONING_TIMEOUT_MULTIPLIERS.minimal);
expect(calculateReasoningTimeout('minimal')).toBe(expected);
});
it('applies correct multiplier for low reasoning effort', () => {
const expected = Math.round(DEFAULT_TIMEOUT_MS * REASONING_TIMEOUT_MULTIPLIERS.low);
expect(calculateReasoningTimeout('low')).toBe(expected);
});
it('applies correct multiplier for medium reasoning effort', () => {
const expected = Math.round(DEFAULT_TIMEOUT_MS * REASONING_TIMEOUT_MULTIPLIERS.medium);
expect(calculateReasoningTimeout('medium')).toBe(expected);
});
it('applies correct multiplier for high reasoning effort', () => {
const expected = Math.round(DEFAULT_TIMEOUT_MS * REASONING_TIMEOUT_MULTIPLIERS.high);
expect(calculateReasoningTimeout('high')).toBe(expected);
});
it('applies correct multiplier for xhigh reasoning effort', () => {
const expected = Math.round(DEFAULT_TIMEOUT_MS * REASONING_TIMEOUT_MULTIPLIERS.xhigh);
expect(calculateReasoningTimeout('xhigh')).toBe(expected);
});
it('uses custom base timeout when provided', () => {
const customBase = 60000;
expect(calculateReasoningTimeout('high', customBase)).toBe(
Math.round(customBase * REASONING_TIMEOUT_MULTIPLIERS.high)
);
});
it('falls back to 1.0 multiplier for invalid reasoning effort', () => {
// Test that invalid values fallback gracefully to default multiplier
// This tests the defensive ?? 1.0 in calculateReasoningTimeout
const invalidEffort = 'invalid_effort' as never;
expect(calculateReasoningTimeout(invalidEffort)).toBe(DEFAULT_TIMEOUT_MS);
});
it('produces expected absolute timeout values', () => {
// Verify the actual timeout values that will be used:
// none: 30000ms (30s)
// minimal: 36000ms (36s)
// low: 45000ms (45s)
// medium: 60000ms (1m)
// high: 90000ms (1m 30s)
// xhigh: 120000ms (2m)
expect(calculateReasoningTimeout('none')).toBe(30000);
expect(calculateReasoningTimeout('minimal')).toBe(36000);
expect(calculateReasoningTimeout('low')).toBe(45000);
expect(calculateReasoningTimeout('medium')).toBe(60000);
expect(calculateReasoningTimeout('high')).toBe(90000);
expect(calculateReasoningTimeout('xhigh')).toBe(120000);
});
});
});