mirror of
https://github.com/AutoMaker-Org/automaker.git
synced 2026-02-03 21:03:08 +00:00
Merge pull request #533 from AutoMaker-Org/feature/v0.12.0rc-1768605477061-fhv5
fix: Codex freezes
This commit is contained in:
@@ -35,6 +35,7 @@ import {
|
|||||||
type SubprocessOptions,
|
type SubprocessOptions,
|
||||||
type WslCliResult,
|
type WslCliResult,
|
||||||
} from '@automaker/platform';
|
} from '@automaker/platform';
|
||||||
|
import { calculateReasoningTimeout } from '@automaker/types';
|
||||||
import { createLogger, isAbortError } from '@automaker/utils';
|
import { createLogger, isAbortError } from '@automaker/utils';
|
||||||
import { execSync } from 'child_process';
|
import { execSync } from 'child_process';
|
||||||
import * as fs from 'fs';
|
import * as fs from 'fs';
|
||||||
@@ -107,6 +108,15 @@ export interface CliDetectionResult {
|
|||||||
// Create logger for CLI operations
|
// Create logger for CLI operations
|
||||||
const cliLogger = createLogger('CliProvider');
|
const cliLogger = createLogger('CliProvider');
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Base timeout for CLI operations in milliseconds.
|
||||||
|
* CLI tools have longer startup and processing times compared to direct API calls,
|
||||||
|
* so we use a higher base timeout (120s) than the default provider timeout (30s).
|
||||||
|
* This is multiplied by reasoning effort multipliers when applicable.
|
||||||
|
* @see calculateReasoningTimeout from @automaker/types
|
||||||
|
*/
|
||||||
|
const CLI_BASE_TIMEOUT_MS = 120000;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Abstract base class for CLI-based providers
|
* Abstract base class for CLI-based providers
|
||||||
*
|
*
|
||||||
@@ -450,6 +460,10 @@ export abstract class CliProvider extends BaseProvider {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Calculate dynamic timeout based on reasoning effort.
|
||||||
|
// This addresses GitHub issue #530 where reasoning models with 'xhigh' effort would timeout.
|
||||||
|
const timeout = calculateReasoningTimeout(options.reasoningEffort, CLI_BASE_TIMEOUT_MS);
|
||||||
|
|
||||||
// WSL strategy
|
// WSL strategy
|
||||||
if (this.useWsl && this.wslCliPath) {
|
if (this.useWsl && this.wslCliPath) {
|
||||||
const wslCwd = windowsToWslPath(cwd);
|
const wslCwd = windowsToWslPath(cwd);
|
||||||
@@ -473,7 +487,7 @@ export abstract class CliProvider extends BaseProvider {
|
|||||||
cwd, // Windows cwd for spawn
|
cwd, // Windows cwd for spawn
|
||||||
env: filteredEnv,
|
env: filteredEnv,
|
||||||
abortController: options.abortController,
|
abortController: options.abortController,
|
||||||
timeout: 120000, // CLI operations may take longer
|
timeout,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -488,7 +502,7 @@ export abstract class CliProvider extends BaseProvider {
|
|||||||
cwd,
|
cwd,
|
||||||
env: filteredEnv,
|
env: filteredEnv,
|
||||||
abortController: options.abortController,
|
abortController: options.abortController,
|
||||||
timeout: 120000,
|
timeout,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -501,7 +515,7 @@ export abstract class CliProvider extends BaseProvider {
|
|||||||
cwd,
|
cwd,
|
||||||
env: filteredEnv,
|
env: filteredEnv,
|
||||||
abortController: options.abortController,
|
abortController: options.abortController,
|
||||||
timeout: 120000,
|
timeout,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -33,6 +33,8 @@ import {
|
|||||||
CODEX_MODEL_MAP,
|
CODEX_MODEL_MAP,
|
||||||
supportsReasoningEffort,
|
supportsReasoningEffort,
|
||||||
validateBareModelId,
|
validateBareModelId,
|
||||||
|
calculateReasoningTimeout,
|
||||||
|
DEFAULT_TIMEOUT_MS,
|
||||||
type CodexApprovalPolicy,
|
type CodexApprovalPolicy,
|
||||||
type CodexSandboxMode,
|
type CodexSandboxMode,
|
||||||
type CodexAuthStatus,
|
type CodexAuthStatus,
|
||||||
@@ -91,7 +93,14 @@ const CODEX_ITEM_TYPES = {
|
|||||||
const SYSTEM_PROMPT_LABEL = 'System instructions';
|
const SYSTEM_PROMPT_LABEL = 'System instructions';
|
||||||
const HISTORY_HEADER = 'Current request:\n';
|
const HISTORY_HEADER = 'Current request:\n';
|
||||||
const TEXT_ENCODING = 'utf-8';
|
const TEXT_ENCODING = 'utf-8';
|
||||||
const DEFAULT_TIMEOUT_MS = 30000;
|
/**
|
||||||
|
* Default timeout for Codex CLI operations in milliseconds.
|
||||||
|
* This is the "no output" timeout - if the CLI doesn't produce any JSONL output
|
||||||
|
* for this duration, the process is killed. For reasoning models with high
|
||||||
|
* reasoning effort, this timeout is dynamically extended via calculateReasoningTimeout().
|
||||||
|
* @see calculateReasoningTimeout from @automaker/types
|
||||||
|
*/
|
||||||
|
const CODEX_CLI_TIMEOUT_MS = DEFAULT_TIMEOUT_MS;
|
||||||
const CONTEXT_WINDOW_256K = 256000;
|
const CONTEXT_WINDOW_256K = 256000;
|
||||||
const MAX_OUTPUT_32K = 32000;
|
const MAX_OUTPUT_32K = 32000;
|
||||||
const MAX_OUTPUT_16K = 16000;
|
const MAX_OUTPUT_16K = 16000;
|
||||||
@@ -814,13 +823,19 @@ export class CodexProvider extends BaseProvider {
|
|||||||
envOverrides[OPENAI_API_KEY_ENV] = executionPlan.openAiApiKey;
|
envOverrides[OPENAI_API_KEY_ENV] = executionPlan.openAiApiKey;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Calculate dynamic timeout based on reasoning effort.
|
||||||
|
// Higher reasoning effort (e.g., 'xhigh' for "xtra thinking" mode) requires more time
|
||||||
|
// for the model to generate reasoning tokens before producing output.
|
||||||
|
// This fixes GitHub issue #530 where features would get stuck with reasoning models.
|
||||||
|
const timeout = calculateReasoningTimeout(options.reasoningEffort, CODEX_CLI_TIMEOUT_MS);
|
||||||
|
|
||||||
const stream = spawnJSONLProcess({
|
const stream = spawnJSONLProcess({
|
||||||
command: commandPath,
|
command: commandPath,
|
||||||
args,
|
args,
|
||||||
cwd: options.cwd,
|
cwd: options.cwd,
|
||||||
env: envOverrides,
|
env: envOverrides,
|
||||||
abortController: options.abortController,
|
abortController: options.abortController,
|
||||||
timeout: DEFAULT_TIMEOUT_MS,
|
timeout,
|
||||||
stdinData: promptText, // Pass prompt via stdin
|
stdinData: promptText, // Pass prompt via stdin
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
@@ -11,6 +11,11 @@ import {
|
|||||||
getCodexConfigDir,
|
getCodexConfigDir,
|
||||||
getCodexAuthIndicators,
|
getCodexAuthIndicators,
|
||||||
} from '@automaker/platform';
|
} from '@automaker/platform';
|
||||||
|
import {
|
||||||
|
calculateReasoningTimeout,
|
||||||
|
REASONING_TIMEOUT_MULTIPLIERS,
|
||||||
|
DEFAULT_TIMEOUT_MS,
|
||||||
|
} from '@automaker/types';
|
||||||
|
|
||||||
const OPENAI_API_KEY_ENV = 'OPENAI_API_KEY';
|
const OPENAI_API_KEY_ENV = 'OPENAI_API_KEY';
|
||||||
const originalOpenAIKey = process.env[OPENAI_API_KEY_ENV];
|
const originalOpenAIKey = process.env[OPENAI_API_KEY_ENV];
|
||||||
@@ -289,5 +294,121 @@ describe('codex-provider.ts', () => {
|
|||||||
expect(codexRunMock).not.toHaveBeenCalled();
|
expect(codexRunMock).not.toHaveBeenCalled();
|
||||||
expect(spawnJSONLProcess).toHaveBeenCalled();
|
expect(spawnJSONLProcess).toHaveBeenCalled();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('passes extended timeout for high reasoning effort', async () => {
|
||||||
|
vi.mocked(spawnJSONLProcess).mockReturnValue((async function* () {})());
|
||||||
|
|
||||||
|
await collectAsyncGenerator(
|
||||||
|
provider.executeQuery({
|
||||||
|
prompt: 'Complex reasoning task',
|
||||||
|
model: 'gpt-5.1-codex-max',
|
||||||
|
cwd: '/tmp',
|
||||||
|
reasoningEffort: 'high',
|
||||||
|
})
|
||||||
|
);
|
||||||
|
|
||||||
|
const call = vi.mocked(spawnJSONLProcess).mock.calls[0][0];
|
||||||
|
// High reasoning effort should have 3x the default timeout (90000ms)
|
||||||
|
expect(call.timeout).toBe(DEFAULT_TIMEOUT_MS * REASONING_TIMEOUT_MULTIPLIERS.high);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('passes extended timeout for xhigh reasoning effort', async () => {
|
||||||
|
vi.mocked(spawnJSONLProcess).mockReturnValue((async function* () {})());
|
||||||
|
|
||||||
|
await collectAsyncGenerator(
|
||||||
|
provider.executeQuery({
|
||||||
|
prompt: 'Very complex reasoning task',
|
||||||
|
model: 'gpt-5.1-codex-max',
|
||||||
|
cwd: '/tmp',
|
||||||
|
reasoningEffort: 'xhigh',
|
||||||
|
})
|
||||||
|
);
|
||||||
|
|
||||||
|
const call = vi.mocked(spawnJSONLProcess).mock.calls[0][0];
|
||||||
|
// xhigh reasoning effort should have 4x the default timeout (120000ms)
|
||||||
|
expect(call.timeout).toBe(DEFAULT_TIMEOUT_MS * REASONING_TIMEOUT_MULTIPLIERS.xhigh);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('uses default timeout when no reasoning effort is specified', async () => {
|
||||||
|
vi.mocked(spawnJSONLProcess).mockReturnValue((async function* () {})());
|
||||||
|
|
||||||
|
await collectAsyncGenerator(
|
||||||
|
provider.executeQuery({
|
||||||
|
prompt: 'Simple task',
|
||||||
|
model: 'gpt-5.2',
|
||||||
|
cwd: '/tmp',
|
||||||
|
})
|
||||||
|
);
|
||||||
|
|
||||||
|
const call = vi.mocked(spawnJSONLProcess).mock.calls[0][0];
|
||||||
|
// No reasoning effort should use the default timeout
|
||||||
|
expect(call.timeout).toBe(DEFAULT_TIMEOUT_MS);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe('calculateReasoningTimeout', () => {
|
||||||
|
it('returns default timeout when no reasoning effort is specified', () => {
|
||||||
|
expect(calculateReasoningTimeout()).toBe(DEFAULT_TIMEOUT_MS);
|
||||||
|
expect(calculateReasoningTimeout(undefined)).toBe(DEFAULT_TIMEOUT_MS);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('returns default timeout for none reasoning effort', () => {
|
||||||
|
expect(calculateReasoningTimeout('none')).toBe(DEFAULT_TIMEOUT_MS);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('applies correct multiplier for minimal reasoning effort', () => {
|
||||||
|
const expected = Math.round(DEFAULT_TIMEOUT_MS * REASONING_TIMEOUT_MULTIPLIERS.minimal);
|
||||||
|
expect(calculateReasoningTimeout('minimal')).toBe(expected);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('applies correct multiplier for low reasoning effort', () => {
|
||||||
|
const expected = Math.round(DEFAULT_TIMEOUT_MS * REASONING_TIMEOUT_MULTIPLIERS.low);
|
||||||
|
expect(calculateReasoningTimeout('low')).toBe(expected);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('applies correct multiplier for medium reasoning effort', () => {
|
||||||
|
const expected = Math.round(DEFAULT_TIMEOUT_MS * REASONING_TIMEOUT_MULTIPLIERS.medium);
|
||||||
|
expect(calculateReasoningTimeout('medium')).toBe(expected);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('applies correct multiplier for high reasoning effort', () => {
|
||||||
|
const expected = Math.round(DEFAULT_TIMEOUT_MS * REASONING_TIMEOUT_MULTIPLIERS.high);
|
||||||
|
expect(calculateReasoningTimeout('high')).toBe(expected);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('applies correct multiplier for xhigh reasoning effort', () => {
|
||||||
|
const expected = Math.round(DEFAULT_TIMEOUT_MS * REASONING_TIMEOUT_MULTIPLIERS.xhigh);
|
||||||
|
expect(calculateReasoningTimeout('xhigh')).toBe(expected);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('uses custom base timeout when provided', () => {
|
||||||
|
const customBase = 60000;
|
||||||
|
expect(calculateReasoningTimeout('high', customBase)).toBe(
|
||||||
|
Math.round(customBase * REASONING_TIMEOUT_MULTIPLIERS.high)
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('falls back to 1.0 multiplier for invalid reasoning effort', () => {
|
||||||
|
// Test that invalid values fallback gracefully to default multiplier
|
||||||
|
// This tests the defensive ?? 1.0 in calculateReasoningTimeout
|
||||||
|
const invalidEffort = 'invalid_effort' as never;
|
||||||
|
expect(calculateReasoningTimeout(invalidEffort)).toBe(DEFAULT_TIMEOUT_MS);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('produces expected absolute timeout values', () => {
|
||||||
|
// Verify the actual timeout values that will be used:
|
||||||
|
// none: 30000ms (30s)
|
||||||
|
// minimal: 36000ms (36s)
|
||||||
|
// low: 45000ms (45s)
|
||||||
|
// medium: 60000ms (1m)
|
||||||
|
// high: 90000ms (1m 30s)
|
||||||
|
// xhigh: 120000ms (2m)
|
||||||
|
expect(calculateReasoningTimeout('none')).toBe(30000);
|
||||||
|
expect(calculateReasoningTimeout('minimal')).toBe(36000);
|
||||||
|
expect(calculateReasoningTimeout('low')).toBe(45000);
|
||||||
|
expect(calculateReasoningTimeout('medium')).toBe(60000);
|
||||||
|
expect(calculateReasoningTimeout('high')).toBe(90000);
|
||||||
|
expect(calculateReasoningTimeout('xhigh')).toBe(120000);
|
||||||
|
});
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -21,6 +21,13 @@ export type {
|
|||||||
ReasoningEffort,
|
ReasoningEffort,
|
||||||
} from './provider.js';
|
} from './provider.js';
|
||||||
|
|
||||||
|
// Provider constants and utilities
|
||||||
|
export {
|
||||||
|
DEFAULT_TIMEOUT_MS,
|
||||||
|
REASONING_TIMEOUT_MULTIPLIERS,
|
||||||
|
calculateReasoningTimeout,
|
||||||
|
} from './provider.js';
|
||||||
|
|
||||||
// Codex CLI types
|
// Codex CLI types
|
||||||
export type {
|
export type {
|
||||||
CodexSandboxMode,
|
CodexSandboxMode,
|
||||||
|
|||||||
@@ -18,6 +18,61 @@ import type { CodexSandboxMode, CodexApprovalPolicy } from './codex.js';
|
|||||||
*/
|
*/
|
||||||
export type ReasoningEffort = 'none' | 'minimal' | 'low' | 'medium' | 'high' | 'xhigh';
|
export type ReasoningEffort = 'none' | 'minimal' | 'low' | 'medium' | 'high' | 'xhigh';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Default timeout in milliseconds for provider operations.
|
||||||
|
* Used as the baseline timeout for API calls and CLI operations.
|
||||||
|
*/
|
||||||
|
export const DEFAULT_TIMEOUT_MS = 30000;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Timeout multipliers for reasoning effort levels.
|
||||||
|
* Higher reasoning effort requires more time for the model to generate reasoning tokens.
|
||||||
|
* These multipliers are applied to DEFAULT_TIMEOUT_MS.
|
||||||
|
*/
|
||||||
|
export const REASONING_TIMEOUT_MULTIPLIERS: Record<ReasoningEffort, number> = {
|
||||||
|
none: 1.0, // No reasoning, baseline timeout
|
||||||
|
minimal: 1.2, // Very quick reasoning, slight increase
|
||||||
|
low: 1.5, // Quick reasoning, moderate increase
|
||||||
|
medium: 2.0, // Balanced reasoning, double baseline
|
||||||
|
high: 3.0, // Extended reasoning, triple baseline
|
||||||
|
xhigh: 4.0, // Maximum reasoning, quadruple baseline
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Calculate timeout for provider operations based on reasoning effort.
|
||||||
|
* Higher reasoning effort requires more time for the model to generate reasoning tokens.
|
||||||
|
*
|
||||||
|
* This function addresses GitHub issue #530 where Codex CLI with GPT-5.2 "xtra thinking"
|
||||||
|
* (xhigh reasoning effort) mode would get stuck because the 30-second "no output" timeout
|
||||||
|
* would trigger during extended reasoning phases.
|
||||||
|
*
|
||||||
|
* @param reasoningEffort - The reasoning effort level, defaults to 'none' if undefined.
|
||||||
|
* If an invalid value is provided, falls back to multiplier 1.0.
|
||||||
|
* @param baseTimeoutMs - Optional custom base timeout, defaults to DEFAULT_TIMEOUT_MS (30000ms)
|
||||||
|
* @returns The calculated timeout in milliseconds, rounded to the nearest integer
|
||||||
|
*
|
||||||
|
* @example
|
||||||
|
* // Using default base timeout (30000ms)
|
||||||
|
* calculateReasoningTimeout('high') // Returns 90000 (30000 * 3.0)
|
||||||
|
*
|
||||||
|
* @example
|
||||||
|
* // Using custom base timeout
|
||||||
|
* calculateReasoningTimeout('medium', 60000) // Returns 120000 (60000 * 2.0)
|
||||||
|
*
|
||||||
|
* @example
|
||||||
|
* // No reasoning effort (default)
|
||||||
|
* calculateReasoningTimeout() // Returns 30000 (default timeout)
|
||||||
|
* calculateReasoningTimeout(undefined) // Returns 30000
|
||||||
|
*/
|
||||||
|
export function calculateReasoningTimeout(
|
||||||
|
reasoningEffort?: ReasoningEffort,
|
||||||
|
baseTimeoutMs: number = DEFAULT_TIMEOUT_MS
|
||||||
|
): number {
|
||||||
|
const effort = reasoningEffort ?? 'none';
|
||||||
|
const multiplier = REASONING_TIMEOUT_MULTIPLIERS[effort] ?? 1.0;
|
||||||
|
return Math.round(baseTimeoutMs * multiplier);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Configuration for a provider instance
|
* Configuration for a provider instance
|
||||||
*/
|
*/
|
||||||
|
|||||||
Reference in New Issue
Block a user