mirror of
https://github.com/AutoMaker-Org/automaker.git
synced 2026-01-30 06:12:03 +00:00
419 lines
14 KiB
TypeScript
419 lines
14 KiB
TypeScript
import { describe, it, expect, vi, beforeEach, afterAll } from 'vitest';
|
|
import os from 'os';
|
|
import path from 'path';
|
|
import { CodexProvider } from '../../../src/providers/codex-provider.js';
|
|
import type { ProviderMessage } from '../../../src/providers/types.js';
|
|
import { collectAsyncGenerator } from '../../utils/helpers.js';
|
|
import {
|
|
spawnJSONLProcess,
|
|
findCodexCliPath,
|
|
secureFs,
|
|
getCodexConfigDir,
|
|
getCodexAuthIndicators,
|
|
} from '@automaker/platform';
|
|
import {
|
|
calculateReasoningTimeout,
|
|
REASONING_TIMEOUT_MULTIPLIERS,
|
|
DEFAULT_TIMEOUT_MS,
|
|
} from '@automaker/types';
|
|
|
|
/** Name of the environment variable that this suite sets, clears and finally restores. */
const OPENAI_API_KEY_ENV = 'OPENAI_API_KEY';

/** Value the variable held when this module was loaded (`undefined` when it was unset). */
const { [OPENAI_API_KEY_ENV]: originalOpenAIKey } = process.env;
|
|
|
|
/**
 * Shared stub for the `run` method of every thread returned by the mocked SDK.
 * Tests set its resolved value and inspect whether it was called.
 */
const codexRunMock = vi.fn();

// Replace the Codex SDK with a fake whose threads all carry the same id and the shared `run` stub.
vi.mock('@openai/codex-sdk', () => {
  // `codexRunMock` is read only when a thread is requested, never while the factory itself runs.
  const createThreadStub = () => ({
    id: 'thread-123',
    run: codexRunMock,
  });

  class Codex {
    constructor(_opts: { apiKey: string }) {}

    startThread() {
      return createThreadStub();
    }

    resumeThread() {
      return createThreadStub();
    }
  }

  return { Codex };
});
|
|
|
|
/** Codex CLI subcommand name that the tests look up in the spawned process arguments. */
const EXEC_SUBCOMMAND = 'exec';
|
|
|
|
// Replace the platform package with stubs; each export listed below becomes a vi.fn().
vi.mock('@automaker/platform', () => {
  // Default auth indicators: every flag is false. `beforeEach` installs its own values per test.
  const noAuthIndicators = {
    hasAuthFile: false,
    hasOAuthToken: false,
    hasApiKey: false,
  };

  // File-system facade with only the three methods stubbed here.
  const secureFsStub = {
    readFile: vi.fn(),
    mkdir: vi.fn(),
    writeFile: vi.fn(),
  };

  return {
    spawnJSONLProcess: vi.fn(),
    spawnProcess: vi.fn(),
    findCodexCliPath: vi.fn(),
    getCodexAuthIndicators: vi.fn().mockResolvedValue(noAuthIndicators),
    getCodexConfigDir: vi.fn().mockReturnValue('/home/test/.codex'),
    secureFs: secureFsStub,
    getDataDirectory: vi.fn(),
  };
});
|
|
|
|
// Replace the settings service with a class whose global settings are fixed.
vi.mock('@/services/settings-service.js', () => {
  class SettingsService {
    /** Resolves to a fresh settings object on every call. */
    async getGlobalSettings() {
      return {
        codexAutoLoadAgents: false,
        codexSandboxMode: 'workspace-write',
        codexApprovalPolicy: 'on-request',
      };
    }
  }

  return { SettingsService };
});
|
|
|
|
describe('codex-provider.ts', () => {
|
|
let provider: CodexProvider;
|
|
|
|
// Once the suite is done, put the API key variable back exactly as it was at module load.
afterAll(() => {
  if (originalOpenAIKey === undefined) {
    // It was unset originally, so make sure nothing is left behind.
    delete process.env[OPENAI_API_KEY_ENV];
    return;
  }

  process.env[OPENAI_API_KEY_ENV] = originalOpenAIKey;
});
|
|
|
|
// Per-test baseline: cleared mock history, a resolvable CLI path, auth file and OAuth token
// flagged as present, no API key in the environment, and a fresh provider instance.
beforeEach(() => {
  vi.clearAllMocks();

  const oauthOnlyIndicators = {
    hasAuthFile: true,
    hasOAuthToken: true,
    hasApiKey: false,
  };
  vi.mocked(getCodexAuthIndicators).mockResolvedValue(oauthOnlyIndicators);
  vi.mocked(findCodexCliPath).mockResolvedValue('/usr/bin/codex');
  vi.mocked(getCodexConfigDir).mockReturnValue('/home/test/.codex');

  // The key is removed before the provider is constructed, as in the original ordering.
  delete process.env[OPENAI_API_KEY_ENV];
  provider = new CodexProvider();
});
|
|
|
|
describe('executeQuery', () => {
|
|
// Feeds a started/completed pair of `command_execution` items with the same id through the
// mocked CLI stream and expects two assistant messages linked by one tool_use_id.
it('emits tool_use and tool_result with shared tool_use_id for command execution', async () => {
  const commandId = 'cmd-1';

  async function* streamCommandEvents() {
    yield {
      type: 'item.started',
      item: { type: 'command_execution', id: commandId, command: 'ls' },
    };
    yield {
      type: 'item.completed',
      item: { type: 'command_execution', id: commandId, output: 'file1\nfile2' },
    };
  }
  vi.mocked(spawnJSONLProcess).mockReturnValue(streamCommandEvents());

  const query = provider.executeQuery({
    prompt: 'List files',
    model: 'gpt-5.2',
    cwd: '/tmp',
  });
  const results = await collectAsyncGenerator<ProviderMessage>(query);

  expect(results).toHaveLength(2);
  const [toolUse, toolResult] = results;

  // First message: the tool invocation, which must carry an id.
  expect(toolUse.type).toBe('assistant');
  expect(toolUse.message?.content[0].type).toBe('tool_use');
  const toolUseId = toolUse.message?.content[0].tool_use_id;
  expect(toolUseId).toBeDefined();

  // Second message: the result, pointing back at the same id and carrying the command output.
  expect(toolResult.type).toBe('assistant');
  expect(toolResult.message?.content[0].type).toBe('tool_result');
  expect(toolResult.message?.content[0].tool_use_id).toBe(toolUseId);
  expect(toolResult.message?.content[0].content).toBe('file1\nfile2');
});
|
|
|
|
// Despite its title, this test only checks the basic shape of the CLI invocation:
// with full permissions always on, the schema / max-turn flags are no longer used.
// The model gpt-5.1-codex-max is chosen because it should route to Codex (not Cursor).
it('adds output schema and max turn overrides when configured', async () => {
  vi.mocked(spawnJSONLProcess).mockReturnValue((async function* () {})());

  const query = provider.executeQuery({
    prompt: 'Test config',
    model: 'gpt-5.1-codex-max',
    cwd: '/tmp',
    allowedTools: ['Read', 'Write'],
    maxTurns: 5,
  });
  await collectAsyncGenerator(query);

  const { args } = vi.mocked(spawnJSONLProcess).mock.calls[0][0];
  const requiredArgs = [
    'exec', // exec subcommand
    '--dangerously-bypass-approvals-and-sandbox', // YOLO flag
    '--model',
    '--json',
  ];
  for (const requiredArg of requiredArgs) {
    expect(args).toContain(requiredArg);
  }
});
|
|
|
|
// With full permissions always on (--dangerously-bypass-approvals-and-sandbox), the approval
// policy is bypassed rather than configured via --config, so this test checks the CLI shape only.
it('overrides approval policy when MCP auto-approval is enabled', async () => {
  vi.mocked(spawnJSONLProcess).mockReturnValue((async function* () {})());

  await collectAsyncGenerator(
    provider.executeQuery({
      prompt: 'Test approvals',
      model: 'gpt-5.1-codex-max',
      cwd: '/tmp',
      mcpServers: { mock: { type: 'stdio', command: 'node' } },
      mcpAutoApproveTools: true,
      codexSettings: { approvalPolicy: 'untrusted' },
    })
  );

  const call = vi.mocked(spawnJSONLProcess).mock.calls[0][0];
  // Previously the index of the exec subcommand was computed and then discarded;
  // assert its presence instead so the lookup actually verifies something.
  expect(call.args).toContain(EXEC_SUBCOMMAND);
  expect(call.args).toContain('--dangerously-bypass-approvals-and-sandbox'); // YOLO flag bypasses approval
  expect(call.args).toContain('--model');
  expect(call.args).toContain('--json');
});
|
|
|
|
// Serves two AGENTS.md files (user config dir and project .codex dir) through the mocked
// secureFs and expects both contents to appear in the stdin data handed to the CLI.
it('injects user and project instructions when auto-load is enabled', async () => {
  vi.mocked(spawnJSONLProcess).mockReturnValue((async function* () {})());

  const userPath = path.join('/home/test/.codex', 'AGENTS.md');
  const projectPath = path.join('/tmp/project', '.codex', 'AGENTS.md');
  const instructionFiles = new Map<string, string>([
    [userPath, 'User rules'],
    [projectPath, 'Project rules'],
  ]);

  // Any path other than the two known files behaves like a missing file.
  vi.mocked(secureFs.readFile).mockImplementation(async (filePath: string) => {
    const contents = instructionFiles.get(filePath);
    if (contents === undefined) {
      throw new Error('missing');
    }
    return contents;
  });

  const query = provider.executeQuery({
    prompt: 'Hello',
    model: 'gpt-5.2',
    cwd: '/tmp/project',
    codexSettings: { autoLoadAgents: true },
  });
  await collectAsyncGenerator(query);

  const { stdinData: promptText } = vi.mocked(spawnJSONLProcess).mock.calls[0][0];
  expect(promptText).toContain('User rules');
  expect(promptText).toContain('Project rules');
});
|
|
|
|
// Runs a query with cwd under ~/Dropbox. With full permissions always on
// (--dangerously-bypass-approvals-and-sandbox) the sandbox is bypassed rather than configured
// via a --sandbox flag, so only the presence of the bypass flag and basic flags is checked.
it('disables sandbox mode when running in cloud storage paths', async () => {
  vi.mocked(spawnJSONLProcess).mockReturnValue((async function* () {})());

  const cloudPath = path.join(os.homedir(), 'Dropbox', 'project');
  const query = provider.executeQuery({
    prompt: 'Hello',
    model: 'gpt-5.1-codex-max',
    cwd: cloudPath,
    codexSettings: { sandboxMode: 'workspace-write' },
  });
  await collectAsyncGenerator(query);

  const { args } = vi.mocked(spawnJSONLProcess).mock.calls[0][0];
  const requiredArgs = [
    '--dangerously-bypass-approvals-and-sandbox', // YOLO flag bypasses sandbox entirely
    '--model',
    '--json',
  ];
  for (const requiredArg of requiredArgs) {
    expect(args).toContain(requiredArg);
  }
});
|
|
|
|
// With an API key in the environment and an empty tool list, the reply resolved by the
// mocked SDK `run` should surface both as assistant text and as the final result.
it('uses the SDK when no tools are requested and an API key is present', async () => {
  const sdkReply = 'Hello from SDK';
  process.env[OPENAI_API_KEY_ENV] = 'sk-test';
  codexRunMock.mockResolvedValue({ finalResponse: sdkReply });

  const query = provider.executeQuery({
    prompt: 'Hello',
    model: 'gpt-5.2',
    cwd: '/tmp',
    allowedTools: [],
  });
  const [assistantMessage, finalResult] = await collectAsyncGenerator<ProviderMessage>(query);

  expect(assistantMessage.message?.content[0].text).toBe(sdkReply);
  expect(finalResult.result).toBe(sdkReply);
});
|
|
|
|
// With an API key present the provider is expected to take the SDK path even when tools
// are requested, and never spawn the CLI.
it('uses the SDK when API key is present, even for tool requests (to avoid OAuth issues)', async () => {
  process.env[OPENAI_API_KEY_ENV] = 'sk-test';
  // vi.clearAllMocks() in beforeEach clears call history but keeps implementations, so this
  // test used to rely on the resolved value configured by the preceding test. Stub the SDK
  // reply here so the test also works when run on its own or in a different order.
  codexRunMock.mockResolvedValue({ finalResponse: 'Hello from SDK' });
  vi.mocked(spawnJSONLProcess).mockReturnValue((async function* () {})());

  await collectAsyncGenerator(
    provider.executeQuery({
      prompt: 'Read files',
      model: 'gpt-5.2',
      cwd: '/tmp',
      allowedTools: ['Read'],
    })
  );

  expect(codexRunMock).toHaveBeenCalled();
  expect(spawnJSONLProcess).not.toHaveBeenCalled();
});
|
|
|
|
// No API key is set (beforeEach deletes it), so even with an empty tool list the query
// should go through the spawned CLI and leave the SDK `run` stub untouched.
it('falls back to CLI when no tools are requested and no API key is available', async () => {
  const emptyStream = (async function* () {})();
  vi.mocked(spawnJSONLProcess).mockReturnValue(emptyStream);

  const query = provider.executeQuery({
    prompt: 'Hello',
    model: 'gpt-5.2',
    cwd: '/tmp',
    allowedTools: [],
  });
  await collectAsyncGenerator(query);

  expect(codexRunMock).not.toHaveBeenCalled();
  expect(spawnJSONLProcess).toHaveBeenCalled();
});
|
|
|
|
// The timeout handed to the spawned CLI for `high` effort must equal the default timeout
// scaled by the `high` multiplier (3x the default, i.e. 90000ms).
it('passes extended timeout for high reasoning effort', async () => {
  vi.mocked(spawnJSONLProcess).mockReturnValue((async function* () {})());

  const query = provider.executeQuery({
    prompt: 'Complex reasoning task',
    model: 'gpt-5.1-codex-max',
    cwd: '/tmp',
    reasoningEffort: 'high',
  });
  await collectAsyncGenerator(query);

  const { timeout } = vi.mocked(spawnJSONLProcess).mock.calls[0][0];
  const expectedTimeout = DEFAULT_TIMEOUT_MS * REASONING_TIMEOUT_MULTIPLIERS.high;
  expect(timeout).toBe(expectedTimeout);
});
|
|
|
|
// For `xhigh` effort the expected timeout is built from a 5-minute base (300000ms, the base
// used for feature generation) times the xhigh multiplier: 300000 * 4.0 = 1200000ms (20 minutes).
it('passes extended timeout for xhigh reasoning effort', async () => {
  vi.mocked(spawnJSONLProcess).mockReturnValue((async function* () {})());

  const query = provider.executeQuery({
    prompt: 'Very complex reasoning task',
    model: 'gpt-5.1-codex-max',
    cwd: '/tmp',
    reasoningEffort: 'xhigh',
  });
  await collectAsyncGenerator(query);

  const { timeout } = vi.mocked(spawnJSONLProcess).mock.calls[0][0];
  const featureGenerationBaseTimeoutMs = 5 * 60 * 1000;
  const expectedTimeout = featureGenerationBaseTimeoutMs * REASONING_TIMEOUT_MULTIPLIERS.xhigh;
  expect(timeout).toBe(expectedTimeout);
});
|
|
|
|
// Without a reasoningEffort option the spawned CLI must receive the unscaled default timeout.
it('uses default timeout when no reasoning effort is specified', async () => {
  const emptyStream = (async function* () {})();
  vi.mocked(spawnJSONLProcess).mockReturnValue(emptyStream);

  const query = provider.executeQuery({
    prompt: 'Simple task',
    model: 'gpt-5.2',
    cwd: '/tmp',
  });
  await collectAsyncGenerator(query);

  const { timeout } = vi.mocked(spawnJSONLProcess).mock.calls[0][0];
  expect(timeout).toBe(DEFAULT_TIMEOUT_MS);
});
|
|
});
|
|
|
|
// Direct tests of the timeout helper: default handling, per-effort multipliers,
// custom base values, the fallback for unknown efforts, and the resulting absolute numbers.
describe('calculateReasoningTimeout', () => {
  it('returns default timeout when no reasoning effort is specified', () => {
    const omitted = calculateReasoningTimeout();
    expect(omitted).toBe(DEFAULT_TIMEOUT_MS);

    const explicitUndefined = calculateReasoningTimeout(undefined);
    expect(explicitUndefined).toBe(DEFAULT_TIMEOUT_MS);
  });

  it('returns default timeout for none reasoning effort', () => {
    const timeout = calculateReasoningTimeout('none');
    expect(timeout).toBe(DEFAULT_TIMEOUT_MS);
  });

  // One generated test per effort level; the titles match the hand-written ones they replace.
  const scaledEfforts = ['minimal', 'low', 'medium', 'high', 'xhigh'] as const;
  it.each(scaledEfforts)('applies correct multiplier for %s reasoning effort', (effort) => {
    const expected = Math.round(DEFAULT_TIMEOUT_MS * REASONING_TIMEOUT_MULTIPLIERS[effort]);
    expect(calculateReasoningTimeout(effort)).toBe(expected);
  });

  it('uses custom base timeout when provided', () => {
    const customBase = 60000;
    const expected = Math.round(customBase * REASONING_TIMEOUT_MULTIPLIERS.high);
    expect(calculateReasoningTimeout('high', customBase)).toBe(expected);
  });

  it('falls back to 1.0 multiplier for invalid reasoning effort', () => {
    // An unknown effort value should degrade gracefully to the default multiplier;
    // this exercises the defensive `?? 1.0` in calculateReasoningTimeout.
    const invalidEffort = 'invalid_effort' as never;
    const timeout = calculateReasoningTimeout(invalidEffort);
    expect(timeout).toBe(DEFAULT_TIMEOUT_MS);
  });

  it('produces expected absolute timeout values', () => {
    // The actual timeout values that will be used, per effort level.
    const expectedTimeouts = [
      ['none', 30000], // 30s
      ['minimal', 36000], // 36s
      ['low', 45000], // 45s
      ['medium', 60000], // 1m
      ['high', 90000], // 1m 30s
      ['xhigh', 120000], // 2m
    ] as const;

    for (const [effort, timeoutMs] of expectedTimeouts) {
      expect(calculateReasoningTimeout(effort)).toBe(timeoutMs);
    }
  });
});
|
|
});
|