From 92b1fb37254f0e20b064393fc16320eea20cf4ea Mon Sep 17 00:00:00 2001
From: Shirone
Date: Sat, 24 Jan 2026 18:25:39 +0100
Subject: [PATCH 1/5] fix: Add structured output fallback for non-Claude models in app spec generation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This fixes the app spec generation failing for non-Claude models (Cursor, Gemini, OpenCode, Copilot) that don't support structured output capabilities.

Changes:
- Add `supportsStructuredOutput()` utility function in @automaker/types to centralize model capability detection
- Update generate-features-from-spec.ts:
  - Add explicit JSON instructions for non-Claude/Codex models
  - Define featuresOutputSchema for structured output
  - Pre-extract JSON from text responses using extractJsonWithArray
  - Handle both structured_output and text responses properly
- Update generate-spec.ts:
  - Replace isCursorModel with supportsStructuredOutput for consistency
- Update sync-spec.ts:
  - Add techStackOutputSchema for structured output
  - Add JSON extraction fallback for text responses
  - Handle both structured_output and text parsing
- Update validate-issue.ts:
  - Use supportsStructuredOutput for cleaner capability detection

The fix follows the same pattern used in generate-spec.ts where non-Claude models receive explicit JSON formatting instructions in the prompt and responses are parsed using extractJson utilities.

Fixes #669

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5
---
 .../app-spec/generate-features-from-spec.ts  | 139 ++++++++++++++++--
 .../src/routes/app-spec/generate-spec.ts     |  11 +-
 apps/server/src/routes/app-spec/sync-spec.ts | 120 +++++++++++----
 .../routes/github/routes/validate-issue.ts   |   6 +-
 libs/types/src/index.ts                      |   1 +
 libs/types/src/provider-utils.ts             |  28 ++++
 6 files changed, 261 insertions(+), 44 deletions(-)

diff --git a/apps/server/src/routes/app-spec/generate-features-from-spec.ts b/apps/server/src/routes/app-spec/generate-features-from-spec.ts
index 56058cb7..e614113a 100644
--- a/apps/server/src/routes/app-spec/generate-features-from-spec.ts
+++ b/apps/server/src/routes/app-spec/generate-features-from-spec.ts
@@ -8,10 +8,11 @@ import * as secureFs from '../../lib/secure-fs.js';
 
 import type { EventEmitter } from '../../lib/events.js';
 import { createLogger } from '@automaker/utils';
-import { DEFAULT_PHASE_MODELS } from '@automaker/types';
+import { DEFAULT_PHASE_MODELS, supportsStructuredOutput } from '@automaker/types';
 import { resolvePhaseModel } from '@automaker/model-resolver';
 import { streamingQuery } from '../../providers/simple-query-service.js';
 import { parseAndCreateFeatures } from './parse-and-create-features.js';
+import { extractJsonWithArray } from '../../lib/json-extractor.js';
 import { getAppSpecPath } from '@automaker/platform';
 import type { SettingsService } from '../../services/settings-service.js';
 import {
@@ -25,6 +26,58 @@ const logger = createLogger('SpecRegeneration');
 
 const DEFAULT_MAX_FEATURES = 50;
 
+/**
+ * Type for extracted features JSON response
+ */
+interface FeaturesExtractionResult {
+  features: Array<{
+    id: string;
+    category?: string;
+    title: string;
+    description: string;
+    priority?: number;
+    complexity?: 'simple' | 'moderate' | 'complex';
+    dependencies?: string[];
+  }>;
+}
+
+/**
+ * JSON schema for features output format (Claude/Codex structured output)
+ */
+const featuresOutputSchema = {
+  type: 'object',
+  properties: {
+    features: {
+      type: 'array',
+      items: {
+        type: 'object',
+        properties: {
+          id: { type: 'string', description: 'Unique feature identifier (kebab-case)' },
+          category: { type: 'string', description: 'Feature category' },
+          title: { type: 'string', description: 'Short, descriptive title' },
+          description: { type: 'string', description: 'Detailed feature description' },
+          priority: {
+            type: 'number',
+            description: 'Priority level: 1 (highest) to 5 (lowest)',
+          },
+          complexity: {
+            type: 'string',
+            enum: ['simple', 'moderate', 'complex'],
+            description: 'Implementation complexity',
+          },
+          dependencies: {
+            type: 'array',
+            items: { type: 'string' },
+            description: 'IDs of features this depends on',
+          },
+        },
+        required: ['id', 'title', 'description'],
+      },
+    },
+  },
+  required: ['features'],
+} as const;
+
 export async function generateFeaturesFromSpec(
   projectPath: string,
   events: EventEmitter,
@@ -140,9 +193,46 @@ Generate ${featureCount} NEW features that build on each other logically. Rememb
 
   logger.info('Using model:', model, provider ? `via provider: ${provider.name}` : 'direct API');
 
+  // Determine if we should use structured output based on model type
+  const useStructuredOutput = supportsStructuredOutput(model);
+  logger.info(
+    `Structured output mode: ${useStructuredOutput ? 'enabled (Claude/Codex)' : 'disabled (using JSON instructions)'}`
+  );
+
+  // Build the final prompt - for non-Claude/Codex models, include explicit JSON instructions
+  let finalPrompt = prompt;
+  if (!useStructuredOutput) {
+    finalPrompt = `${prompt}
+
+CRITICAL INSTRUCTIONS:
+1. DO NOT write any files. Return the JSON in your response only.
+2. After analyzing the spec, respond with ONLY a JSON object - no explanations, no markdown, just raw JSON.
+3. The JSON must have this exact structure:
+{
+  "features": [
+    {
+      "id": "unique-feature-id",
+      "category": "Category Name",
+      "title": "Short Feature Title",
+      "description": "Detailed description of the feature",
+      "priority": 1,
+      "complexity": "simple|moderate|complex",
+      "dependencies": ["other-feature-id"]
+    }
+  ]
+}
+
+4. Feature IDs must be unique, lowercase, kebab-case (e.g., "user-authentication", "data-export")
+5. Priority ranges from 1 (highest) to 5 (lowest)
+6. Complexity must be one of: "simple", "moderate", "complex"
+7. Dependencies is an array of feature IDs that must be completed first (can be empty)
+
+Your entire response should be valid JSON starting with { and ending with }. No text before or after.`;
+  }
+
   // Use streamingQuery with event callbacks
   const result = await streamingQuery({
-    prompt,
+    prompt: finalPrompt,
     model,
     cwd: projectPath,
     maxTurns: 250,
@@ -153,6 +243,12 @@ Generate ${featureCount} NEW features that build on each other logically. Rememb
     settingSources: autoLoadClaudeMd ? ['user', 'project', 'local'] : undefined,
     claudeCompatibleProvider: provider, // Pass provider for alternative endpoint configuration
     credentials, // Pass credentials for resolving 'credentials' apiKeySource
+    outputFormat: useStructuredOutput
+      ? {
+          type: 'json_schema',
+          schema: featuresOutputSchema,
+        }
+      : undefined,
     onText: (text) => {
       logger.debug(`Feature text block received (${text.length} chars)`);
       events.emit('spec-regeneration:event', {
@@ -163,15 +259,40 @@ Generate ${featureCount} NEW features that build on each other logically. Rememb
     },
   });
 
-  const responseText = result.text;
+  // Get response content - prefer structured output if available
+  let contentForParsing: string;
 
-  logger.info(`Feature stream complete.`);
-  logger.info(`Feature response length: ${responseText.length} chars`);
-  logger.info('========== FULL RESPONSE TEXT ==========');
-  logger.info(responseText);
-  logger.info('========== END RESPONSE TEXT ==========');
+  if (result.structured_output) {
+    // Use structured output from Claude/Codex models
+    logger.info('✅ Received structured output from model');
+    contentForParsing = JSON.stringify(result.structured_output);
+    logger.debug('Structured output:', contentForParsing);
+  } else {
+    // Use text response (for non-Claude/Codex models or fallback)
+    // Pre-extract JSON to handle conversational text that may surround the JSON response
+    // This follows the same pattern used in generate-spec.ts and validate-issue.ts
+    const rawText = result.text;
+    logger.info(`Feature stream complete.`);
+    logger.info(`Feature response length: ${rawText.length} chars`);
+    logger.info('========== FULL RESPONSE TEXT ==========');
+    logger.info(rawText);
+    logger.info('========== END RESPONSE TEXT ==========');
 
-  await parseAndCreateFeatures(projectPath, responseText, events);
+    // Pre-extract JSON from response - handles conversational text around the JSON
+    const extracted = extractJsonWithArray(rawText, 'features', {
+      logger,
+    });
+    if (extracted) {
+      contentForParsing = JSON.stringify(extracted);
+      logger.info('✅ Pre-extracted JSON from text response');
+    } else {
+      // Fall back to raw text (let parseAndCreateFeatures try its extraction)
+      contentForParsing = rawText;
+      logger.warn('⚠️ Could not pre-extract JSON, passing raw text to parser');
+    }
+  }
+
+  await parseAndCreateFeatures(projectPath, contentForParsing, events);
 
   logger.debug('========== generateFeaturesFromSpec() completed ==========');
 }
diff --git a/apps/server/src/routes/app-spec/generate-spec.ts b/apps/server/src/routes/app-spec/generate-spec.ts
index 0f826d76..bd47e9ea 100644
--- a/apps/server/src/routes/app-spec/generate-spec.ts
+++ b/apps/server/src/routes/app-spec/generate-spec.ts
@@ -9,7 +9,7 @@ import * as secureFs from '../../lib/secure-fs.js';
 import type { EventEmitter } from '../../lib/events.js';
 import { specOutputSchema, specToXml, type SpecOutput } from '../../lib/app-spec-format.js';
 import { createLogger } from '@automaker/utils';
-import { DEFAULT_PHASE_MODELS, isCursorModel } from '@automaker/types';
+import { DEFAULT_PHASE_MODELS, supportsStructuredOutput } from '@automaker/types';
 import { resolvePhaseModel } from '@automaker/model-resolver';
 import { extractJson } from '../../lib/json-extractor.js';
 import { streamingQuery } from '../../providers/simple-query-service.js';
@@ -120,10 +120,13 @@ ${prompts.appSpec.structuredSpecInstructions}`;
   let responseText = '';
   let structuredOutput: SpecOutput | null = null;
 
-  // Determine if we should use structured output (Claude supports it, Cursor doesn't)
-  const useStructuredOutput = !isCursorModel(model);
+  // Determine if we should use structured output based on model type
+  const useStructuredOutput = supportsStructuredOutput(model);
+  logger.info(
+    `Structured output mode: ${useStructuredOutput ? 'enabled (Claude/Codex)' : 'disabled (using JSON instructions)'}`
+  );
 
-  // Build the final prompt - for Cursor, include JSON schema instructions
+  // Build the final prompt - for non-Claude/Codex models, include JSON schema instructions
   let finalPrompt = prompt;
   if (!useStructuredOutput) {
     finalPrompt = `${prompt}
diff --git a/apps/server/src/routes/app-spec/sync-spec.ts b/apps/server/src/routes/app-spec/sync-spec.ts
index af5139dd..d36b6808 100644
--- a/apps/server/src/routes/app-spec/sync-spec.ts
+++ b/apps/server/src/routes/app-spec/sync-spec.ts
@@ -10,9 +10,10 @@ import * as secureFs from '../../lib/secure-fs.js';
 
 import type { EventEmitter } from '../../lib/events.js';
 import { createLogger } from '@automaker/utils';
-import { DEFAULT_PHASE_MODELS } from '@automaker/types';
+import { DEFAULT_PHASE_MODELS, supportsStructuredOutput } from '@automaker/types';
 import { resolvePhaseModel } from '@automaker/model-resolver';
 import { streamingQuery } from '../../providers/simple-query-service.js';
+import { extractJson } from '../../lib/json-extractor.js';
 import { getAppSpecPath } from '@automaker/platform';
 import type { SettingsService } from '../../services/settings-service.js';
 import {
@@ -34,6 +35,28 @@ import { getNotificationService } from '../../services/notification-service.js';
 
 const logger = createLogger('SpecSync');
 
+/**
+ * Type for extracted tech stack JSON response
+ */
+interface TechStackExtractionResult {
+  technologies: string[];
+}
+
+/**
+ * JSON schema for tech stack analysis output (Claude/Codex structured output)
+ */
+const techStackOutputSchema = {
+  type: 'object',
+  properties: {
+    technologies: {
+      type: 'array',
+      items: { type: 'string' },
+      description: 'List of technologies detected in the project',
+    },
+  },
+  required: ['technologies'],
+} as const;
+
 /**
  * Result of a sync operation
  */
@@ -176,8 +199,14 @@ export async function syncSpec(
   logger.info('Using model:', model, provider ? `via provider: ${provider.name}` : 'direct API');
 
+  // Determine if we should use structured output based on model type
+  const useStructuredOutput = supportsStructuredOutput(model);
+  logger.info(
+    `Structured output mode: ${useStructuredOutput ? 'enabled (Claude/Codex)' : 'disabled (using JSON instructions)'}`
+  );
+
   // Use AI to analyze tech stack
-  const techAnalysisPrompt = `Analyze this project and return ONLY a JSON object with the current technology stack.
+  let techAnalysisPrompt = `Analyze this project and return ONLY a JSON object with the current technology stack.
 
 Current known technologies: ${currentTechStack.join(', ')}
@@ -193,6 +222,16 @@ Return ONLY this JSON format, no other text:
   "technologies": ["Technology 1", "Technology 2", ...]
 }`;
 
+  // Add explicit JSON instructions for non-Claude/Codex models
+  if (!useStructuredOutput) {
+    techAnalysisPrompt = `${techAnalysisPrompt}
+
+CRITICAL INSTRUCTIONS:
+1. DO NOT write any files. Return the JSON in your response only.
+2. Your entire response should be valid JSON starting with { and ending with }.
+3. No explanations, no markdown, no text before or after the JSON.`;
+  }
+
   try {
     const techResult = await streamingQuery({
       prompt: techAnalysisPrompt,
@@ -206,44 +245,67 @@ Return ONLY this JSON format, no other text:
       settingSources: autoLoadClaudeMd ? ['user', 'project', 'local'] : undefined,
       claudeCompatibleProvider: provider, // Pass provider for alternative endpoint configuration
       credentials, // Pass credentials for resolving 'credentials' apiKeySource
+      outputFormat: useStructuredOutput
+        ? {
+            type: 'json_schema',
+            schema: techStackOutputSchema,
+          }
+        : undefined,
       onText: (text) => {
         logger.debug(`Tech analysis text: ${text.substring(0, 100)}`);
       },
     });
 
-    // Parse tech stack from response
-    const jsonMatch = techResult.text.match(/\{[\s\S]*"technologies"[\s\S]*\}/);
-    if (jsonMatch) {
-      const parsed = JSON.parse(jsonMatch[0]);
-      if (Array.isArray(parsed.technologies)) {
-        const newTechStack = parsed.technologies as string[];
+    // Parse tech stack from response - prefer structured output if available
+    let parsedTechnologies: string[] | null = null;
 
-        // Calculate differences
-        const currentSet = new Set(currentTechStack.map((t) => t.toLowerCase()));
-        const newSet = new Set(newTechStack.map((t) => t.toLowerCase()));
+    if (techResult.structured_output) {
+      // Use structured output from Claude/Codex models
+      const structured = techResult.structured_output as TechStackExtractionResult;
+      if (Array.isArray(structured.technologies)) {
+        parsedTechnologies = structured.technologies;
+        logger.info('✅ Received structured output for tech analysis');
+      }
+    } else {
+      // Fall back to text parsing for non-Claude/Codex models
+      const extracted = extractJson(techResult.text, {
+        logger,
+        requiredKey: 'technologies',
+        requireArray: true,
+      });
+      if (extracted && Array.isArray(extracted.technologies)) {
+        parsedTechnologies = extracted.technologies;
+        logger.info('✅ Extracted tech stack from text response');
+      } else {
+        logger.warn('⚠️ Failed to extract tech stack JSON from response');
+      }
+    }
 
-        for (const tech of newTechStack) {
-          if (!currentSet.has(tech.toLowerCase())) {
-            result.techStackUpdates.added.push(tech);
-          }
+    if (parsedTechnologies) {
+      const newTechStack = parsedTechnologies;
+
+      // Calculate differences
+      const currentSet = new Set(currentTechStack.map((t) => t.toLowerCase()));
+      const newSet = new Set(newTechStack.map((t) => t.toLowerCase()));
+
+      for (const tech of newTechStack) {
+        if (!currentSet.has(tech.toLowerCase())) {
+          result.techStackUpdates.added.push(tech);
         }
+      }
 
-        for (const tech of currentTechStack) {
-          if (!newSet.has(tech.toLowerCase())) {
-            result.techStackUpdates.removed.push(tech);
-          }
+      for (const tech of currentTechStack) {
+        if (!newSet.has(tech.toLowerCase())) {
+          result.techStackUpdates.removed.push(tech);
         }
+      }
 
-        // Update spec with new tech stack if there are changes
-        if (
-          result.techStackUpdates.added.length > 0 ||
-          result.techStackUpdates.removed.length > 0
-        ) {
-          specContent = updateTechnologyStack(specContent, newTechStack);
-          logger.info(
-            `Updated tech stack: +${result.techStackUpdates.added.length}, -${result.techStackUpdates.removed.length}`
-          );
-        }
+      // Update spec with new tech stack if there are changes
+      if (result.techStackUpdates.added.length > 0 || result.techStackUpdates.removed.length > 0) {
+        specContent = updateTechnologyStack(specContent, newTechStack);
+        logger.info(
+          `Updated tech stack: +${result.techStackUpdates.added.length}, -${result.techStackUpdates.removed.length}`
+        );
+      }
     }
   } catch (error) {
diff --git a/apps/server/src/routes/github/routes/validate-issue.ts b/apps/server/src/routes/github/routes/validate-issue.ts
index 10465829..69a13b83 100644
--- a/apps/server/src/routes/github/routes/validate-issue.ts
+++ b/apps/server/src/routes/github/routes/validate-issue.ts
@@ -23,6 +23,7 @@ import {
   isCodexModel,
   isCursorModel,
   isOpencodeModel,
+  supportsStructuredOutput,
 } from '@automaker/types';
 import { resolvePhaseModel } from '@automaker/model-resolver';
 import { extractJson } from '../../../lib/json-extractor.js';
@@ -124,8 +125,9 @@ async function runValidation(
   const prompts = await getPromptCustomization(settingsService, '[ValidateIssue]');
   const issueValidationSystemPrompt = prompts.issueValidation.systemPrompt;
 
-  // Determine if we should use structured output (Claude/Codex support it, Cursor/OpenCode don't)
-  const useStructuredOutput = isClaudeModel(model) || isCodexModel(model);
+  // Determine if we should use structured output based on model type
+  // Claude and Codex support it; Cursor, Gemini, OpenCode, Copilot don't
+  const useStructuredOutput = supportsStructuredOutput(model);
 
   // Build the final prompt - for Cursor, include system prompt and JSON schema instructions
   let finalPrompt = basePrompt;
diff --git a/libs/types/src/index.ts b/libs/types/src/index.ts
index a4a7635e..29a12ae5 100644
--- a/libs/types/src/index.ts
+++ b/libs/types/src/index.ts
@@ -272,6 +272,7 @@ export {
   getBareModelId,
   normalizeModelString,
   validateBareModelId,
+  supportsStructuredOutput,
 } from './provider-utils.js';
 
 // Model migration utilities
diff --git a/libs/types/src/provider-utils.ts b/libs/types/src/provider-utils.ts
index 025322e6..eadc41bb 100644
--- a/libs/types/src/provider-utils.ts
+++ b/libs/types/src/provider-utils.ts
@@ -345,6 +345,34 @@ export function normalizeModelString(model: string | undefined | null): string {
   return model;
 }
 
+/**
+ * Check if a model supports structured output (JSON schema)
+ *
+ * Structured output is a feature that allows the model to return responses
+ * conforming to a JSON schema. Currently supported by:
+ * - Claude models (native Anthropic API support)
+ * - Codex/OpenAI models (via response_format with json_schema)
+ *
+ * Models that do NOT support structured output:
+ * - Cursor models (uses different API format)
+ * - OpenCode models (various backend providers)
+ * - Gemini models (different API)
+ * - Copilot models (proxy to various backends)
+ *
+ * @param model - Model string to check
+ * @returns true if the model supports structured output
+ *
+ * @example
+ * supportsStructuredOutput('sonnet') // true (Claude)
+ * supportsStructuredOutput('claude-sonnet-4-20250514') // true (Claude)
+ * supportsStructuredOutput('codex-gpt-5.2') // true (Codex/OpenAI)
+ * supportsStructuredOutput('cursor-auto') // false
+ * supportsStructuredOutput('gemini-2.5-pro') // false
+ */
+export function supportsStructuredOutput(model: string | undefined | null): boolean {
+  return isClaudeModel(model) || isCodexModel(model);
+}
+
 /**
  * Validate that a model ID does not contain a provider prefix
 *

From db87e83aedd4936cd1aee9ffe665415e83af3e04 Mon Sep 17 00:00:00 2001
From: Shirone
Date: Sat, 24 Jan 2026 18:34:46 +0100
Subject: [PATCH 2/5] fix: Address PR feedback for structured output fallback
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Throw error immediately when JSON extraction fails in generate-features-from-spec.ts to avoid redundant parsing attempt (feedback from Gemini Code Assist review)
- Emit spec_regeneration_error event before throwing for consistency
- Fix TypeScript cast in sync-spec.ts by using double cast through unknown

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5
---
 .../app-spec/generate-features-from-spec.ts  | 17 ++++++++++++++---
 apps/server/src/routes/app-spec/sync-spec.ts |  2 +-
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/apps/server/src/routes/app-spec/generate-features-from-spec.ts b/apps/server/src/routes/app-spec/generate-features-from-spec.ts
index e614113a..95e550e0 100644
--- a/apps/server/src/routes/app-spec/generate-features-from-spec.ts
+++ b/apps/server/src/routes/app-spec/generate-features-from-spec.ts
@@ -286,9 +286,20 @@ Your entire response should be valid JSON starting with { and ending with }. No
       contentForParsing = JSON.stringify(extracted);
       logger.info('✅ Pre-extracted JSON from text response');
     } else {
-      // Fall back to raw text (let parseAndCreateFeatures try its extraction)
-      contentForParsing = rawText;
-      logger.warn('⚠️ Could not pre-extract JSON, passing raw text to parser');
+      // If pre-extraction fails, we know the next step will also fail.
+      // Throw an error here to avoid redundant parsing and make the failure point clearer.
+      logger.error(
+        '❌ Could not extract features JSON from model response. Full response text was:\n' +
+          rawText
+      );
+      const errorMessage =
+        'Failed to parse features from model response: No valid JSON with a "features" array found.';
+      events.emit('spec-regeneration:event', {
+        type: 'spec_regeneration_error',
+        error: errorMessage,
+        projectPath: projectPath,
+      });
+      throw new Error(errorMessage);
     }
   }
 
diff --git a/apps/server/src/routes/app-spec/sync-spec.ts b/apps/server/src/routes/app-spec/sync-spec.ts
index d36b6808..d1ba139d 100644
--- a/apps/server/src/routes/app-spec/sync-spec.ts
+++ b/apps/server/src/routes/app-spec/sync-spec.ts
@@ -261,7 +261,7 @@ CRITICAL INSTRUCTIONS:
 
     if (techResult.structured_output) {
       // Use structured output from Claude/Codex models
-      const structured = techResult.structured_output as TechStackExtractionResult;
+      const structured = techResult.structured_output as unknown as TechStackExtractionResult;
       if (Array.isArray(structured.technologies)) {
         parsedTechnologies = structured.technologies;
         logger.info('✅ Received structured output for tech analysis');

From 1b2bf0df3f2bbb5c42d1f0f4fe11b5d87ac35ab7 Mon Sep 17 00:00:00 2001
From: Shirone
Date: Sat, 24 Jan 2026 20:23:34 +0100
Subject: [PATCH 3/5] feat: Extend timeout handling for Codex model feature generation

- Introduced a dedicated 5-minute timeout for Codex models during feature generation to accommodate slower response times when generating 50+ features.
- Updated the CodexProvider to utilize this extended timeout based on the reasoning effort level.
- Enhanced the feature generation logic in generate-features-from-spec.ts to detect Codex models and apply the appropriate timeout.
- Modified the model resolver to include reasoning effort in the resolved phase model structure.

This change improves the reliability of feature generation for Codex models, ensuring they have sufficient time to process requests effectively.
---
 apps/server/src/providers/codex-provider.ts  | 14 ++++++++++-
 .../app-spec/generate-features-from-spec.ts  | 24 +++++++++++++++++--
 libs/model-resolver/src/resolver.ts          | 11 +++++++--
 3 files changed, 44 insertions(+), 5 deletions(-)

diff --git a/apps/server/src/providers/codex-provider.ts b/apps/server/src/providers/codex-provider.ts
index 18838cb8..5c200ea5 100644
--- a/apps/server/src/providers/codex-provider.ts
+++ b/apps/server/src/providers/codex-provider.ts
@@ -98,9 +98,14 @@ const TEXT_ENCODING = 'utf-8';
  * This is the "no output" timeout - if the CLI doesn't produce any JSONL output
  * for this duration, the process is killed. For reasoning models with high
  * reasoning effort, this timeout is dynamically extended via calculateReasoningTimeout().
+ *
+ * For feature generation (which can generate 50+ features), we use a much longer
+ * base timeout (5 minutes) since Codex models are slower at generating large JSON responses.
+ *
  * @see calculateReasoningTimeout from @automaker/types
  */
 const CODEX_CLI_TIMEOUT_MS = DEFAULT_TIMEOUT_MS;
+const CODEX_FEATURE_GENERATION_BASE_TIMEOUT_MS = 300000; // 5 minutes for feature generation
 const CONTEXT_WINDOW_256K = 256000;
 const MAX_OUTPUT_32K = 32000;
 const MAX_OUTPUT_16K = 16000;
@@ -827,7 +832,14 @@ export class CodexProvider extends BaseProvider {
     // Higher reasoning effort (e.g., 'xhigh' for "xtra thinking" mode) requires more time
     // for the model to generate reasoning tokens before producing output.
     // This fixes GitHub issue #530 where features would get stuck with reasoning models.
-    const timeout = calculateReasoningTimeout(options.reasoningEffort, CODEX_CLI_TIMEOUT_MS);
+    //
+    // For feature generation with 'xhigh', use the extended 5-minute base timeout
+    // since generating 50+ features takes significantly longer than normal operations.
+    const baseTimeout =
+      options.reasoningEffort === 'xhigh'
+        ? CODEX_FEATURE_GENERATION_BASE_TIMEOUT_MS
+        : CODEX_CLI_TIMEOUT_MS;
+    const timeout = calculateReasoningTimeout(options.reasoningEffort, baseTimeout);
 
     const stream = spawnJSONLProcess({
       command: commandPath,
diff --git a/apps/server/src/routes/app-spec/generate-features-from-spec.ts b/apps/server/src/routes/app-spec/generate-features-from-spec.ts
index 95e550e0..6558256b 100644
--- a/apps/server/src/routes/app-spec/generate-features-from-spec.ts
+++ b/apps/server/src/routes/app-spec/generate-features-from-spec.ts
@@ -8,7 +8,7 @@ import * as secureFs from '../../lib/secure-fs.js';
 
 import type { EventEmitter } from '../../lib/events.js';
 import { createLogger } from '@automaker/utils';
-import { DEFAULT_PHASE_MODELS, supportsStructuredOutput } from '@automaker/types';
+import { DEFAULT_PHASE_MODELS, supportsStructuredOutput, isCodexModel } from '@automaker/types';
 import { resolvePhaseModel } from '@automaker/model-resolver';
 import { streamingQuery } from '../../providers/simple-query-service.js';
 import { parseAndCreateFeatures } from './parse-and-create-features.js';
@@ -26,6 +26,12 @@ const logger = createLogger('SpecRegeneration');
 
 const DEFAULT_MAX_FEATURES = 50;
 
+/**
+ * Timeout for Codex models when generating features (5 minutes).
+ * Codex models are slower and need more time to generate 50+ features.
+ */
+const CODEX_FEATURE_GENERATION_TIMEOUT_MS = 300000; // 5 minutes
+
 /**
  * Type for extracted features JSON response
  */
@@ -189,10 +195,23 @@ Generate ${featureCount} NEW features that build on each other logically. Rememb
     provider: undefined,
     credentials: undefined,
   };
-  const { model, thinkingLevel } = resolvePhaseModel(phaseModelEntry);
+  const { model, thinkingLevel, reasoningEffort } = resolvePhaseModel(phaseModelEntry);
 
   logger.info('Using model:', model, provider ? `via provider: ${provider.name}` : 'direct API');
 
+  // Codex models need extended timeout for generating many features.
+  // Use 'xhigh' reasoning effort to get 5-minute timeout (300s base * 1.0x = 300s).
+  // The Codex provider has a special 5-minute base timeout for feature generation.
+  const isCodex = isCodexModel(model);
+  const effectiveReasoningEffort = isCodex ? 'xhigh' : reasoningEffort;
+
+  if (isCodex) {
+    logger.info('Codex model detected - using extended timeout (5 minutes for feature generation)');
+  }
+  if (effectiveReasoningEffort) {
+    logger.info('Reasoning effort:', effectiveReasoningEffort);
+  }
+
   // Determine if we should use structured output based on model type
   const useStructuredOutput = supportsStructuredOutput(model);
   logger.info(
@@ -239,6 +258,7 @@ Your entire response should be valid JSON starting with { and ending with }. No
     allowedTools: ['Read', 'Glob', 'Grep'],
     abortController,
     thinkingLevel,
+    reasoningEffort: effectiveReasoningEffort, // Extended timeout for Codex models
     readOnly: true, // Feature generation only reads code, doesn't write
     settingSources: autoLoadClaudeMd ? ['user', 'project', 'local'] : undefined,
     claudeCompatibleProvider: provider, // Pass provider for alternative endpoint configuration
diff --git a/libs/model-resolver/src/resolver.ts b/libs/model-resolver/src/resolver.ts
index d642ecde..ebb36c44 100644
--- a/libs/model-resolver/src/resolver.ts
+++ b/libs/model-resolver/src/resolver.ts
@@ -32,6 +32,7 @@ import {
   migrateModelId,
   type PhaseModelEntry,
   type ThinkingLevel,
+  type ReasoningEffort,
 } from '@automaker/types';
 
 // Pattern definitions for Codex/OpenAI models
@@ -162,8 +163,10 @@ export function getEffectiveModel(
 export interface ResolvedPhaseModel {
   /** Resolved model string (full model ID) */
   model: string;
-  /** Optional thinking level for extended thinking */
+  /** Optional thinking level for extended thinking (Claude models) */
   thinkingLevel?: ThinkingLevel;
+  /** Optional reasoning effort for timeout calculation (Codex models) */
+  reasoningEffort?: ReasoningEffort;
   /** Provider ID if using a ClaudeCompatibleProvider */
   providerId?: string;
 }
@@ -205,6 +208,7 @@ export function resolvePhaseModel(
     return {
       model: resolveModelString(undefined, defaultModel),
       thinkingLevel: undefined,
+      reasoningEffort: undefined,
     };
   }
 
@@ -214,12 +218,13 @@ export function resolvePhaseModel(
     return {
       model: resolveModelString(phaseModel, defaultModel),
       thinkingLevel: undefined,
+      reasoningEffort: undefined,
     };
   }
 
   // Handle new PhaseModelEntry object format
   console.log(
-    `[ModelResolver] phaseModel is object format: model="${phaseModel.model}", thinkingLevel="${phaseModel.thinkingLevel}", providerId="${phaseModel.providerId}"`
+    `[ModelResolver] phaseModel is object format: model="${phaseModel.model}", thinkingLevel="${phaseModel.thinkingLevel}", reasoningEffort="${phaseModel.reasoningEffort}", providerId="${phaseModel.providerId}"`
   );
 
   // If providerId is set, pass through the model string unchanged
@@ -231,6 +236,7 @@ export function resolvePhaseModel(
     return {
       model: phaseModel.model, // Pass through unchanged
       thinkingLevel: phaseModel.thinkingLevel,
+      reasoningEffort: phaseModel.reasoningEffort,
       providerId: phaseModel.providerId,
     };
   }
@@ -239,5 +245,6 @@ export function resolvePhaseModel(
   return {
     model: resolveModelString(phaseModel.model, defaultModel),
     thinkingLevel: phaseModel.thinkingLevel,
+    reasoningEffort: phaseModel.reasoningEffort,
   };
 }

From 43661e5a6e8d1db898f82390af5002cf3ac7ced1 Mon Sep 17 00:00:00 2001
From: Shirone
Date: Sat, 24 Jan 2026 20:41:25 +0100
Subject: [PATCH 4/5] fix: address pr comments
---
 libs/types/src/provider-utils.ts | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/libs/types/src/provider-utils.ts b/libs/types/src/provider-utils.ts
index eadc41bb..772d4d7c 100644
--- a/libs/types/src/provider-utils.ts
+++ b/libs/types/src/provider-utils.ts
@@ -7,7 +7,7 @@
  */
 
 import type { ModelProvider } from './settings.js';
-import { CURSOR_MODEL_MAP, LEGACY_CURSOR_MODEL_MAP } from './cursor-models.js';
+import { LEGACY_CURSOR_MODEL_MAP } from './cursor-models.js';
 import { CLAUDE_MODEL_MAP, CODEX_MODEL_MAP } from './model.js';
 import { OPENCODE_MODEL_CONFIG_MAP, LEGACY_OPENCODE_MODEL_MAP } from './opencode-models.js';
 import { GEMINI_MODEL_MAP } from './gemini-models.js';
@@ -370,6 +370,16 @@ export function normalizeModelString(model: string | undefined | null): string {
  * supportsStructuredOutput('gemini-2.5-pro') // false
  */
 export function supportsStructuredOutput(model: string | undefined | null): boolean {
+  // Exclude proxy providers first - they may have Claude/Codex in the model name
+  // but route through different APIs that don't support structured output
+  if (
+    isCursorModel(model) ||
+    isGeminiModel(model) ||
+    isOpencodeModel(model) ||
+    isCopilotModel(model)
+  ) {
+    return false;
+  }
   return isClaudeModel(model) || isCodexModel(model);
 }

From a3e536b8e60902d6a659aca42d44d742b766b62f Mon Sep 17 00:00:00 2001
From: Shirone
Date: Sat, 24 Jan 2026 20:53:40 +0100
Subject: [PATCH 5/5] test: Update codex provider timeout calculation for feature generation
---
 apps/server/tests/unit/providers/codex-provider.test.ts | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/apps/server/tests/unit/providers/codex-provider.test.ts b/apps/server/tests/unit/providers/codex-provider.test.ts
index ee9c7bad..a0bd25f6 100644
--- a/apps/server/tests/unit/providers/codex-provider.test.ts
+++ b/apps/server/tests/unit/providers/codex-provider.test.ts
@@ -325,8 +325,12 @@ describe('codex-provider.ts', () => {
       );
 
       const call = vi.mocked(spawnJSONLProcess).mock.calls[0][0];
-      // xhigh reasoning effort should have 4x the default timeout (120000ms)
-      expect(call.timeout).toBe(DEFAULT_TIMEOUT_MS * REASONING_TIMEOUT_MULTIPLIERS.xhigh);
+      // xhigh reasoning effort uses 5-minute base timeout (300000ms) for feature generation
+      // then applies 4x multiplier: 300000 * 4.0 = 1200000ms (20 minutes)
+      const CODEX_FEATURE_GENERATION_BASE_TIMEOUT_MS = 300000;
+      expect(call.timeout).toBe(
+        CODEX_FEATURE_GENERATION_BASE_TIMEOUT_MS * REASONING_TIMEOUT_MULTIPLIERS.xhigh
+      );
     });
 
     it('uses default timeout when no reasoning effort is specified', async () => {