Merge pull request #682 from AutoMaker-Org/feature/bug-fix-app-spec-generation-for-non-claude-models-dgq0

fix: Add structured output fallback for non-Claude models in app spec generation
This commit is contained in:
Shirone
2026-01-24 19:57:48 +00:00
committed by GitHub
9 changed files with 332 additions and 51 deletions

View File

@@ -98,9 +98,14 @@ const TEXT_ENCODING = 'utf-8';
* This is the "no output" timeout - if the CLI doesn't produce any JSONL output
* for this duration, the process is killed. For reasoning models with high
* reasoning effort, this timeout is dynamically extended via calculateReasoningTimeout().
*
* For feature generation (which can generate 50+ features), we use a much longer
* base timeout (5 minutes) since Codex models are slower at generating large JSON responses.
*
* @see calculateReasoningTimeout from @automaker/types
*/
const CODEX_CLI_TIMEOUT_MS = DEFAULT_TIMEOUT_MS;
const CODEX_FEATURE_GENERATION_BASE_TIMEOUT_MS = 300000; // 5 minutes for feature generation
const CONTEXT_WINDOW_256K = 256000;
const MAX_OUTPUT_32K = 32000;
const MAX_OUTPUT_16K = 16000;
@@ -827,7 +832,14 @@ export class CodexProvider extends BaseProvider {
// Higher reasoning effort (e.g., 'xhigh' for "xtra thinking" mode) requires more time
// for the model to generate reasoning tokens before producing output.
// This fixes GitHub issue #530 where features would get stuck with reasoning models.
const timeout = calculateReasoningTimeout(options.reasoningEffort, CODEX_CLI_TIMEOUT_MS);
//
// For feature generation with 'xhigh', use the extended 5-minute base timeout
// since generating 50+ features takes significantly longer than normal operations.
const baseTimeout =
options.reasoningEffort === 'xhigh'
? CODEX_FEATURE_GENERATION_BASE_TIMEOUT_MS
: CODEX_CLI_TIMEOUT_MS;
const timeout = calculateReasoningTimeout(options.reasoningEffort, baseTimeout);
const stream = spawnJSONLProcess({
command: commandPath,

View File

@@ -8,10 +8,11 @@
import * as secureFs from '../../lib/secure-fs.js';
import type { EventEmitter } from '../../lib/events.js';
import { createLogger } from '@automaker/utils';
import { DEFAULT_PHASE_MODELS } from '@automaker/types';
import { DEFAULT_PHASE_MODELS, supportsStructuredOutput, isCodexModel } from '@automaker/types';
import { resolvePhaseModel } from '@automaker/model-resolver';
import { streamingQuery } from '../../providers/simple-query-service.js';
import { parseAndCreateFeatures } from './parse-and-create-features.js';
import { extractJsonWithArray } from '../../lib/json-extractor.js';
import { getAppSpecPath } from '@automaker/platform';
import type { SettingsService } from '../../services/settings-service.js';
import {
@@ -25,6 +26,64 @@ const logger = createLogger('SpecRegeneration');
const DEFAULT_MAX_FEATURES = 50;
/**
* Timeout for Codex models when generating features (5 minutes).
* Codex models are slower and need more time to generate 50+ features.
*/
const CODEX_FEATURE_GENERATION_TIMEOUT_MS = 300000; // 5 minutes
/**
* Type for extracted features JSON response
*/
interface FeaturesExtractionResult {
features: Array<{
id: string;
category?: string;
title: string;
description: string;
priority?: number;
complexity?: 'simple' | 'moderate' | 'complex';
dependencies?: string[];
}>;
}
/**
* JSON schema for features output format (Claude/Codex structured output)
*/
const featuresOutputSchema = {
type: 'object',
properties: {
features: {
type: 'array',
items: {
type: 'object',
properties: {
id: { type: 'string', description: 'Unique feature identifier (kebab-case)' },
category: { type: 'string', description: 'Feature category' },
title: { type: 'string', description: 'Short, descriptive title' },
description: { type: 'string', description: 'Detailed feature description' },
priority: {
type: 'number',
description: 'Priority level: 1 (highest) to 5 (lowest)',
},
complexity: {
type: 'string',
enum: ['simple', 'moderate', 'complex'],
description: 'Implementation complexity',
},
dependencies: {
type: 'array',
items: { type: 'string' },
description: 'IDs of features this depends on',
},
},
required: ['id', 'title', 'description'],
},
},
},
required: ['features'],
} as const;
export async function generateFeaturesFromSpec(
projectPath: string,
events: EventEmitter,
@@ -136,23 +195,80 @@ Generate ${featureCount} NEW features that build on each other logically. Rememb
provider: undefined,
credentials: undefined,
};
const { model, thinkingLevel } = resolvePhaseModel(phaseModelEntry);
const { model, thinkingLevel, reasoningEffort } = resolvePhaseModel(phaseModelEntry);
logger.info('Using model:', model, provider ? `via provider: ${provider.name}` : 'direct API');
// Codex models need extended timeout for generating many features.
// Use 'xhigh' reasoning effort to get 5-minute timeout (300s base * 1.0x = 300s).
// The Codex provider has a special 5-minute base timeout for feature generation.
const isCodex = isCodexModel(model);
const effectiveReasoningEffort = isCodex ? 'xhigh' : reasoningEffort;
if (isCodex) {
logger.info('Codex model detected - using extended timeout (5 minutes for feature generation)');
}
if (effectiveReasoningEffort) {
logger.info('Reasoning effort:', effectiveReasoningEffort);
}
// Determine if we should use structured output based on model type
const useStructuredOutput = supportsStructuredOutput(model);
logger.info(
`Structured output mode: ${useStructuredOutput ? 'enabled (Claude/Codex)' : 'disabled (using JSON instructions)'}`
);
// Build the final prompt - for non-Claude/Codex models, include explicit JSON instructions
let finalPrompt = prompt;
if (!useStructuredOutput) {
finalPrompt = `${prompt}
CRITICAL INSTRUCTIONS:
1. DO NOT write any files. Return the JSON in your response only.
2. After analyzing the spec, respond with ONLY a JSON object - no explanations, no markdown, just raw JSON.
3. The JSON must have this exact structure:
{
"features": [
{
"id": "unique-feature-id",
"category": "Category Name",
"title": "Short Feature Title",
"description": "Detailed description of the feature",
"priority": 1,
"complexity": "simple|moderate|complex",
"dependencies": ["other-feature-id"]
}
]
}
4. Feature IDs must be unique, lowercase, kebab-case (e.g., "user-authentication", "data-export")
5. Priority ranges from 1 (highest) to 5 (lowest)
6. Complexity must be one of: "simple", "moderate", "complex"
7. Dependencies is an array of feature IDs that must be completed first (can be empty)
Your entire response should be valid JSON starting with { and ending with }. No text before or after.`;
}
// Use streamingQuery with event callbacks
const result = await streamingQuery({
prompt,
prompt: finalPrompt,
model,
cwd: projectPath,
maxTurns: 250,
allowedTools: ['Read', 'Glob', 'Grep'],
abortController,
thinkingLevel,
reasoningEffort: effectiveReasoningEffort, // Extended timeout for Codex models
readOnly: true, // Feature generation only reads code, doesn't write
settingSources: autoLoadClaudeMd ? ['user', 'project', 'local'] : undefined,
claudeCompatibleProvider: provider, // Pass provider for alternative endpoint configuration
credentials, // Pass credentials for resolving 'credentials' apiKeySource
outputFormat: useStructuredOutput
? {
type: 'json_schema',
schema: featuresOutputSchema,
}
: undefined,
onText: (text) => {
logger.debug(`Feature text block received (${text.length} chars)`);
events.emit('spec-regeneration:event', {
@@ -163,15 +279,51 @@ Generate ${featureCount} NEW features that build on each other logically. Rememb
},
});
const responseText = result.text;
// Get response content - prefer structured output if available
let contentForParsing: string;
logger.info(`Feature stream complete.`);
logger.info(`Feature response length: ${responseText.length} chars`);
logger.info('========== FULL RESPONSE TEXT ==========');
logger.info(responseText);
logger.info('========== END RESPONSE TEXT ==========');
if (result.structured_output) {
// Use structured output from Claude/Codex models
logger.info('✅ Received structured output from model');
contentForParsing = JSON.stringify(result.structured_output);
logger.debug('Structured output:', contentForParsing);
} else {
// Use text response (for non-Claude/Codex models or fallback)
// Pre-extract JSON to handle conversational text that may surround the JSON response
// This follows the same pattern used in generate-spec.ts and validate-issue.ts
const rawText = result.text;
logger.info(`Feature stream complete.`);
logger.info(`Feature response length: ${rawText.length} chars`);
logger.info('========== FULL RESPONSE TEXT ==========');
logger.info(rawText);
logger.info('========== END RESPONSE TEXT ==========');
await parseAndCreateFeatures(projectPath, responseText, events);
// Pre-extract JSON from response - handles conversational text around the JSON
const extracted = extractJsonWithArray<FeaturesExtractionResult>(rawText, 'features', {
logger,
});
if (extracted) {
contentForParsing = JSON.stringify(extracted);
logger.info('✅ Pre-extracted JSON from text response');
} else {
// If pre-extraction fails, we know the next step will also fail.
// Throw an error here to avoid redundant parsing and make the failure point clearer.
logger.error(
'❌ Could not extract features JSON from model response. Full response text was:\n' +
rawText
);
const errorMessage =
'Failed to parse features from model response: No valid JSON with a "features" array found.';
events.emit('spec-regeneration:event', {
type: 'spec_regeneration_error',
error: errorMessage,
projectPath: projectPath,
});
throw new Error(errorMessage);
}
}
await parseAndCreateFeatures(projectPath, contentForParsing, events);
logger.debug('========== generateFeaturesFromSpec() completed ==========');
}

View File

@@ -9,7 +9,7 @@ import * as secureFs from '../../lib/secure-fs.js';
import type { EventEmitter } from '../../lib/events.js';
import { specOutputSchema, specToXml, type SpecOutput } from '../../lib/app-spec-format.js';
import { createLogger } from '@automaker/utils';
import { DEFAULT_PHASE_MODELS, isCursorModel } from '@automaker/types';
import { DEFAULT_PHASE_MODELS, supportsStructuredOutput } from '@automaker/types';
import { resolvePhaseModel } from '@automaker/model-resolver';
import { extractJson } from '../../lib/json-extractor.js';
import { streamingQuery } from '../../providers/simple-query-service.js';
@@ -120,10 +120,13 @@ ${prompts.appSpec.structuredSpecInstructions}`;
let responseText = '';
let structuredOutput: SpecOutput | null = null;
// Determine if we should use structured output (Claude supports it, Cursor doesn't)
const useStructuredOutput = !isCursorModel(model);
// Determine if we should use structured output based on model type
const useStructuredOutput = supportsStructuredOutput(model);
logger.info(
`Structured output mode: ${useStructuredOutput ? 'enabled (Claude/Codex)' : 'disabled (using JSON instructions)'}`
);
// Build the final prompt - for Cursor, include JSON schema instructions
// Build the final prompt - for non-Claude/Codex models, include JSON schema instructions
let finalPrompt = prompt;
if (!useStructuredOutput) {
finalPrompt = `${prompt}

View File

@@ -10,9 +10,10 @@
import * as secureFs from '../../lib/secure-fs.js';
import type { EventEmitter } from '../../lib/events.js';
import { createLogger } from '@automaker/utils';
import { DEFAULT_PHASE_MODELS } from '@automaker/types';
import { DEFAULT_PHASE_MODELS, supportsStructuredOutput } from '@automaker/types';
import { resolvePhaseModel } from '@automaker/model-resolver';
import { streamingQuery } from '../../providers/simple-query-service.js';
import { extractJson } from '../../lib/json-extractor.js';
import { getAppSpecPath } from '@automaker/platform';
import type { SettingsService } from '../../services/settings-service.js';
import {
@@ -34,6 +35,28 @@ import { getNotificationService } from '../../services/notification-service.js';
const logger = createLogger('SpecSync');
/**
* Type for extracted tech stack JSON response
*/
interface TechStackExtractionResult {
technologies: string[];
}
/**
* JSON schema for tech stack analysis output (Claude/Codex structured output)
*/
const techStackOutputSchema = {
type: 'object',
properties: {
technologies: {
type: 'array',
items: { type: 'string' },
description: 'List of technologies detected in the project',
},
},
required: ['technologies'],
} as const;
/**
* Result of a sync operation
*/
@@ -176,8 +199,14 @@ export async function syncSpec(
logger.info('Using model:', model, provider ? `via provider: ${provider.name}` : 'direct API');
// Determine if we should use structured output based on model type
const useStructuredOutput = supportsStructuredOutput(model);
logger.info(
`Structured output mode: ${useStructuredOutput ? 'enabled (Claude/Codex)' : 'disabled (using JSON instructions)'}`
);
// Use AI to analyze tech stack
const techAnalysisPrompt = `Analyze this project and return ONLY a JSON object with the current technology stack.
let techAnalysisPrompt = `Analyze this project and return ONLY a JSON object with the current technology stack.
Current known technologies: ${currentTechStack.join(', ')}
@@ -193,6 +222,16 @@ Return ONLY this JSON format, no other text:
"technologies": ["Technology 1", "Technology 2", ...]
}`;
// Add explicit JSON instructions for non-Claude/Codex models
if (!useStructuredOutput) {
techAnalysisPrompt = `${techAnalysisPrompt}
CRITICAL INSTRUCTIONS:
1. DO NOT write any files. Return the JSON in your response only.
2. Your entire response should be valid JSON starting with { and ending with }.
3. No explanations, no markdown, no text before or after the JSON.`;
}
try {
const techResult = await streamingQuery({
prompt: techAnalysisPrompt,
@@ -206,44 +245,67 @@ Return ONLY this JSON format, no other text:
settingSources: autoLoadClaudeMd ? ['user', 'project', 'local'] : undefined,
claudeCompatibleProvider: provider, // Pass provider for alternative endpoint configuration
credentials, // Pass credentials for resolving 'credentials' apiKeySource
outputFormat: useStructuredOutput
? {
type: 'json_schema',
schema: techStackOutputSchema,
}
: undefined,
onText: (text) => {
logger.debug(`Tech analysis text: ${text.substring(0, 100)}`);
},
});
// Parse tech stack from response
const jsonMatch = techResult.text.match(/\{[\s\S]*"technologies"[\s\S]*\}/);
if (jsonMatch) {
const parsed = JSON.parse(jsonMatch[0]);
if (Array.isArray(parsed.technologies)) {
const newTechStack = parsed.technologies as string[];
// Parse tech stack from response - prefer structured output if available
let parsedTechnologies: string[] | null = null;
// Calculate differences
const currentSet = new Set(currentTechStack.map((t) => t.toLowerCase()));
const newSet = new Set(newTechStack.map((t) => t.toLowerCase()));
if (techResult.structured_output) {
// Use structured output from Claude/Codex models
const structured = techResult.structured_output as unknown as TechStackExtractionResult;
if (Array.isArray(structured.technologies)) {
parsedTechnologies = structured.technologies;
logger.info('✅ Received structured output for tech analysis');
}
} else {
// Fall back to text parsing for non-Claude/Codex models
const extracted = extractJson<TechStackExtractionResult>(techResult.text, {
logger,
requiredKey: 'technologies',
requireArray: true,
});
if (extracted && Array.isArray(extracted.technologies)) {
parsedTechnologies = extracted.technologies;
logger.info('✅ Extracted tech stack from text response');
} else {
logger.warn('⚠️ Failed to extract tech stack JSON from response');
}
}
for (const tech of newTechStack) {
if (!currentSet.has(tech.toLowerCase())) {
result.techStackUpdates.added.push(tech);
}
if (parsedTechnologies) {
const newTechStack = parsedTechnologies;
// Calculate differences
const currentSet = new Set(currentTechStack.map((t) => t.toLowerCase()));
const newSet = new Set(newTechStack.map((t) => t.toLowerCase()));
for (const tech of newTechStack) {
if (!currentSet.has(tech.toLowerCase())) {
result.techStackUpdates.added.push(tech);
}
}
for (const tech of currentTechStack) {
if (!newSet.has(tech.toLowerCase())) {
result.techStackUpdates.removed.push(tech);
}
for (const tech of currentTechStack) {
if (!newSet.has(tech.toLowerCase())) {
result.techStackUpdates.removed.push(tech);
}
}
// Update spec with new tech stack if there are changes
if (
result.techStackUpdates.added.length > 0 ||
result.techStackUpdates.removed.length > 0
) {
specContent = updateTechnologyStack(specContent, newTechStack);
logger.info(
`Updated tech stack: +${result.techStackUpdates.added.length}, -${result.techStackUpdates.removed.length}`
);
}
// Update spec with new tech stack if there are changes
if (result.techStackUpdates.added.length > 0 || result.techStackUpdates.removed.length > 0) {
specContent = updateTechnologyStack(specContent, newTechStack);
logger.info(
`Updated tech stack: +${result.techStackUpdates.added.length}, -${result.techStackUpdates.removed.length}`
);
}
}
} catch (error) {

View File

@@ -23,6 +23,7 @@ import {
isCodexModel,
isCursorModel,
isOpencodeModel,
supportsStructuredOutput,
} from '@automaker/types';
import { resolvePhaseModel } from '@automaker/model-resolver';
import { extractJson } from '../../../lib/json-extractor.js';
@@ -124,8 +125,9 @@ async function runValidation(
const prompts = await getPromptCustomization(settingsService, '[ValidateIssue]');
const issueValidationSystemPrompt = prompts.issueValidation.systemPrompt;
// Determine if we should use structured output (Claude/Codex support it, Cursor/OpenCode don't)
const useStructuredOutput = isClaudeModel(model) || isCodexModel(model);
// Determine if we should use structured output based on model type
// Claude and Codex support it; Cursor, Gemini, OpenCode, Copilot don't
const useStructuredOutput = supportsStructuredOutput(model);
// Build the final prompt - for Cursor, include system prompt and JSON schema instructions
let finalPrompt = basePrompt;