From 92b1fb37254f0e20b064393fc16320eea20cf4ea Mon Sep 17 00:00:00 2001 From: Shirone Date: Sat, 24 Jan 2026 18:25:39 +0100 Subject: [PATCH] fix: Add structured output fallback for non-Claude models in app spec generation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes the app spec generation failing for non-Claude models (Cursor, Gemini, OpenCode, Copilot) that don't support structured output capabilities. Changes: - Add `supportsStructuredOutput()` utility function in @automaker/types to centralize model capability detection - Update generate-features-from-spec.ts: - Add explicit JSON instructions for non-Claude/Codex models - Define featuresOutputSchema for structured output - Pre-extract JSON from text responses using extractJsonWithArray - Handle both structured_output and text responses properly - Update generate-spec.ts: - Replace isCursorModel with supportsStructuredOutput for consistency - Update sync-spec.ts: - Add techStackOutputSchema for structured output - Add JSON extraction fallback for text responses - Handle both structured_output and text parsing - Update validate-issue.ts: - Use supportsStructuredOutput for cleaner capability detection The fix follows the same pattern used in generate-spec.ts where non-Claude models receive explicit JSON formatting instructions in the prompt and responses are parsed using extractJson utilities. Fixes #669 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- .../app-spec/generate-features-from-spec.ts | 139 ++++++++++++++++-- .../src/routes/app-spec/generate-spec.ts | 11 +- apps/server/src/routes/app-spec/sync-spec.ts | 120 +++++++++++---- .../routes/github/routes/validate-issue.ts | 6 +- libs/types/src/index.ts | 1 + libs/types/src/provider-utils.ts | 28 ++++ 6 files changed, 261 insertions(+), 44 deletions(-) diff --git a/apps/server/src/routes/app-spec/generate-features-from-spec.ts b/apps/server/src/routes/app-spec/generate-features-from-spec.ts index 56058cb7..e614113a 100644 --- a/apps/server/src/routes/app-spec/generate-features-from-spec.ts +++ b/apps/server/src/routes/app-spec/generate-features-from-spec.ts @@ -8,10 +8,11 @@ import * as secureFs from '../../lib/secure-fs.js'; import type { EventEmitter } from '../../lib/events.js'; import { createLogger } from '@automaker/utils'; -import { DEFAULT_PHASE_MODELS } from '@automaker/types'; +import { DEFAULT_PHASE_MODELS, supportsStructuredOutput } from '@automaker/types'; import { resolvePhaseModel } from '@automaker/model-resolver'; import { streamingQuery } from '../../providers/simple-query-service.js'; import { parseAndCreateFeatures } from './parse-and-create-features.js'; +import { extractJsonWithArray } from '../../lib/json-extractor.js'; import { getAppSpecPath } from '@automaker/platform'; import type { SettingsService } from '../../services/settings-service.js'; import { @@ -25,6 +26,58 @@ const logger = createLogger('SpecRegeneration'); const DEFAULT_MAX_FEATURES = 50; +/** + * Type for extracted features JSON response + */ +interface FeaturesExtractionResult { + features: Array<{ + id: string; + category?: string; + title: string; + description: string; + priority?: number; + complexity?: 'simple' | 'moderate' | 'complex'; + dependencies?: string[]; + }>; +} + +/** + * JSON schema for features output format (Claude/Codex structured output) + */ +const featuresOutputSchema = { + type: 'object', + properties: { + features: { + type: 'array', + items: { + type: 'object', + properties: { + id: { type: 'string', description: 'Unique feature identifier (kebab-case)' }, + category: { type: 'string', description: 'Feature category' }, + title: { type: 'string', description: 'Short, descriptive title' }, + description: { type: 'string', description: 'Detailed feature description' }, + priority: { + type: 'number', + description: 'Priority level: 1 (highest) to 5 (lowest)', + }, + complexity: { + type: 'string', + enum: ['simple', 'moderate', 'complex'], + description: 'Implementation complexity', + }, + dependencies: { + type: 'array', + items: { type: 'string' }, + description: 'IDs of features this depends on', + }, + }, + required: ['id', 'title', 'description'], + }, + }, + }, + required: ['features'], +} as const; + export async function generateFeaturesFromSpec( projectPath: string, events: EventEmitter, @@ -140,9 +193,46 @@ Generate ${featureCount} NEW features that build on each other logically. Rememb logger.info('Using model:', model, provider ? `via provider: ${provider.name}` : 'direct API'); + // Determine if we should use structured output based on model type + const useStructuredOutput = supportsStructuredOutput(model); + logger.info( + `Structured output mode: ${useStructuredOutput ? 'enabled (Claude/Codex)' : 'disabled (using JSON instructions)'}` + ); + + // Build the final prompt - for non-Claude/Codex models, include explicit JSON instructions + let finalPrompt = prompt; + if (!useStructuredOutput) { + finalPrompt = `${prompt} + +CRITICAL INSTRUCTIONS: +1. DO NOT write any files. Return the JSON in your response only. +2. After analyzing the spec, respond with ONLY a JSON object - no explanations, no markdown, just raw JSON. +3. The JSON must have this exact structure: +{ + "features": [ + { + "id": "unique-feature-id", + "category": "Category Name", + "title": "Short Feature Title", + "description": "Detailed description of the feature", + "priority": 1, + "complexity": "simple|moderate|complex", + "dependencies": ["other-feature-id"] + } + ] +} + +4. Feature IDs must be unique, lowercase, kebab-case (e.g., "user-authentication", "data-export") +5. Priority ranges from 1 (highest) to 5 (lowest) +6. Complexity must be one of: "simple", "moderate", "complex" +7. Dependencies is an array of feature IDs that must be completed first (can be empty) + +Your entire response should be valid JSON starting with { and ending with }. No text before or after.`; + } + // Use streamingQuery with event callbacks const result = await streamingQuery({ - prompt, + prompt: finalPrompt, model, cwd: projectPath, maxTurns: 250, @@ -153,6 +243,12 @@ Generate ${featureCount} NEW features that build on each other logically. Rememb settingSources: autoLoadClaudeMd ? ['user', 'project', 'local'] : undefined, claudeCompatibleProvider: provider, // Pass provider for alternative endpoint configuration credentials, // Pass credentials for resolving 'credentials' apiKeySource + outputFormat: useStructuredOutput + ? { + type: 'json_schema', + schema: featuresOutputSchema, + } + : undefined, onText: (text) => { logger.debug(`Feature text block received (${text.length} chars)`); events.emit('spec-regeneration:event', { @@ -163,15 +259,40 @@ Generate ${featureCount} NEW features that build on each other logically. Rememb }, }); - const responseText = result.text; + // Get response content - prefer structured output if available + let contentForParsing: string; - logger.info(`Feature stream complete.`); - logger.info(`Feature response length: ${responseText.length} chars`); - logger.info('========== FULL RESPONSE TEXT =========='); - logger.info(responseText); - logger.info('========== END RESPONSE TEXT =========='); + if (result.structured_output) { + // Use structured output from Claude/Codex models + logger.info('✅ Received structured output from model'); + contentForParsing = JSON.stringify(result.structured_output); + logger.debug('Structured output:', contentForParsing); + } else { + // Use text response (for non-Claude/Codex models or fallback) + // Pre-extract JSON to handle conversational text that may surround the JSON response + // This follows the same pattern used in generate-spec.ts and validate-issue.ts + const rawText = result.text; + logger.info(`Feature stream complete.`); + logger.info(`Feature response length: ${rawText.length} chars`); + logger.info('========== FULL RESPONSE TEXT =========='); + logger.info(rawText); + logger.info('========== END RESPONSE TEXT =========='); - await parseAndCreateFeatures(projectPath, responseText, events); + // Pre-extract JSON from response - handles conversational text around the JSON + const extracted = extractJsonWithArray(rawText, 'features', { + logger, + }); + if (extracted) { + contentForParsing = JSON.stringify(extracted); + logger.info('✅ Pre-extracted JSON from text response'); + } else { + // Fall back to raw text (let parseAndCreateFeatures try its extraction) + contentForParsing = rawText; + logger.warn('⚠️ Could not pre-extract JSON, passing raw text to parser'); + } + } + + await parseAndCreateFeatures(projectPath, contentForParsing, events); logger.debug('========== generateFeaturesFromSpec() completed =========='); } diff --git a/apps/server/src/routes/app-spec/generate-spec.ts b/apps/server/src/routes/app-spec/generate-spec.ts index 0f826d76..bd47e9ea 100644 --- a/apps/server/src/routes/app-spec/generate-spec.ts +++ b/apps/server/src/routes/app-spec/generate-spec.ts @@ -9,7 +9,7 @@ import * as secureFs from '../../lib/secure-fs.js'; import type { EventEmitter } from '../../lib/events.js'; import { specOutputSchema, specToXml, type SpecOutput } from '../../lib/app-spec-format.js'; import { createLogger } from '@automaker/utils'; -import { DEFAULT_PHASE_MODELS, isCursorModel } from '@automaker/types'; +import { DEFAULT_PHASE_MODELS, supportsStructuredOutput } from '@automaker/types'; import { resolvePhaseModel } from '@automaker/model-resolver'; import { extractJson } from '../../lib/json-extractor.js'; import { streamingQuery } from '../../providers/simple-query-service.js'; @@ -120,10 +120,13 @@ ${prompts.appSpec.structuredSpecInstructions}`; let responseText = ''; let structuredOutput: SpecOutput | null = null; - // Determine if we should use structured output (Claude supports it, Cursor doesn't) - const useStructuredOutput = !isCursorModel(model); + // Determine if we should use structured output based on model type + const useStructuredOutput = supportsStructuredOutput(model); + logger.info( + `Structured output mode: ${useStructuredOutput ? 'enabled (Claude/Codex)' : 'disabled (using JSON instructions)'}` + ); - // Build the final prompt - for Cursor, include JSON schema instructions + // Build the final prompt - for non-Claude/Codex models, include JSON schema instructions let finalPrompt = prompt; if (!useStructuredOutput) { finalPrompt = `${prompt} diff --git a/apps/server/src/routes/app-spec/sync-spec.ts b/apps/server/src/routes/app-spec/sync-spec.ts index af5139dd..d36b6808 100644 --- a/apps/server/src/routes/app-spec/sync-spec.ts +++ b/apps/server/src/routes/app-spec/sync-spec.ts @@ -10,9 +10,10 @@ import * as secureFs from '../../lib/secure-fs.js'; import type { EventEmitter } from '../../lib/events.js'; import { createLogger } from '@automaker/utils'; -import { DEFAULT_PHASE_MODELS } from '@automaker/types'; +import { DEFAULT_PHASE_MODELS, supportsStructuredOutput } from '@automaker/types'; import { resolvePhaseModel } from '@automaker/model-resolver'; import { streamingQuery } from '../../providers/simple-query-service.js'; +import { extractJson } from '../../lib/json-extractor.js'; import { getAppSpecPath } from '@automaker/platform'; import type { SettingsService } from '../../services/settings-service.js'; import { @@ -34,6 +35,28 @@ import { getNotificationService } from '../../services/notification-service.js'; const logger = createLogger('SpecSync'); +/** + * Type for extracted tech stack JSON response + */ +interface TechStackExtractionResult { + technologies: string[]; +} + +/** + * JSON schema for tech stack analysis output (Claude/Codex structured output) + */ +const techStackOutputSchema = { + type: 'object', + properties: { + technologies: { + type: 'array', + items: { type: 'string' }, + description: 'List of technologies detected in the project', + }, + }, + required: ['technologies'], +} as const; + /** * Result of a sync operation */ @@ -176,8 +199,14 @@ export async function syncSpec( logger.info('Using model:', model, provider ? `via provider: ${provider.name}` : 'direct API'); + // Determine if we should use structured output based on model type + const useStructuredOutput = supportsStructuredOutput(model); + logger.info( + `Structured output mode: ${useStructuredOutput ? 'enabled (Claude/Codex)' : 'disabled (using JSON instructions)'}` + ); + // Use AI to analyze tech stack - const techAnalysisPrompt = `Analyze this project and return ONLY a JSON object with the current technology stack. + let techAnalysisPrompt = `Analyze this project and return ONLY a JSON object with the current technology stack. Current known technologies: ${currentTechStack.join(', ')} @@ -193,6 +222,16 @@ Return ONLY this JSON format, no other text: "technologies": ["Technology 1", "Technology 2", ...] }`; + // Add explicit JSON instructions for non-Claude/Codex models + if (!useStructuredOutput) { + techAnalysisPrompt = `${techAnalysisPrompt} + +CRITICAL INSTRUCTIONS: +1. DO NOT write any files. Return the JSON in your response only. +2. Your entire response should be valid JSON starting with { and ending with }. +3. No explanations, no markdown, no text before or after the JSON.`; + } + try { const techResult = await streamingQuery({ prompt: techAnalysisPrompt, @@ -206,44 +245,67 @@ Return ONLY this JSON format, no other text: settingSources: autoLoadClaudeMd ? ['user', 'project', 'local'] : undefined, claudeCompatibleProvider: provider, // Pass provider for alternative endpoint configuration credentials, // Pass credentials for resolving 'credentials' apiKeySource + outputFormat: useStructuredOutput + ? { + type: 'json_schema', + schema: techStackOutputSchema, + } + : undefined, onText: (text) => { logger.debug(`Tech analysis text: ${text.substring(0, 100)}`); }, }); - // Parse tech stack from response - const jsonMatch = techResult.text.match(/\{[\s\S]*"technologies"[\s\S]*\}/); - if (jsonMatch) { - const parsed = JSON.parse(jsonMatch[0]); - if (Array.isArray(parsed.technologies)) { - const newTechStack = parsed.technologies as string[]; + // Parse tech stack from response - prefer structured output if available + let parsedTechnologies: string[] | null = null; - // Calculate differences - const currentSet = new Set(currentTechStack.map((t) => t.toLowerCase())); - const newSet = new Set(newTechStack.map((t) => t.toLowerCase())); + if (techResult.structured_output) { + // Use structured output from Claude/Codex models + const structured = techResult.structured_output as TechStackExtractionResult; + if (Array.isArray(structured.technologies)) { + parsedTechnologies = structured.technologies; + logger.info('✅ Received structured output for tech analysis'); + } + } else { + // Fall back to text parsing for non-Claude/Codex models + const extracted = extractJson(techResult.text, { + logger, + requiredKey: 'technologies', + requireArray: true, + }); + if (extracted && Array.isArray(extracted.technologies)) { + parsedTechnologies = extracted.technologies; + logger.info('✅ Extracted tech stack from text response'); + } else { + logger.warn('⚠️ Failed to extract tech stack JSON from response'); + } + } - for (const tech of newTechStack) { - if (!currentSet.has(tech.toLowerCase())) { - result.techStackUpdates.added.push(tech); - } + if (parsedTechnologies) { + const newTechStack = parsedTechnologies; + + // Calculate differences + const currentSet = new Set(currentTechStack.map((t) => t.toLowerCase())); + const newSet = new Set(newTechStack.map((t) => t.toLowerCase())); + + for (const tech of newTechStack) { + if (!currentSet.has(tech.toLowerCase())) { + result.techStackUpdates.added.push(tech); } + } - for (const tech of currentTechStack) { - if (!newSet.has(tech.toLowerCase())) { - result.techStackUpdates.removed.push(tech); - } + for (const tech of currentTechStack) { + if (!newSet.has(tech.toLowerCase())) { + result.techStackUpdates.removed.push(tech); } + } - // Update spec with new tech stack if there are changes - if ( - result.techStackUpdates.added.length > 0 || - result.techStackUpdates.removed.length > 0 - ) { - specContent = updateTechnologyStack(specContent, newTechStack); - logger.info( - `Updated tech stack: +${result.techStackUpdates.added.length}, -${result.techStackUpdates.removed.length}` - ); - } + // Update spec with new tech stack if there are changes + if (result.techStackUpdates.added.length > 0 || result.techStackUpdates.removed.length > 0) { + specContent = updateTechnologyStack(specContent, newTechStack); + logger.info( + `Updated tech stack: +${result.techStackUpdates.added.length}, -${result.techStackUpdates.removed.length}` + ); } } } catch (error) { diff --git a/apps/server/src/routes/github/routes/validate-issue.ts b/apps/server/src/routes/github/routes/validate-issue.ts index 10465829..69a13b83 100644 --- a/apps/server/src/routes/github/routes/validate-issue.ts +++ b/apps/server/src/routes/github/routes/validate-issue.ts @@ -23,6 +23,7 @@ import { isCodexModel, isCursorModel, isOpencodeModel, + supportsStructuredOutput, } from '@automaker/types'; import { resolvePhaseModel } from '@automaker/model-resolver'; import { extractJson } from '../../../lib/json-extractor.js'; @@ -124,8 +125,9 @@ async function runValidation( const prompts = await getPromptCustomization(settingsService, '[ValidateIssue]'); const issueValidationSystemPrompt = prompts.issueValidation.systemPrompt; - // Determine if we should use structured output (Claude/Codex support it, Cursor/OpenCode don't) - const useStructuredOutput = isClaudeModel(model) || isCodexModel(model); + // Determine if we should use structured output based on model type + // Claude and Codex support it; Cursor, Gemini, OpenCode, Copilot don't + const useStructuredOutput = supportsStructuredOutput(model); // Build the final prompt - for Cursor, include system prompt and JSON schema instructions let finalPrompt = basePrompt; diff --git a/libs/types/src/index.ts b/libs/types/src/index.ts index a4a7635e..29a12ae5 100644 --- a/libs/types/src/index.ts +++ b/libs/types/src/index.ts @@ -272,6 +272,7 @@ export { getBareModelId, normalizeModelString, validateBareModelId, + supportsStructuredOutput, } from './provider-utils.js'; // Model migration utilities diff --git a/libs/types/src/provider-utils.ts b/libs/types/src/provider-utils.ts index 025322e6..eadc41bb 100644 --- a/libs/types/src/provider-utils.ts +++ b/libs/types/src/provider-utils.ts @@ -345,6 +345,34 @@ export function normalizeModelString(model: string | undefined | null): string { return model; } +/** + * Check if a model supports structured output (JSON schema) + * + * Structured output is a feature that allows the model to return responses + * conforming to a JSON schema. Currently supported by: + * - Claude models (native Anthropic API support) + * - Codex/OpenAI models (via response_format with json_schema) + * + * Models that do NOT support structured output: + * - Cursor models (uses different API format) + * - OpenCode models (various backend providers) + * - Gemini models (different API) + * - Copilot models (proxy to various backends) + * + * @param model - Model string to check + * @returns true if the model supports structured output + * + * @example + * supportsStructuredOutput('sonnet') // true (Claude) + * supportsStructuredOutput('claude-sonnet-4-20250514') // true (Claude) + * supportsStructuredOutput('codex-gpt-5.2') // true (Codex/OpenAI) + * supportsStructuredOutput('cursor-auto') // false + * supportsStructuredOutput('gemini-2.5-pro') // false + */ +export function supportsStructuredOutput(model: string | undefined | null): boolean { + return isClaudeModel(model) || isCodexModel(model); +} + /** * Validate that a model ID does not contain a provider prefix *