Merge pull request #682 from AutoMaker-Org/feature/bug-fix-app-spec-generation-for-non-claude-models-dgq0

fix: Add structured output fallback for non-Claude models in app spec generation
2026-02-01 08:13:37 +00:00 · 2026-01-24 19:57:48 +00:00
parent 900bbb5e80 a3e536b8e6
commit d12e0705f0
9 changed files with 332 additions and 51 deletions
--- a/apps/server/src/providers/codex-provider.ts
+++ b/apps/server/src/providers/codex-provider.ts
@@ -98,9 +98,14 @@ const TEXT_ENCODING = 'utf-8';
 * This is the "no output" timeout - if the CLI doesn't produce any JSONL output
 * for this duration, the process is killed. For reasoning models with high
 * reasoning effort, this timeout is dynamically extended via calculateReasoningTimeout().
+ *
+ * Requests with 'xhigh' effort (e.g. feature generation, which can generate 50+ features) use
+ * CODEX_FEATURE_GENERATION_BASE_TIMEOUT_MS (5 min) instead: Codex is slow at large JSON output.
+ *
 * @see calculateReasoningTimeout from @automaker/types
 */
 const CODEX_CLI_TIMEOUT_MS = DEFAULT_TIMEOUT_MS;
+const CODEX_FEATURE_GENERATION_BASE_TIMEOUT_MS = 300000; // 5 minutes for feature generation
 const CONTEXT_WINDOW_256K = 256000;
 const MAX_OUTPUT_32K = 32000;
 const MAX_OUTPUT_16K = 16000;
@@ -827,7 +832,14 @@ export class CodexProvider extends BaseProvider {
      // Higher reasoning effort (e.g., 'xhigh' for "xtra thinking" mode) requires more time
      // for the model to generate reasoning tokens before producing output.
      // This fixes GitHub issue #530 where features would get stuck with reasoning models.
-      const timeout = calculateReasoningTimeout(options.reasoningEffort, CODEX_CLI_TIMEOUT_MS);
+      //
+      // For any request with 'xhigh' effort (feature generation forces it for Codex), use the
+      // extended 5-minute base timeout, since generating 50+ features takes much longer.
+      const baseTimeout =
+        options.reasoningEffort === 'xhigh'
+          ? CODEX_FEATURE_GENERATION_BASE_TIMEOUT_MS
+          : CODEX_CLI_TIMEOUT_MS;
+      const timeout = calculateReasoningTimeout(options.reasoningEffort, baseTimeout);

      const stream = spawnJSONLProcess({
        command: commandPath,
--- a/apps/server/src/routes/app-spec/generate-features-from-spec.ts
+++ b/apps/server/src/routes/app-spec/generate-features-from-spec.ts
@@ -8,10 +8,11 @@
 import * as secureFs from '../../lib/secure-fs.js';
 import type { EventEmitter } from '../../lib/events.js';
 import { createLogger } from '@automaker/utils';
-import { DEFAULT_PHASE_MODELS } from '@automaker/types';
+import { DEFAULT_PHASE_MODELS, supportsStructuredOutput, isCodexModel } from '@automaker/types';
 import { resolvePhaseModel } from '@automaker/model-resolver';
 import { streamingQuery } from '../../providers/simple-query-service.js';
 import { parseAndCreateFeatures } from './parse-and-create-features.js';
+import { extractJsonWithArray } from '../../lib/json-extractor.js';
 import { getAppSpecPath } from '@automaker/platform';
 import type { SettingsService } from '../../services/settings-service.js';
 import {
@@ -25,6 +26,64 @@ const logger = createLogger('SpecRegeneration');

 const DEFAULT_MAX_FEATURES = 50;

+/**
+ * Timeout for Codex models when generating features (5 minutes).
+ * Codex models are slower and need more time to generate 50+ features.
+ */
+const CODEX_FEATURE_GENERATION_TIMEOUT_MS = 300000; // 5 minutes
+
+/**
+ * Type for extracted features JSON response
+ */
+interface FeaturesExtractionResult {
+  features: Array<{
+    id: string;
+    category?: string;
+    title: string;
+    description: string;
+    priority?: number;
+    complexity?: 'simple' | 'moderate' | 'complex';
+    dependencies?: string[];
+  }>;
+}
+
+/**
+ * JSON schema for features output format (Claude/Codex structured output)
+ */
+const featuresOutputSchema = {
+  type: 'object',
+  properties: {
+    features: {
+      type: 'array',
+      items: {
+        type: 'object',
+        properties: {
+          id: { type: 'string', description: 'Unique feature identifier (kebab-case)' },
+          category: { type: 'string', description: 'Feature category' },
+          title: { type: 'string', description: 'Short, descriptive title' },
+          description: { type: 'string', description: 'Detailed feature description' },
+          priority: {
+            type: 'number',
+            description: 'Priority level: 1 (highest) to 5 (lowest)',
+          },
+          complexity: {
+            type: 'string',
+            enum: ['simple', 'moderate', 'complex'],
+            description: 'Implementation complexity',
+          },
+          dependencies: {
+            type: 'array',
+            items: { type: 'string' },
+            description: 'IDs of features this depends on',
+          },
+        },
+        required: ['id', 'title', 'description'],
+      },
+    },
+  },
+  required: ['features'],
+} as const;
+
 export async function generateFeaturesFromSpec(
  projectPath: string,
  events: EventEmitter,
@@ -136,23 +195,80 @@ Generate ${featureCount} NEW features that build on each other logically. Rememb
        provider: undefined,
        credentials: undefined,
      };
-  const { model, thinkingLevel } = resolvePhaseModel(phaseModelEntry);
+  const { model, thinkingLevel, reasoningEffort } = resolvePhaseModel(phaseModelEntry);

  logger.info('Using model:', model, provider ? `via provider: ${provider.name}` : 'direct API');

+  // Codex models need an extended timeout for generating many features, so force 'xhigh'
+  // (overriding any configured reasoning effort). This selects the provider's 5-minute base
+  // timeout, which the xhigh multiplier (4x) scales to 20 minutes (300s * 4.0 = 1200s).
+  const isCodex = isCodexModel(model);
+  const effectiveReasoningEffort = isCodex ? 'xhigh' : reasoningEffort;
+
+  if (isCodex) {
+    logger.info('Codex model detected - using extended timeout (5 minutes for feature generation)');
+  }
+  if (effectiveReasoningEffort) {
+    logger.info('Reasoning effort:', effectiveReasoningEffort);
+  }
+
+  // Determine if we should use structured output based on model type
+  const useStructuredOutput = supportsStructuredOutput(model);
+  logger.info(
+    `Structured output mode: ${useStructuredOutput ? 'enabled (Claude/Codex)' : 'disabled (using JSON instructions)'}`
+  );
+
+  // Build the final prompt - for non-Claude/Codex models, include explicit JSON instructions
+  let finalPrompt = prompt;
+  if (!useStructuredOutput) {
+    finalPrompt = `${prompt}
+
+CRITICAL INSTRUCTIONS:
+1. DO NOT write any files. Return the JSON in your response only.
+2. After analyzing the spec, respond with ONLY a JSON object - no explanations, no markdown, just raw JSON.
+3. The JSON must have this exact structure:
+{
+  "features": [
+    {
+      "id": "unique-feature-id",
+      "category": "Category Name",
+      "title": "Short Feature Title",
+      "description": "Detailed description of the feature",
+      "priority": 1,
+      "complexity": "simple|moderate|complex",
+      "dependencies": ["other-feature-id"]
+    }
+  ]
+}
+
+4. Feature IDs must be unique, lowercase, kebab-case (e.g., "user-authentication", "data-export")
+5. Priority ranges from 1 (highest) to 5 (lowest)
+6. Complexity must be one of: "simple", "moderate", "complex"
+7. Dependencies is an array of feature IDs that must be completed first (can be empty)
+
+Your entire response should be valid JSON starting with { and ending with }. No text before or after.`;
+  }
+
  // Use streamingQuery with event callbacks
  const result = await streamingQuery({
-    prompt,
+    prompt: finalPrompt,
    model,
    cwd: projectPath,
    maxTurns: 250,
    allowedTools: ['Read', 'Glob', 'Grep'],
    abortController,
    thinkingLevel,
+    reasoningEffort: effectiveReasoningEffort, // Extended timeout for Codex models
    readOnly: true, // Feature generation only reads code, doesn't write
    settingSources: autoLoadClaudeMd ? ['user', 'project', 'local'] : undefined,
    claudeCompatibleProvider: provider, // Pass provider for alternative endpoint configuration
    credentials, // Pass credentials for resolving 'credentials' apiKeySource
+    outputFormat: useStructuredOutput
+      ? {
+          type: 'json_schema',
+          schema: featuresOutputSchema,
+        }
+      : undefined,
    onText: (text) => {
      logger.debug(`Feature text block received (${text.length} chars)`);
      events.emit('spec-regeneration:event', {
@@ -163,15 +279,51 @@ Generate ${featureCount} NEW features that build on each other logically. Rememb
    },
  });

-  const responseText = result.text;
+  // Get response content - prefer structured output if available
+  let contentForParsing: string;

-  logger.info(`Feature stream complete.`);
-  logger.info(`Feature response length: ${responseText.length} chars`);
-  logger.info('========== FULL RESPONSE TEXT ==========');
-  logger.info(responseText);
-  logger.info('========== END RESPONSE TEXT ==========');
+  if (result.structured_output) {
+    // Use structured output from Claude/Codex models
+    logger.info('✅ Received structured output from model');
+    contentForParsing = JSON.stringify(result.structured_output);
+    logger.debug('Structured output:', contentForParsing);
+  } else {
+    // Use text response (for non-Claude/Codex models or fallback)
+    // Pre-extract JSON to handle conversational text that may surround the JSON response
+    // This follows the same pattern used in generate-spec.ts and validate-issue.ts
+    const rawText = result.text;
+    logger.info(`Feature stream complete.`);
+    logger.info(`Feature response length: ${rawText.length} chars`);
+    logger.info('========== FULL RESPONSE TEXT ==========');
+    logger.info(rawText);
+    logger.info('========== END RESPONSE TEXT ==========');

-  await parseAndCreateFeatures(projectPath, responseText, events);
+    // Pre-extract JSON from response - handles conversational text around the JSON
+    const extracted = extractJsonWithArray<FeaturesExtractionResult>(rawText, 'features', {
+      logger,
+    });
+    if (extracted) {
+      contentForParsing = JSON.stringify(extracted);
+      logger.info('✅ Pre-extracted JSON from text response');
+    } else {
+      // If pre-extraction fails, we know the next step will also fail.
+      // Throw an error here to avoid redundant parsing and make the failure point clearer.
+      logger.error(
+        '❌ Could not extract features JSON from model response. Full response text was:\n' +
+          rawText
+      );
+      const errorMessage =
+        'Failed to parse features from model response: No valid JSON with a "features" array found.';
+      events.emit('spec-regeneration:event', {
+        type: 'spec_regeneration_error',
+        error: errorMessage,
+        projectPath: projectPath,
+      });
+      throw new Error(errorMessage);
+    }
+  }
+
+  await parseAndCreateFeatures(projectPath, contentForParsing, events);

  logger.debug('========== generateFeaturesFromSpec() completed ==========');
 }
--- a/apps/server/src/routes/app-spec/generate-spec.ts
+++ b/apps/server/src/routes/app-spec/generate-spec.ts
@@ -9,7 +9,7 @@ import * as secureFs from '../../lib/secure-fs.js';
 import type { EventEmitter } from '../../lib/events.js';
 import { specOutputSchema, specToXml, type SpecOutput } from '../../lib/app-spec-format.js';
 import { createLogger } from '@automaker/utils';
-import { DEFAULT_PHASE_MODELS, isCursorModel } from '@automaker/types';
+import { DEFAULT_PHASE_MODELS, supportsStructuredOutput } from '@automaker/types';
 import { resolvePhaseModel } from '@automaker/model-resolver';
 import { extractJson } from '../../lib/json-extractor.js';
 import { streamingQuery } from '../../providers/simple-query-service.js';
@@ -120,10 +120,13 @@ ${prompts.appSpec.structuredSpecInstructions}`;
  let responseText = '';
  let structuredOutput: SpecOutput | null = null;

-  // Determine if we should use structured output (Claude supports it, Cursor doesn't)
-  const useStructuredOutput = !isCursorModel(model);
+  // Determine if we should use structured output based on model type
+  const useStructuredOutput = supportsStructuredOutput(model);
+  logger.info(
+    `Structured output mode: ${useStructuredOutput ? 'enabled (Claude/Codex)' : 'disabled (using JSON instructions)'}`
+  );

-  // Build the final prompt - for Cursor, include JSON schema instructions
+  // Build the final prompt - for non-Claude/Codex models, include JSON schema instructions
  let finalPrompt = prompt;
  if (!useStructuredOutput) {
    finalPrompt = `${prompt}
--- a/apps/server/src/routes/app-spec/sync-spec.ts
+++ b/apps/server/src/routes/app-spec/sync-spec.ts
@@ -10,9 +10,10 @@
 import * as secureFs from '../../lib/secure-fs.js';
 import type { EventEmitter } from '../../lib/events.js';
 import { createLogger } from '@automaker/utils';
-import { DEFAULT_PHASE_MODELS } from '@automaker/types';
+import { DEFAULT_PHASE_MODELS, supportsStructuredOutput } from '@automaker/types';
 import { resolvePhaseModel } from '@automaker/model-resolver';
 import { streamingQuery } from '../../providers/simple-query-service.js';
+import { extractJson } from '../../lib/json-extractor.js';
 import { getAppSpecPath } from '@automaker/platform';
 import type { SettingsService } from '../../services/settings-service.js';
 import {
@@ -34,6 +35,28 @@ import { getNotificationService } from '../../services/notification-service.js';

 const logger = createLogger('SpecSync');

+/**
+ * Type for extracted tech stack JSON response
+ */
+interface TechStackExtractionResult {
+  technologies: string[];
+}
+
+/**
+ * JSON schema for tech stack analysis output (Claude/Codex structured output)
+ */
+const techStackOutputSchema = {
+  type: 'object',
+  properties: {
+    technologies: {
+      type: 'array',
+      items: { type: 'string' },
+      description: 'List of technologies detected in the project',
+    },
+  },
+  required: ['technologies'],
+} as const;
+
 /**
 * Result of a sync operation
 */
@@ -176,8 +199,14 @@ export async function syncSpec(

  logger.info('Using model:', model, provider ? `via provider: ${provider.name}` : 'direct API');

+  // Determine if we should use structured output based on model type
+  const useStructuredOutput = supportsStructuredOutput(model);
+  logger.info(
+    `Structured output mode: ${useStructuredOutput ? 'enabled (Claude/Codex)' : 'disabled (using JSON instructions)'}`
+  );
+
  // Use AI to analyze tech stack
-  const techAnalysisPrompt = `Analyze this project and return ONLY a JSON object with the current technology stack.
+  let techAnalysisPrompt = `Analyze this project and return ONLY a JSON object with the current technology stack.

 Current known technologies: ${currentTechStack.join(', ')}

@@ -193,6 +222,16 @@ Return ONLY this JSON format, no other text:
  "technologies": ["Technology 1", "Technology 2", ...]
 }`;

+  // Add explicit JSON instructions for non-Claude/Codex models
+  if (!useStructuredOutput) {
+    techAnalysisPrompt = `${techAnalysisPrompt}
+
+CRITICAL INSTRUCTIONS:
+1. DO NOT write any files. Return the JSON in your response only.
+2. Your entire response should be valid JSON starting with { and ending with }.
+3. No explanations, no markdown, no text before or after the JSON.`;
+  }
+
  try {
    const techResult = await streamingQuery({
      prompt: techAnalysisPrompt,
@@ -206,44 +245,67 @@ Return ONLY this JSON format, no other text:
      settingSources: autoLoadClaudeMd ? ['user', 'project', 'local'] : undefined,
      claudeCompatibleProvider: provider, // Pass provider for alternative endpoint configuration
      credentials, // Pass credentials for resolving 'credentials' apiKeySource
+      outputFormat: useStructuredOutput
+        ? {
+            type: 'json_schema',
+            schema: techStackOutputSchema,
+          }
+        : undefined,
      onText: (text) => {
        logger.debug(`Tech analysis text: ${text.substring(0, 100)}`);
      },
    });

-    // Parse tech stack from response
-    const jsonMatch = techResult.text.match(/\{[\s\S]*"technologies"[\s\S]*\}/);
-    if (jsonMatch) {
-      const parsed = JSON.parse(jsonMatch[0]);
-      if (Array.isArray(parsed.technologies)) {
-        const newTechStack = parsed.technologies as string[];
+    // Parse tech stack from response - prefer structured output if available
+    let parsedTechnologies: string[] | null = null;

-        // Calculate differences
-        const currentSet = new Set(currentTechStack.map((t) => t.toLowerCase()));
-        const newSet = new Set(newTechStack.map((t) => t.toLowerCase()));
+    if (techResult.structured_output) {
+      // Use structured output from Claude/Codex models
+      const structured = techResult.structured_output as unknown as TechStackExtractionResult;
+      if (Array.isArray(structured.technologies)) {
+        parsedTechnologies = structured.technologies;
+        logger.info('✅ Received structured output for tech analysis');
+      }
+    } else {
+      // Fall back to text parsing for non-Claude/Codex models
+      const extracted = extractJson<TechStackExtractionResult>(techResult.text, {
+        logger,
+        requiredKey: 'technologies',
+        requireArray: true,
+      });
+      if (extracted && Array.isArray(extracted.technologies)) {
+        parsedTechnologies = extracted.technologies;
+        logger.info('✅ Extracted tech stack from text response');
+      } else {
+        logger.warn('⚠️ Failed to extract tech stack JSON from response');
+      }
+    }

-        for (const tech of newTechStack) {
-          if (!currentSet.has(tech.toLowerCase())) {
-            result.techStackUpdates.added.push(tech);
-          }
+    if (parsedTechnologies) {
+      const newTechStack = parsedTechnologies;
+
+      // Calculate differences
+      const currentSet = new Set(currentTechStack.map((t) => t.toLowerCase()));
+      const newSet = new Set(newTechStack.map((t) => t.toLowerCase()));
+
+      for (const tech of newTechStack) {
+        if (!currentSet.has(tech.toLowerCase())) {
+          result.techStackUpdates.added.push(tech);
        }
+      }

-        for (const tech of currentTechStack) {
-          if (!newSet.has(tech.toLowerCase())) {
-            result.techStackUpdates.removed.push(tech);
-          }
+      for (const tech of currentTechStack) {
+        if (!newSet.has(tech.toLowerCase())) {
+          result.techStackUpdates.removed.push(tech);
        }
+      }

-        // Update spec with new tech stack if there are changes
-        if (
-          result.techStackUpdates.added.length > 0 ||
-          result.techStackUpdates.removed.length > 0
-        ) {
-          specContent = updateTechnologyStack(specContent, newTechStack);
-          logger.info(
-            `Updated tech stack: +${result.techStackUpdates.added.length}, -${result.techStackUpdates.removed.length}`
-          );
-        }
+      // Update spec with new tech stack if there are changes
+      if (result.techStackUpdates.added.length > 0 || result.techStackUpdates.removed.length > 0) {
+        specContent = updateTechnologyStack(specContent, newTechStack);
+        logger.info(
+          `Updated tech stack: +${result.techStackUpdates.added.length}, -${result.techStackUpdates.removed.length}`
+        );
      }
    }
  } catch (error) {
--- a/apps/server/src/routes/github/routes/validate-issue.ts
+++ b/apps/server/src/routes/github/routes/validate-issue.ts
@@ -23,6 +23,7 @@ import {
  isCodexModel,
  isCursorModel,
  isOpencodeModel,
+  supportsStructuredOutput,
 } from '@automaker/types';
 import { resolvePhaseModel } from '@automaker/model-resolver';
 import { extractJson } from '../../../lib/json-extractor.js';
@@ -124,8 +125,9 @@ async function runValidation(
    const prompts = await getPromptCustomization(settingsService, '[ValidateIssue]');
    const issueValidationSystemPrompt = prompts.issueValidation.systemPrompt;

-    // Determine if we should use structured output (Claude/Codex support it, Cursor/OpenCode don't)
-    const useStructuredOutput = isClaudeModel(model) || isCodexModel(model);
+    // Determine if we should use structured output based on model type
+    // Claude and Codex support it; Cursor, Gemini, OpenCode, Copilot don't
+    const useStructuredOutput = supportsStructuredOutput(model);

    // Build the final prompt - for Cursor, include system prompt and JSON schema instructions
    let finalPrompt = basePrompt;
--- a/apps/server/tests/unit/providers/codex-provider.test.ts
+++ b/apps/server/tests/unit/providers/codex-provider.test.ts
@@ -325,8 +325,12 @@ describe('codex-provider.ts', () => {
      );

      const call = vi.mocked(spawnJSONLProcess).mock.calls[0][0];
-      // xhigh reasoning effort should have 4x the default timeout (120000ms)
-      expect(call.timeout).toBe(DEFAULT_TIMEOUT_MS * REASONING_TIMEOUT_MULTIPLIERS.xhigh);
+      // xhigh reasoning effort uses 5-minute base timeout (300000ms) for feature generation
+      // then applies 4x multiplier: 300000 * 4.0 = 1200000ms (20 minutes)
+      const CODEX_FEATURE_GENERATION_BASE_TIMEOUT_MS = 300000;
+      expect(call.timeout).toBe(
+        CODEX_FEATURE_GENERATION_BASE_TIMEOUT_MS * REASONING_TIMEOUT_MULTIPLIERS.xhigh
+      );
    });

    it('uses default timeout when no reasoning effort is specified', async () => {
--- a/libs/model-resolver/src/resolver.ts
+++ b/libs/model-resolver/src/resolver.ts
@@ -32,6 +32,7 @@ import {
  migrateModelId,
  type PhaseModelEntry,
  type ThinkingLevel,
+  type ReasoningEffort,
 } from '@automaker/types';

 // Pattern definitions for Codex/OpenAI models
@@ -162,8 +163,10 @@ export function getEffectiveModel(
 export interface ResolvedPhaseModel {
  /** Resolved model string (full model ID) */
  model: string;
-  /** Optional thinking level for extended thinking */
+  /** Optional thinking level for extended thinking (Claude models) */
  thinkingLevel?: ThinkingLevel;
+  /** Optional reasoning effort for timeout calculation (Codex models) */
+  reasoningEffort?: ReasoningEffort;
  /** Provider ID if using a ClaudeCompatibleProvider */
  providerId?: string;
 }
@@ -205,6 +208,7 @@ export function resolvePhaseModel(
    return {
      model: resolveModelString(undefined, defaultModel),
      thinkingLevel: undefined,
+      reasoningEffort: undefined,
    };
  }

@@ -214,12 +218,13 @@ export function resolvePhaseModel(
    return {
      model: resolveModelString(phaseModel, defaultModel),
      thinkingLevel: undefined,
+      reasoningEffort: undefined,
    };
  }

  // Handle new PhaseModelEntry object format
  console.log(
-    `[ModelResolver] phaseModel is object format: model="${phaseModel.model}", thinkingLevel="${phaseModel.thinkingLevel}", providerId="${phaseModel.providerId}"`
+    `[ModelResolver] phaseModel is object format: model="${phaseModel.model}", thinkingLevel="${phaseModel.thinkingLevel}", reasoningEffort="${phaseModel.reasoningEffort}", providerId="${phaseModel.providerId}"`
  );

  // If providerId is set, pass through the model string unchanged
@@ -231,6 +236,7 @@ export function resolvePhaseModel(
    return {
      model: phaseModel.model, // Pass through unchanged
      thinkingLevel: phaseModel.thinkingLevel,
+      reasoningEffort: phaseModel.reasoningEffort,
      providerId: phaseModel.providerId,
    };
  }
@@ -239,5 +245,6 @@ export function resolvePhaseModel(
  return {
    model: resolveModelString(phaseModel.model, defaultModel),
    thinkingLevel: phaseModel.thinkingLevel,
+    reasoningEffort: phaseModel.reasoningEffort,
  };
 }
--- a/libs/types/src/index.ts
+++ b/libs/types/src/index.ts
@@ -272,6 +272,7 @@ export {
  getBareModelId,
  normalizeModelString,
  validateBareModelId,
+  supportsStructuredOutput,
 } from './provider-utils.js';

 // Model migration utilities
--- a/libs/types/src/provider-utils.ts
+++ b/libs/types/src/provider-utils.ts
@@ -7,7 +7,7 @@
 */

 import type { ModelProvider } from './settings.js';
-import { CURSOR_MODEL_MAP, LEGACY_CURSOR_MODEL_MAP } from './cursor-models.js';
+import { LEGACY_CURSOR_MODEL_MAP } from './cursor-models.js';
 import { CLAUDE_MODEL_MAP, CODEX_MODEL_MAP } from './model.js';
 import { OPENCODE_MODEL_CONFIG_MAP, LEGACY_OPENCODE_MODEL_MAP } from './opencode-models.js';
 import { GEMINI_MODEL_MAP } from './gemini-models.js';
@@ -345,6 +345,44 @@ export function normalizeModelString(model: string | undefined | null): string {
  return model;
 }

+/**
+ * Check if a model supports structured output (JSON schema)
+ *
+ * Structured output is a feature that allows the model to return responses
+ * conforming to a JSON schema. Currently supported by:
+ * - Claude models (native Anthropic API support)
+ * - Codex/OpenAI models (via response_format with json_schema)
+ *
+ * Models that do NOT support structured output:
+ * - Cursor models (uses different API format)
+ * - OpenCode models (various backend providers)
+ * - Gemini models (different API)
+ * - Copilot models (proxy to various backends)
+ *
+ * @param model - Model string to check
+ * @returns true if the model supports structured output
+ *
+ * @example
+ * supportsStructuredOutput('sonnet') // true (Claude)
+ * supportsStructuredOutput('claude-sonnet-4-20250514') // true (Claude)
+ * supportsStructuredOutput('codex-gpt-5.2') // true (Codex/OpenAI)
+ * supportsStructuredOutput('cursor-auto') // false
+ * supportsStructuredOutput('gemini-2.5-pro') // false
+ */
+export function supportsStructuredOutput(model: string | undefined | null): boolean {
+  // Exclude proxy providers first - they may have Claude/Codex in the model name
+  // but route through different APIs that don't support structured output
+  if (
+    isCursorModel(model) ||
+    isGeminiModel(model) ||
+    isOpencodeModel(model) ||
+    isCopilotModel(model)
+  ) {
+    return false;
+  }
+  return isClaudeModel(model) || isCodexModel(model);
+}
+
 /**
 * Validate that a model ID does not contain a provider prefix
 *