Mirror of https://github.com/AutoMaker-Org/automaker.git (synced 2026-02-04 09:13:08 +00:00)
Merge pull request #682 from AutoMaker-Org/feature/bug-fix-app-spec-generation-for-non-claude-models-dgq0
fix: Add structured output fallback for non-Claude models in app spec generation
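Taken together, the hunks below apply one pattern across feature generation, spec generation, spec sync, and issue validation: check whether the target model can honor a JSON schema, and if it cannot, fall back to explicit JSON-only prompt instructions plus JSON extraction from the text response. A minimal sketch of that decision, assuming only the `supportsStructuredOutput` helper added in this PR; the `QueryPlan` shape and `planJsonQuery` function are illustrative, not part of the codebase:

```typescript
import { supportsStructuredOutput } from '@automaker/types';

// Hypothetical option shape, mirroring the streamingQuery fields touched in this diff.
interface QueryPlan {
  prompt: string;
  outputFormat?: { type: 'json_schema'; schema: object };
}

// Decide how to ask a given model for JSON: native structured output for
// Claude/Codex, prompt-level JSON instructions for everything else.
export function planJsonQuery(model: string, prompt: string, schema: object): QueryPlan {
  if (supportsStructuredOutput(model)) {
    return { prompt, outputFormat: { type: 'json_schema', schema } };
  }
  return {
    prompt: `${prompt}\n\nRespond with ONLY a raw JSON object. No explanations, no markdown.`,
  };
}
```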
@@ -98,9 +98,14 @@ const TEXT_ENCODING = 'utf-8';
  * This is the "no output" timeout - if the CLI doesn't produce any JSONL output
  * for this duration, the process is killed. For reasoning models with high
  * reasoning effort, this timeout is dynamically extended via calculateReasoningTimeout().
+ *
+ * For feature generation (which can generate 50+ features), we use a much longer
+ * base timeout (5 minutes) since Codex models are slower at generating large JSON responses.
+ *
  * @see calculateReasoningTimeout from @automaker/types
  */
 const CODEX_CLI_TIMEOUT_MS = DEFAULT_TIMEOUT_MS;
+const CODEX_FEATURE_GENERATION_BASE_TIMEOUT_MS = 300000; // 5 minutes for feature generation
 const CONTEXT_WINDOW_256K = 256000;
 const MAX_OUTPUT_32K = 32000;
 const MAX_OUTPUT_16K = 16000;
@@ -827,7 +832,14 @@ export class CodexProvider extends BaseProvider {
     // Higher reasoning effort (e.g., 'xhigh' for "xtra thinking" mode) requires more time
     // for the model to generate reasoning tokens before producing output.
     // This fixes GitHub issue #530 where features would get stuck with reasoning models.
-    const timeout = calculateReasoningTimeout(options.reasoningEffort, CODEX_CLI_TIMEOUT_MS);
+    //
+    // For feature generation with 'xhigh', use the extended 5-minute base timeout
+    // since generating 50+ features takes significantly longer than normal operations.
+    const baseTimeout =
+      options.reasoningEffort === 'xhigh'
+        ? CODEX_FEATURE_GENERATION_BASE_TIMEOUT_MS
+        : CODEX_CLI_TIMEOUT_MS;
+    const timeout = calculateReasoningTimeout(options.reasoningEffort, baseTimeout);

     const stream = spawnJSONLProcess({
       command: commandPath,
@@ -8,10 +8,11 @@
 import * as secureFs from '../../lib/secure-fs.js';
 import type { EventEmitter } from '../../lib/events.js';
 import { createLogger } from '@automaker/utils';
-import { DEFAULT_PHASE_MODELS } from '@automaker/types';
+import { DEFAULT_PHASE_MODELS, supportsStructuredOutput, isCodexModel } from '@automaker/types';
 import { resolvePhaseModel } from '@automaker/model-resolver';
 import { streamingQuery } from '../../providers/simple-query-service.js';
 import { parseAndCreateFeatures } from './parse-and-create-features.js';
+import { extractJsonWithArray } from '../../lib/json-extractor.js';
 import { getAppSpecPath } from '@automaker/platform';
 import type { SettingsService } from '../../services/settings-service.js';
 import {
@@ -25,6 +26,64 @@ const logger = createLogger('SpecRegeneration');

 const DEFAULT_MAX_FEATURES = 50;

+/**
+ * Timeout for Codex models when generating features (5 minutes).
+ * Codex models are slower and need more time to generate 50+ features.
+ */
+const CODEX_FEATURE_GENERATION_TIMEOUT_MS = 300000; // 5 minutes
+
+/**
+ * Type for extracted features JSON response
+ */
+interface FeaturesExtractionResult {
+  features: Array<{
+    id: string;
+    category?: string;
+    title: string;
+    description: string;
+    priority?: number;
+    complexity?: 'simple' | 'moderate' | 'complex';
+    dependencies?: string[];
+  }>;
+}
+
+/**
+ * JSON schema for features output format (Claude/Codex structured output)
+ */
+const featuresOutputSchema = {
+  type: 'object',
+  properties: {
+    features: {
+      type: 'array',
+      items: {
+        type: 'object',
+        properties: {
+          id: { type: 'string', description: 'Unique feature identifier (kebab-case)' },
+          category: { type: 'string', description: 'Feature category' },
+          title: { type: 'string', description: 'Short, descriptive title' },
+          description: { type: 'string', description: 'Detailed feature description' },
+          priority: {
+            type: 'number',
+            description: 'Priority level: 1 (highest) to 5 (lowest)',
+          },
+          complexity: {
+            type: 'string',
+            enum: ['simple', 'moderate', 'complex'],
+            description: 'Implementation complexity',
+          },
+          dependencies: {
+            type: 'array',
+            items: { type: 'string' },
+            description: 'IDs of features this depends on',
+          },
+        },
+        required: ['id', 'title', 'description'],
+      },
+    },
+  },
+  required: ['features'],
+} as const;
+
 export async function generateFeaturesFromSpec(
   projectPath: string,
   events: EventEmitter,
@@ -136,23 +195,80 @@ Generate ${featureCount} NEW features that build on each other logically. Rememb
     provider: undefined,
     credentials: undefined,
   };
-  const { model, thinkingLevel } = resolvePhaseModel(phaseModelEntry);
+  const { model, thinkingLevel, reasoningEffort } = resolvePhaseModel(phaseModelEntry);

   logger.info('Using model:', model, provider ? `via provider: ${provider.name}` : 'direct API');

+  // Codex models need extended timeout for generating many features.
+  // Use 'xhigh' reasoning effort to get 5-minute timeout (300s base * 1.0x = 300s).
+  // The Codex provider has a special 5-minute base timeout for feature generation.
+  const isCodex = isCodexModel(model);
+  const effectiveReasoningEffort = isCodex ? 'xhigh' : reasoningEffort;
+
+  if (isCodex) {
+    logger.info('Codex model detected - using extended timeout (5 minutes for feature generation)');
+  }
+  if (effectiveReasoningEffort) {
+    logger.info('Reasoning effort:', effectiveReasoningEffort);
+  }
+
+  // Determine if we should use structured output based on model type
+  const useStructuredOutput = supportsStructuredOutput(model);
+  logger.info(
+    `Structured output mode: ${useStructuredOutput ? 'enabled (Claude/Codex)' : 'disabled (using JSON instructions)'}`
+  );
+
+  // Build the final prompt - for non-Claude/Codex models, include explicit JSON instructions
+  let finalPrompt = prompt;
+  if (!useStructuredOutput) {
+    finalPrompt = `${prompt}
+
+CRITICAL INSTRUCTIONS:
+1. DO NOT write any files. Return the JSON in your response only.
+2. After analyzing the spec, respond with ONLY a JSON object - no explanations, no markdown, just raw JSON.
+3. The JSON must have this exact structure:
+{
+  "features": [
+    {
+      "id": "unique-feature-id",
+      "category": "Category Name",
+      "title": "Short Feature Title",
+      "description": "Detailed description of the feature",
+      "priority": 1,
+      "complexity": "simple|moderate|complex",
+      "dependencies": ["other-feature-id"]
+    }
+  ]
+}
+
+4. Feature IDs must be unique, lowercase, kebab-case (e.g., "user-authentication", "data-export")
+5. Priority ranges from 1 (highest) to 5 (lowest)
+6. Complexity must be one of: "simple", "moderate", "complex"
+7. Dependencies is an array of feature IDs that must be completed first (can be empty)
+
+Your entire response should be valid JSON starting with { and ending with }. No text before or after.`;
+  }
+
   // Use streamingQuery with event callbacks
   const result = await streamingQuery({
-    prompt,
+    prompt: finalPrompt,
     model,
     cwd: projectPath,
     maxTurns: 250,
     allowedTools: ['Read', 'Glob', 'Grep'],
     abortController,
     thinkingLevel,
+    reasoningEffort: effectiveReasoningEffort, // Extended timeout for Codex models
     readOnly: true, // Feature generation only reads code, doesn't write
     settingSources: autoLoadClaudeMd ? ['user', 'project', 'local'] : undefined,
     claudeCompatibleProvider: provider, // Pass provider for alternative endpoint configuration
     credentials, // Pass credentials for resolving 'credentials' apiKeySource
+    outputFormat: useStructuredOutput
+      ? {
+          type: 'json_schema',
+          schema: featuresOutputSchema,
+        }
+      : undefined,
     onText: (text) => {
       logger.debug(`Feature text block received (${text.length} chars)`);
       events.emit('spec-regeneration:event', {
@@ -163,15 +279,51 @@ Generate ${featureCount} NEW features that build on each other logically. Rememb
     },
   });

-  const responseText = result.text;
+  // Get response content - prefer structured output if available
+  let contentForParsing: string;

-  logger.info(`Feature stream complete.`);
-  logger.info(`Feature response length: ${responseText.length} chars`);
-  logger.info('========== FULL RESPONSE TEXT ==========');
-  logger.info(responseText);
-  logger.info('========== END RESPONSE TEXT ==========');
+  if (result.structured_output) {
+    // Use structured output from Claude/Codex models
+    logger.info('✅ Received structured output from model');
+    contentForParsing = JSON.stringify(result.structured_output);
+    logger.debug('Structured output:', contentForParsing);
+  } else {
+    // Use text response (for non-Claude/Codex models or fallback)
+    // Pre-extract JSON to handle conversational text that may surround the JSON response
+    // This follows the same pattern used in generate-spec.ts and validate-issue.ts
+    const rawText = result.text;
+    logger.info(`Feature stream complete.`);
+    logger.info(`Feature response length: ${rawText.length} chars`);
+    logger.info('========== FULL RESPONSE TEXT ==========');
+    logger.info(rawText);
+    logger.info('========== END RESPONSE TEXT ==========');

-  await parseAndCreateFeatures(projectPath, responseText, events);
+    // Pre-extract JSON from response - handles conversational text around the JSON
+    const extracted = extractJsonWithArray<FeaturesExtractionResult>(rawText, 'features', {
+      logger,
+    });
+    if (extracted) {
+      contentForParsing = JSON.stringify(extracted);
+      logger.info('✅ Pre-extracted JSON from text response');
+    } else {
+      // If pre-extraction fails, we know the next step will also fail.
+      // Throw an error here to avoid redundant parsing and make the failure point clearer.
+      logger.error(
+        '❌ Could not extract features JSON from model response. Full response text was:\n' +
+          rawText
+      );
+      const errorMessage =
+        'Failed to parse features from model response: No valid JSON with a "features" array found.';
+      events.emit('spec-regeneration:event', {
+        type: 'spec_regeneration_error',
+        error: errorMessage,
+        projectPath: projectPath,
+      });
+      throw new Error(errorMessage);
+    }
+  }
+
+  await parseAndCreateFeatures(projectPath, contentForParsing, events);

   logger.debug('========== generateFeaturesFromSpec() completed ==========');
 }
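The fallback branch above hinges on pulling a `features` object out of a response that may be wrapped in conversational text. A simplified stand-in for `extractJsonWithArray` (not the project's json-extractor, which also takes a logger and handles more edge cases) illustrates the intent:

```typescript
// Simplified stand-in: take the outermost {...} span from a chatty response and
// keep it only if it parses and contains the required array key.
function extractJsonWithArraySketch<T>(text: string, arrayKey: string): T | null {
  const start = text.indexOf('{');
  const end = text.lastIndexOf('}');
  if (start === -1 || end <= start) return null;
  try {
    const parsed = JSON.parse(text.slice(start, end + 1)) as Record<string, unknown>;
    return Array.isArray(parsed[arrayKey]) ? (parsed as unknown as T) : null;
  } catch {
    return null;
  }
}

// Example: a model that adds prose around its JSON still yields a usable result.
const reply =
  'Sure! Here are the features:\n{"features":[{"id":"data-export","title":"Data export","description":"Export data as CSV"}]}';
console.log(extractJsonWithArraySketch<{ features: unknown[] }>(reply, 'features'));
```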
@@ -9,7 +9,7 @@ import * as secureFs from '../../lib/secure-fs.js';
 import type { EventEmitter } from '../../lib/events.js';
 import { specOutputSchema, specToXml, type SpecOutput } from '../../lib/app-spec-format.js';
 import { createLogger } from '@automaker/utils';
-import { DEFAULT_PHASE_MODELS, isCursorModel } from '@automaker/types';
+import { DEFAULT_PHASE_MODELS, supportsStructuredOutput } from '@automaker/types';
 import { resolvePhaseModel } from '@automaker/model-resolver';
 import { extractJson } from '../../lib/json-extractor.js';
 import { streamingQuery } from '../../providers/simple-query-service.js';

@@ -120,10 +120,13 @@ ${prompts.appSpec.structuredSpecInstructions}`;
   let responseText = '';
   let structuredOutput: SpecOutput | null = null;

-  // Determine if we should use structured output (Claude supports it, Cursor doesn't)
-  const useStructuredOutput = !isCursorModel(model);
+  // Determine if we should use structured output based on model type
+  const useStructuredOutput = supportsStructuredOutput(model);
+  logger.info(
+    `Structured output mode: ${useStructuredOutput ? 'enabled (Claude/Codex)' : 'disabled (using JSON instructions)'}`
+  );

-  // Build the final prompt - for Cursor, include JSON schema instructions
+  // Build the final prompt - for non-Claude/Codex models, include JSON schema instructions
   let finalPrompt = prompt;
   if (!useStructuredOutput) {
     finalPrompt = `${prompt}
@@ -10,9 +10,10 @@
 import * as secureFs from '../../lib/secure-fs.js';
 import type { EventEmitter } from '../../lib/events.js';
 import { createLogger } from '@automaker/utils';
-import { DEFAULT_PHASE_MODELS } from '@automaker/types';
+import { DEFAULT_PHASE_MODELS, supportsStructuredOutput } from '@automaker/types';
 import { resolvePhaseModel } from '@automaker/model-resolver';
 import { streamingQuery } from '../../providers/simple-query-service.js';
+import { extractJson } from '../../lib/json-extractor.js';
 import { getAppSpecPath } from '@automaker/platform';
 import type { SettingsService } from '../../services/settings-service.js';
 import {

@@ -34,6 +35,28 @@ import { getNotificationService } from '../../services/notification-service.js';

 const logger = createLogger('SpecSync');

+/**
+ * Type for extracted tech stack JSON response
+ */
+interface TechStackExtractionResult {
+  technologies: string[];
+}
+
+/**
+ * JSON schema for tech stack analysis output (Claude/Codex structured output)
+ */
+const techStackOutputSchema = {
+  type: 'object',
+  properties: {
+    technologies: {
+      type: 'array',
+      items: { type: 'string' },
+      description: 'List of technologies detected in the project',
+    },
+  },
+  required: ['technologies'],
+} as const;
+
 /**
  * Result of a sync operation
  */

@@ -176,8 +199,14 @@ export async function syncSpec(

   logger.info('Using model:', model, provider ? `via provider: ${provider.name}` : 'direct API');

+  // Determine if we should use structured output based on model type
+  const useStructuredOutput = supportsStructuredOutput(model);
+  logger.info(
+    `Structured output mode: ${useStructuredOutput ? 'enabled (Claude/Codex)' : 'disabled (using JSON instructions)'}`
+  );
+
   // Use AI to analyze tech stack
-  const techAnalysisPrompt = `Analyze this project and return ONLY a JSON object with the current technology stack.
+  let techAnalysisPrompt = `Analyze this project and return ONLY a JSON object with the current technology stack.

 Current known technologies: ${currentTechStack.join(', ')}

@@ -193,6 +222,16 @@ Return ONLY this JSON format, no other text:
   "technologies": ["Technology 1", "Technology 2", ...]
 }`;

+  // Add explicit JSON instructions for non-Claude/Codex models
+  if (!useStructuredOutput) {
+    techAnalysisPrompt = `${techAnalysisPrompt}
+
+CRITICAL INSTRUCTIONS:
+1. DO NOT write any files. Return the JSON in your response only.
+2. Your entire response should be valid JSON starting with { and ending with }.
+3. No explanations, no markdown, no text before or after the JSON.`;
+  }
+
   try {
     const techResult = await streamingQuery({
       prompt: techAnalysisPrompt,

@@ -206,44 +245,67 @@ Return ONLY this JSON format, no other text:
       settingSources: autoLoadClaudeMd ? ['user', 'project', 'local'] : undefined,
       claudeCompatibleProvider: provider, // Pass provider for alternative endpoint configuration
       credentials, // Pass credentials for resolving 'credentials' apiKeySource
+      outputFormat: useStructuredOutput
+        ? {
+            type: 'json_schema',
+            schema: techStackOutputSchema,
+          }
+        : undefined,
       onText: (text) => {
         logger.debug(`Tech analysis text: ${text.substring(0, 100)}`);
       },
     });

-    // Parse tech stack from response
-    const jsonMatch = techResult.text.match(/\{[\s\S]*"technologies"[\s\S]*\}/);
-    if (jsonMatch) {
-      const parsed = JSON.parse(jsonMatch[0]);
-      if (Array.isArray(parsed.technologies)) {
-        const newTechStack = parsed.technologies as string[];
+    // Parse tech stack from response - prefer structured output if available
+    let parsedTechnologies: string[] | null = null;

-        // Calculate differences
-        const currentSet = new Set(currentTechStack.map((t) => t.toLowerCase()));
-        const newSet = new Set(newTechStack.map((t) => t.toLowerCase()));
+    if (techResult.structured_output) {
+      // Use structured output from Claude/Codex models
+      const structured = techResult.structured_output as unknown as TechStackExtractionResult;
+      if (Array.isArray(structured.technologies)) {
+        parsedTechnologies = structured.technologies;
+        logger.info('✅ Received structured output for tech analysis');
+      }
+    } else {
+      // Fall back to text parsing for non-Claude/Codex models
+      const extracted = extractJson<TechStackExtractionResult>(techResult.text, {
+        logger,
+        requiredKey: 'technologies',
+        requireArray: true,
+      });
+      if (extracted && Array.isArray(extracted.technologies)) {
+        parsedTechnologies = extracted.technologies;
+        logger.info('✅ Extracted tech stack from text response');
+      } else {
+        logger.warn('⚠️ Failed to extract tech stack JSON from response');
+      }
+    }

-        for (const tech of newTechStack) {
-          if (!currentSet.has(tech.toLowerCase())) {
-            result.techStackUpdates.added.push(tech);
-          }
-        }
+    if (parsedTechnologies) {
+      const newTechStack = parsedTechnologies;

-        for (const tech of currentTechStack) {
-          if (!newSet.has(tech.toLowerCase())) {
-            result.techStackUpdates.removed.push(tech);
-          }
-        }
+      // Calculate differences
+      const currentSet = new Set(currentTechStack.map((t) => t.toLowerCase()));
+      const newSet = new Set(newTechStack.map((t) => t.toLowerCase()));

-        // Update spec with new tech stack if there are changes
-        if (
-          result.techStackUpdates.added.length > 0 ||
-          result.techStackUpdates.removed.length > 0
-        ) {
-          specContent = updateTechnologyStack(specContent, newTechStack);
-          logger.info(
-            `Updated tech stack: +${result.techStackUpdates.added.length}, -${result.techStackUpdates.removed.length}`
-          );
-        }
+      for (const tech of newTechStack) {
+        if (!currentSet.has(tech.toLowerCase())) {
+          result.techStackUpdates.added.push(tech);
+        }
+      }
+
+      for (const tech of currentTechStack) {
+        if (!newSet.has(tech.toLowerCase())) {
+          result.techStackUpdates.removed.push(tech);
+        }
+      }
+
+      // Update spec with new tech stack if there are changes
+      if (result.techStackUpdates.added.length > 0 || result.techStackUpdates.removed.length > 0) {
+        specContent = updateTechnologyStack(specContent, newTechStack);
+        logger.info(
+          `Updated tech stack: +${result.techStackUpdates.added.length}, -${result.techStackUpdates.removed.length}`
+        );
       }
     }
   } catch (error) {
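To make the added/removed computation in the last hunk concrete, here is a worked example with made-up technology names; it mirrors the loop logic above using array filters:

```typescript
// Case-insensitive diff between the known tech stack and the freshly analyzed one.
const currentTechStack = ['React', 'Express', 'SQLite'];
const newTechStack = ['React', 'Express', 'PostgreSQL'];

const currentSet = new Set(currentTechStack.map((t) => t.toLowerCase()));
const newSet = new Set(newTechStack.map((t) => t.toLowerCase()));

const added = newTechStack.filter((t) => !currentSet.has(t.toLowerCase()));   // ['PostgreSQL']
const removed = currentTechStack.filter((t) => !newSet.has(t.toLowerCase())); // ['SQLite']
console.log({ added, removed });
```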
@@ -23,6 +23,7 @@ import {
   isCodexModel,
   isCursorModel,
   isOpencodeModel,
+  supportsStructuredOutput,
 } from '@automaker/types';
 import { resolvePhaseModel } from '@automaker/model-resolver';
 import { extractJson } from '../../../lib/json-extractor.js';

@@ -124,8 +125,9 @@ async function runValidation(
   const prompts = await getPromptCustomization(settingsService, '[ValidateIssue]');
   const issueValidationSystemPrompt = prompts.issueValidation.systemPrompt;

-  // Determine if we should use structured output (Claude/Codex support it, Cursor/OpenCode don't)
-  const useStructuredOutput = isClaudeModel(model) || isCodexModel(model);
+  // Determine if we should use structured output based on model type
+  // Claude and Codex support it; Cursor, Gemini, OpenCode, Copilot don't
+  const useStructuredOutput = supportsStructuredOutput(model);

   // Build the final prompt - for Cursor, include system prompt and JSON schema instructions
   let finalPrompt = basePrompt;
@@ -325,8 +325,12 @@ describe('codex-provider.ts', () => {
       );

       const call = vi.mocked(spawnJSONLProcess).mock.calls[0][0];
-      // xhigh reasoning effort should have 4x the default timeout (120000ms)
-      expect(call.timeout).toBe(DEFAULT_TIMEOUT_MS * REASONING_TIMEOUT_MULTIPLIERS.xhigh);
+      // xhigh reasoning effort uses 5-minute base timeout (300000ms) for feature generation
+      // then applies 4x multiplier: 300000 * 4.0 = 1200000ms (20 minutes)
+      const CODEX_FEATURE_GENERATION_BASE_TIMEOUT_MS = 300000;
+      expect(call.timeout).toBe(
+        CODEX_FEATURE_GENERATION_BASE_TIMEOUT_MS * REASONING_TIMEOUT_MULTIPLIERS.xhigh
+      );
     });

     it('uses default timeout when no reasoning effort is specified', async () => {
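The updated expectation pins down the arithmetic: the 5-minute feature-generation base times the xhigh multiplier gives a 20-minute ceiling. A small sketch of that calculation; only the 300000 ms base and the 4.0 xhigh multiplier come from this diff, the other effort levels and values are placeholders:

```typescript
// Sketch of the timeout arithmetic asserted by the test above.
type ReasoningEffort = 'low' | 'medium' | 'high' | 'xhigh'; // levels other than 'xhigh' are assumed

const REASONING_TIMEOUT_MULTIPLIERS: Record<ReasoningEffort, number> = {
  low: 1.0,    // placeholder
  medium: 1.5, // placeholder
  high: 2.0,   // placeholder
  xhigh: 4.0,  // implied by the updated expectation (300000 * 4.0 = 1200000)
};

function calculateReasoningTimeoutSketch(
  effort: ReasoningEffort | undefined,
  baseMs: number
): number {
  return effort ? baseMs * REASONING_TIMEOUT_MULTIPLIERS[effort] : baseMs;
}

console.log(calculateReasoningTimeoutSketch('xhigh', 300_000)); // 1200000 ms (20 minutes)
```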
@@ -32,6 +32,7 @@ import {
   migrateModelId,
   type PhaseModelEntry,
   type ThinkingLevel,
+  type ReasoningEffort,
 } from '@automaker/types';

 // Pattern definitions for Codex/OpenAI models

@@ -162,8 +163,10 @@ export function getEffectiveModel(
 export interface ResolvedPhaseModel {
   /** Resolved model string (full model ID) */
   model: string;
-  /** Optional thinking level for extended thinking */
+  /** Optional thinking level for extended thinking (Claude models) */
   thinkingLevel?: ThinkingLevel;
+  /** Optional reasoning effort for timeout calculation (Codex models) */
+  reasoningEffort?: ReasoningEffort;
   /** Provider ID if using a ClaudeCompatibleProvider */
   providerId?: string;
 }

@@ -205,6 +208,7 @@ export function resolvePhaseModel(
     return {
       model: resolveModelString(undefined, defaultModel),
       thinkingLevel: undefined,
+      reasoningEffort: undefined,
     };
   }

@@ -214,12 +218,13 @@ export function resolvePhaseModel(
     return {
       model: resolveModelString(phaseModel, defaultModel),
       thinkingLevel: undefined,
+      reasoningEffort: undefined,
     };
   }

   // Handle new PhaseModelEntry object format
   console.log(
-    `[ModelResolver] phaseModel is object format: model="${phaseModel.model}", thinkingLevel="${phaseModel.thinkingLevel}", providerId="${phaseModel.providerId}"`
+    `[ModelResolver] phaseModel is object format: model="${phaseModel.model}", thinkingLevel="${phaseModel.thinkingLevel}", reasoningEffort="${phaseModel.reasoningEffort}", providerId="${phaseModel.providerId}"`
   );

   // If providerId is set, pass through the model string unchanged

@@ -231,6 +236,7 @@ export function resolvePhaseModel(
     return {
       model: phaseModel.model, // Pass through unchanged
       thinkingLevel: phaseModel.thinkingLevel,
+      reasoningEffort: phaseModel.reasoningEffort,
       providerId: phaseModel.providerId,
     };
   }

@@ -239,5 +245,6 @@ export function resolvePhaseModel(
   return {
     model: resolveModelString(phaseModel.model, defaultModel),
     thinkingLevel: phaseModel.thinkingLevel,
+    reasoningEffort: phaseModel.reasoningEffort,
   };
 }
@@ -272,6 +272,7 @@ export {
   getBareModelId,
   normalizeModelString,
   validateBareModelId,
+  supportsStructuredOutput,
 } from './provider-utils.js';

 // Model migration utilities
@@ -7,7 +7,7 @@
  */

 import type { ModelProvider } from './settings.js';
-import { CURSOR_MODEL_MAP, LEGACY_CURSOR_MODEL_MAP } from './cursor-models.js';
+import { LEGACY_CURSOR_MODEL_MAP } from './cursor-models.js';
 import { CLAUDE_MODEL_MAP, CODEX_MODEL_MAP } from './model.js';
 import { OPENCODE_MODEL_CONFIG_MAP, LEGACY_OPENCODE_MODEL_MAP } from './opencode-models.js';
 import { GEMINI_MODEL_MAP } from './gemini-models.js';

@@ -345,6 +345,44 @@ export function normalizeModelString(model: string | undefined | null): string {
   return model;
 }

+/**
+ * Check if a model supports structured output (JSON schema)
+ *
+ * Structured output is a feature that allows the model to return responses
+ * conforming to a JSON schema. Currently supported by:
+ * - Claude models (native Anthropic API support)
+ * - Codex/OpenAI models (via response_format with json_schema)
+ *
+ * Models that do NOT support structured output:
+ * - Cursor models (uses different API format)
+ * - OpenCode models (various backend providers)
+ * - Gemini models (different API)
+ * - Copilot models (proxy to various backends)
+ *
+ * @param model - Model string to check
+ * @returns true if the model supports structured output
+ *
+ * @example
+ * supportsStructuredOutput('sonnet') // true (Claude)
+ * supportsStructuredOutput('claude-sonnet-4-20250514') // true (Claude)
+ * supportsStructuredOutput('codex-gpt-5.2') // true (Codex/OpenAI)
+ * supportsStructuredOutput('cursor-auto') // false
+ * supportsStructuredOutput('gemini-2.5-pro') // false
+ */
+export function supportsStructuredOutput(model: string | undefined | null): boolean {
+  // Exclude proxy providers first - they may have Claude/Codex in the model name
+  // but route through different APIs that don't support structured output
+  if (
+    isCursorModel(model) ||
+    isGeminiModel(model) ||
+    isOpencodeModel(model) ||
+    isCopilotModel(model)
+  ) {
+    return false;
+  }
+  return isClaudeModel(model) || isCodexModel(model);
+}
+
 /**
  * Validate that a model ID does not contain a provider prefix
  *