mirror of
https://github.com/AutoMaker-Org/automaker.git
synced 2026-02-04 21:23:07 +00:00
refactor(server): Extract JSON extraction utility to shared module
Created apps/server/src/lib/json-extractor.ts with reusable JSON
extraction utilities for parsing AI responses:
- extractJson<T>(): Multi-strategy JSON extraction
- extractJsonWithKey<T>(): Extract with required key validation
- extractJsonWithArray<T>(): Extract with array property validation
Strategies (tried in order):
1. JSON in ```json code block
2. JSON in ``` code block
3. Find JSON object by matching braces (with optional required key)
4. Find any JSON object by matching braces
5. First { to last }
6. Parse entire response
Updated:
- generate-suggestions.ts: Use extractJsonWithArray('suggestions')
- validate-issue.ts: Use extractJson()
Both files now use the shared utility instead of local implementations,
following DRY principle.
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
211
apps/server/src/lib/json-extractor.ts
Normal file
211
apps/server/src/lib/json-extractor.ts
Normal file
@@ -0,0 +1,211 @@
|
|||||||
|
/**
 * JSON Extraction Utilities
 *
 * Robust JSON extraction from AI responses that may contain markdown,
 * code blocks, or other text mixed with JSON content.
 *
 * Used by various routes that need to recover JSON from Cursor or Claude
 * text responses when structured output is not available.
 */
|
||||||
|
|
||||||
|
import { createLogger } from '@automaker/utils';
|
||||||
|
|
||||||
|
// Module-level logger; extractJson falls back to it when the caller supplies no `options.logger`.
const logger = createLogger('JsonExtractor');
|
||||||
|
|
||||||
|
/**
 * Minimal logger contract accepted by the JSON extraction helpers.
 *
 * Callers can pass any object of this shape through
 * `ExtractJsonOptions.logger` to route the extractor's diagnostics through
 * their own logger instead of the module default.
 */
export interface JsonExtractorLogger {
  /** Receives the per-strategy progress messages and the final success/failure message. */
  debug: (message: string, ...args: unknown[]) => void;
  /** Optional warning sink; callers are not required to provide it. */
  warn?: (message: string, ...args: unknown[]) => void;
}
|
||||||
|
|
||||||
|
/**
 * Options controlling how JSON is extracted and validated.
 */
export interface ExtractJsonOptions {
  /** Custom logger (defaults to the module's internal logger). */
  logger?: JsonExtractorLogger;
  /** Key that must be present on the extracted JSON object for it to be accepted. */
  requiredKey?: string;
  /**
   * Whether the value stored under `requiredKey` must be an array.
   * Has no effect unless `requiredKey` is also set.
   */
  requireArray?: boolean;
}
|
||||||
|
|
||||||
|
/**
 * Extract a JSON value from free-form response text (for example an AI reply
 * that mixes prose, markdown and JSON).
 *
 * Strategies are tried in order; the first one whose parsed result passes
 * validation wins:
 * 1. JSON in a ```json fenced code block
 * 2. JSON in a plain ``` fenced code block whose content starts with { or [
 * 3. An object that opens with `requiredKey` (only when `requiredKey` is set),
 *    delimited by brace matching. Whitespace between the opening brace and the
 *    key is tolerated, and every occurrence is tried in text order.
 * 4. The object starting at the first opening brace, delimited by brace matching
 * 5. Everything from the first opening brace to the last closing brace
 * 6. The entire trimmed response
 *
 * A parsed value is accepted only if it is a non-null object (arrays count)
 * and, when `requiredKey` is set, contains that key - holding an array if
 * `requireArray` is also set. `requireArray` is ignored without `requiredKey`.
 *
 * @param responseText - The raw response text that may contain JSON
 * @param options - Optional extraction options
 * @returns The parsed value typed as `T` (its shape beyond the checks above is
 *   not verified), or null if every strategy fails
 */
export function extractJson<T = Record<string, unknown>>(
  responseText: string,
  options: ExtractJsonOptions = {}
): T | null {
  const log = options.logger ?? logger;
  const requiredKey = options.requiredKey;
  const requireArray = options.requireArray ?? false;

  /**
   * Validate that the result has the required key/structure.
   */
  const validateResult = (result: unknown): result is T => {
    if (!result || typeof result !== 'object') return false;
    if (requiredKey) {
      const obj = result as Record<string, unknown>;
      if (!(requiredKey in obj)) return false;
      if (requireArray && !Array.isArray(obj[requiredKey])) return false;
    }
    return true;
  };

  /**
   * Return the index just past the brace that closes the one at `startIdx`,
   * or -1 if the text ends first. Braces inside double-quoted JSON strings
   * (including strings containing escaped quotes) are not counted, so string
   * values such as "a } b" cannot cut the object short.
   */
  const findMatchingBrace = (text: string, startIdx: number): number => {
    let depth = 0;
    let inString = false;
    let escaped = false;
    for (let i = startIdx; i < text.length; i++) {
      const ch = text.charAt(i);
      if (inString) {
        if (escaped) {
          // Character after a backslash is part of the string, whatever it is.
          escaped = false;
        } else if (ch === '\\') {
          escaped = true;
        } else if (ch === '"') {
          inString = false;
        }
        continue;
      }
      if (ch === '"') {
        inString = true;
      } else if (ch === '{') {
        depth++;
      } else if (ch === '}') {
        depth--;
        if (depth === 0) {
          return i + 1;
        }
      }
    }
    return -1;
  };

  /** JSON insignificant whitespace: space, tab, line feed, carriage return. */
  const isJsonWhitespace = (ch: string): boolean =>
    ch === ' ' || ch === '\t' || ch === '\n' || ch === '\r';

  const strategies: Array<() => unknown> = [
    // Strategy 1: JSON in ```json code block
    () => {
      const match = responseText.match(/```json\s*([\s\S]*?)```/);
      if (match) {
        log.debug('Extracting JSON from ```json code block');
        return JSON.parse(match[1].trim());
      }
      return null;
    },

    // Strategy 2: JSON in ``` code block (no language specified)
    () => {
      const match = responseText.match(/```\s*([\s\S]*?)```/);
      if (match) {
        const content = match[1].trim();
        // Only try if it looks like JSON (starts with { or [)
        if (content.startsWith('{') || content.startsWith('[')) {
          log.debug('Extracting JSON from ``` code block');
          return JSON.parse(content);
        }
      }
      return null;
    },

    // Strategy 3: Find a JSON object that opens with the required key (if specified)
    () => {
      if (!requiredKey) return null;

      const quotedKey = `"${requiredKey}"`;
      for (
        let keyIdx = responseText.indexOf(quotedKey);
        keyIdx !== -1;
        keyIdx = responseText.indexOf(quotedKey, keyIdx + 1)
      ) {
        // Walk back over whitespace so pretty-printed `{\n  "key"` is found too.
        let startIdx = keyIdx - 1;
        while (startIdx >= 0 && isJsonWhitespace(responseText.charAt(startIdx))) {
          startIdx--;
        }
        if (startIdx < 0 || responseText.charAt(startIdx) !== '{') continue;

        const endIdx = findMatchingBrace(responseText, startIdx);
        if (endIdx <= startIdx) continue;

        log.debug(`Extracting JSON with required key "${requiredKey}"`);
        try {
          const candidate: unknown = JSON.parse(responseText.slice(startIdx, endIdx));
          // Validate here so a bad early occurrence does not hide a good later one.
          if (validateResult(candidate)) return candidate;
        } catch {
          // This occurrence is not valid JSON; try the next one.
        }
      }
      return null;
    },

    // Strategy 4: Find any JSON object by matching braces
    () => {
      const startIdx = responseText.indexOf('{');
      if (startIdx === -1) return null;

      const endIdx = findMatchingBrace(responseText, startIdx);
      if (endIdx > startIdx) {
        log.debug('Extracting JSON by brace matching');
        return JSON.parse(responseText.slice(startIdx, endIdx));
      }
      return null;
    },

    // Strategy 5: Find JSON using first { to last } (may be less accurate)
    () => {
      const firstBrace = responseText.indexOf('{');
      const lastBrace = responseText.lastIndexOf('}');
      if (firstBrace !== -1 && lastBrace > firstBrace) {
        log.debug('Extracting JSON from first { to last }');
        return JSON.parse(responseText.slice(firstBrace, lastBrace + 1));
      }
      return null;
    },

    // Strategy 6: Try parsing the entire response as JSON
    () => {
      const trimmed = responseText.trim();
      if (trimmed.startsWith('{') || trimmed.startsWith('[')) {
        log.debug('Parsing entire response as JSON');
        return JSON.parse(trimmed);
      }
      return null;
    },
  ];

  for (const strategy of strategies) {
    try {
      const result = strategy();
      if (validateResult(result)) {
        log.debug('Successfully extracted JSON');
        return result;
      }
    } catch {
      // JSON.parse rejected this strategy's candidate; deliberately fall
      // through to the next strategy (best-effort extraction).
    }
  }

  log.debug('Failed to extract JSON from response');
  return null;
}
|
||||||
|
|
||||||
|
/**
 * Extract JSON that must contain a specific key.
 * Convenience wrapper around extractJson that sets `requiredKey`.
 *
 * @param responseText - The raw response text
 * @param requiredKey - Key that must be present in the extracted JSON
 * @param options - Additional options (the explicit `requiredKey` argument
 *   takes precedence over anything spread in from here)
 * @returns Parsed JSON object or null
 */
export function extractJsonWithKey<T = Record<string, unknown>>(
  responseText: string,
  requiredKey: string,
  options: Omit<ExtractJsonOptions, 'requiredKey'> = {}
): T | null {
  return extractJson<T>(responseText, { ...options, requiredKey });
}
|
||||||
|
|
||||||
|
/**
 * Extract JSON that has a required array property.
 * Useful for extracting responses like { "suggestions": [...] }.
 * Convenience wrapper around extractJson that sets `requiredKey` to
 * `arrayKey` and `requireArray` to true.
 *
 * @param responseText - The raw response text
 * @param arrayKey - Key that must contain an array
 * @param options - Additional options (the explicit key and array requirement
 *   take precedence over anything spread in from here)
 * @returns Parsed JSON object or null
 */
export function extractJsonWithArray<T = Record<string, unknown>>(
  responseText: string,
  arrayKey: string,
  options: Omit<ExtractJsonOptions, 'requiredKey' | 'requireArray'> = {}
): T | null {
  return extractJson<T>(responseText, {
    ...options,
    requiredKey: arrayKey,
    requireArray: true,
  });
}
|
||||||
@@ -17,6 +17,7 @@ import type {
|
|||||||
} from '@automaker/types';
|
} from '@automaker/types';
|
||||||
import { isCursorModel } from '@automaker/types';
|
import { isCursorModel } from '@automaker/types';
|
||||||
import { createSuggestionsOptions } from '../../../lib/sdk-options.js';
|
import { createSuggestionsOptions } from '../../../lib/sdk-options.js';
|
||||||
|
import { extractJson } from '../../../lib/json-extractor.js';
|
||||||
import { writeValidation } from '../../../lib/validation-storage.js';
|
import { writeValidation } from '../../../lib/validation-storage.js';
|
||||||
import { ProviderFactory } from '../../../providers/provider-factory.js';
|
import { ProviderFactory } from '../../../providers/provider-factory.js';
|
||||||
import {
|
import {
|
||||||
@@ -37,73 +38,6 @@ import { getAutoLoadClaudeMdSetting } from '../../../lib/settings-helpers.js';
|
|||||||
/** Valid Claude model values for validation */
|
/** Valid Claude model values for validation */
|
||||||
const VALID_CLAUDE_MODELS: readonly ModelAlias[] = ['opus', 'sonnet', 'haiku'] as const;
|
const VALID_CLAUDE_MODELS: readonly ModelAlias[] = ['opus', 'sonnet', 'haiku'] as const;
|
||||||
|
|
||||||
/**
|
|
||||||
* Extract JSON from a response that may contain markdown code blocks or other text.
|
|
||||||
* Tries multiple extraction strategies in order of likelihood.
|
|
||||||
*/
|
|
||||||
function extractJsonFromResponse<T>(responseText: string, log: typeof logger): T | null {
|
|
||||||
const strategies = [
|
|
||||||
// Strategy 1: JSON in ```json code block
|
|
||||||
() => {
|
|
||||||
const match = responseText.match(/```json\s*([\s\S]*?)```/);
|
|
||||||
if (match) {
|
|
||||||
log.debug('Extracting JSON from ```json code block');
|
|
||||||
return JSON.parse(match[1].trim()) as T;
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
},
|
|
||||||
// Strategy 2: JSON in ``` code block (no language specified)
|
|
||||||
() => {
|
|
||||||
const match = responseText.match(/```\s*([\s\S]*?)```/);
|
|
||||||
if (match) {
|
|
||||||
const content = match[1].trim();
|
|
||||||
// Only try if it looks like JSON (starts with { or [)
|
|
||||||
if (content.startsWith('{') || content.startsWith('[')) {
|
|
||||||
log.debug('Extracting JSON from ``` code block');
|
|
||||||
return JSON.parse(content) as T;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
},
|
|
||||||
// Strategy 3: Find JSON object directly in text (first { to last })
|
|
||||||
() => {
|
|
||||||
const firstBrace = responseText.indexOf('{');
|
|
||||||
const lastBrace = responseText.lastIndexOf('}');
|
|
||||||
if (firstBrace !== -1 && lastBrace > firstBrace) {
|
|
||||||
const jsonCandidate = responseText.slice(firstBrace, lastBrace + 1);
|
|
||||||
log.debug('Extracting JSON object from raw text');
|
|
||||||
return JSON.parse(jsonCandidate) as T;
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
},
|
|
||||||
// Strategy 4: Try parsing the entire response as JSON
|
|
||||||
() => {
|
|
||||||
const trimmed = responseText.trim();
|
|
||||||
if (trimmed.startsWith('{') || trimmed.startsWith('[')) {
|
|
||||||
log.debug('Parsing entire response as JSON');
|
|
||||||
return JSON.parse(trimmed) as T;
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
},
|
|
||||||
];
|
|
||||||
|
|
||||||
for (const strategy of strategies) {
|
|
||||||
try {
|
|
||||||
const result = strategy();
|
|
||||||
if (result !== null) {
|
|
||||||
log.debug('Successfully parsed JSON from Cursor response:', result);
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
} catch {
|
|
||||||
// Strategy failed, try next one
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
log.error('Failed to extract JSON from Cursor response after trying all strategies');
|
|
||||||
log.debug('Raw response:', responseText.slice(0, 500) + (responseText.length > 500 ? '...' : ''));
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Request body for issue validation
|
* Request body for issue validation
|
||||||
*/
|
*/
|
||||||
@@ -201,9 +135,9 @@ ${prompt}`;
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parse JSON from the response text
|
// Parse JSON from the response text using shared utility
|
||||||
if (responseText) {
|
if (responseText) {
|
||||||
validationResult = extractJsonFromResponse<IssueValidationResult>(responseText, logger);
|
validationResult = extractJson<IssueValidationResult>(responseText, { logger });
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// Use Claude SDK for Claude models
|
// Use Claude SDK for Claude models
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ import { createLogger } from '@automaker/utils';
|
|||||||
import { DEFAULT_PHASE_MODELS, isCursorModel } from '@automaker/types';
|
import { DEFAULT_PHASE_MODELS, isCursorModel } from '@automaker/types';
|
||||||
import { resolveModelString } from '@automaker/model-resolver';
|
import { resolveModelString } from '@automaker/model-resolver';
|
||||||
import { createSuggestionsOptions } from '../../lib/sdk-options.js';
|
import { createSuggestionsOptions } from '../../lib/sdk-options.js';
|
||||||
|
import { extractJsonWithArray } from '../../lib/json-extractor.js';
|
||||||
import { ProviderFactory } from '../../providers/provider-factory.js';
|
import { ProviderFactory } from '../../providers/provider-factory.js';
|
||||||
import { FeatureLoader } from '../../services/feature-loader.js';
|
import { FeatureLoader } from '../../services/feature-loader.js';
|
||||||
import { getAppSpecPath } from '@automaker/platform';
|
import { getAppSpecPath } from '@automaker/platform';
|
||||||
@@ -289,9 +290,13 @@ ${JSON.stringify(suggestionsSchema, null, 2)}`;
|
|||||||
})),
|
})),
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
// Fallback: try to parse from text using multiple strategies
|
// Fallback: try to parse from text using shared extraction utility
|
||||||
logger.warn('No structured output received, attempting to parse from text');
|
logger.warn('No structured output received, attempting to parse from text');
|
||||||
const parsed = extractSuggestionsJson(responseText);
|
const parsed = extractJsonWithArray<{ suggestions: Array<Record<string, unknown>> }>(
|
||||||
|
responseText,
|
||||||
|
'suggestions',
|
||||||
|
{ logger }
|
||||||
|
);
|
||||||
if (parsed && parsed.suggestions) {
|
if (parsed && parsed.suggestions) {
|
||||||
events.emit('suggestions:event', {
|
events.emit('suggestions:event', {
|
||||||
type: 'suggestions_complete',
|
type: 'suggestions_complete',
|
||||||
@@ -322,99 +327,3 @@ ${JSON.stringify(suggestionsSchema, null, 2)}`;
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Extract suggestions JSON from response text using multiple strategies.
|
|
||||||
* Handles various formats: markdown code blocks, raw JSON, etc.
|
|
||||||
*/
|
|
||||||
function extractSuggestionsJson(
|
|
||||||
responseText: string
|
|
||||||
): { suggestions: Array<Record<string, unknown>> } | null {
|
|
||||||
const strategies = [
|
|
||||||
// Strategy 1: JSON in ```json code block
|
|
||||||
() => {
|
|
||||||
const match = responseText.match(/```json\s*([\s\S]*?)```/);
|
|
||||||
if (match) {
|
|
||||||
return JSON.parse(match[1].trim());
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
},
|
|
||||||
// Strategy 2: JSON in ``` code block (no language specified)
|
|
||||||
() => {
|
|
||||||
const match = responseText.match(/```\s*([\s\S]*?)```/);
|
|
||||||
if (match) {
|
|
||||||
const content = match[1].trim();
|
|
||||||
if (content.startsWith('{') && content.includes('"suggestions"')) {
|
|
||||||
return JSON.parse(content);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
},
|
|
||||||
// Strategy 3: Find JSON object containing "suggestions" array
|
|
||||||
() => {
|
|
||||||
// Find the start of the JSON object
|
|
||||||
const startIdx = responseText.indexOf('{"suggestions"');
|
|
||||||
if (startIdx === -1) return null;
|
|
||||||
|
|
||||||
// Find matching closing brace by counting brackets
|
|
||||||
let depth = 0;
|
|
||||||
let endIdx = -1;
|
|
||||||
for (let i = startIdx; i < responseText.length; i++) {
|
|
||||||
if (responseText[i] === '{') depth++;
|
|
||||||
if (responseText[i] === '}') {
|
|
||||||
depth--;
|
|
||||||
if (depth === 0) {
|
|
||||||
endIdx = i + 1;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (endIdx > startIdx) {
|
|
||||||
return JSON.parse(responseText.slice(startIdx, endIdx));
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
},
|
|
||||||
// Strategy 4: Find any JSON object with suggestions
|
|
||||||
() => {
|
|
||||||
const startIdx = responseText.indexOf('{');
|
|
||||||
if (startIdx === -1) return null;
|
|
||||||
|
|
||||||
// Find matching closing brace
|
|
||||||
let depth = 0;
|
|
||||||
let endIdx = -1;
|
|
||||||
for (let i = startIdx; i < responseText.length; i++) {
|
|
||||||
if (responseText[i] === '{') depth++;
|
|
||||||
if (responseText[i] === '}') {
|
|
||||||
depth--;
|
|
||||||
if (depth === 0) {
|
|
||||||
endIdx = i + 1;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (endIdx > startIdx) {
|
|
||||||
const parsed = JSON.parse(responseText.slice(startIdx, endIdx));
|
|
||||||
if (parsed.suggestions && Array.isArray(parsed.suggestions)) {
|
|
||||||
return parsed;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
},
|
|
||||||
];
|
|
||||||
|
|
||||||
for (const strategy of strategies) {
|
|
||||||
try {
|
|
||||||
const result = strategy();
|
|
||||||
if (result && result.suggestions && Array.isArray(result.suggestions)) {
|
|
||||||
logger.debug('Successfully extracted suggestions JSON');
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
} catch {
|
|
||||||
// Strategy failed, try next
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|||||||
Reference in New Issue
Block a user