From 19016f03d797353f47713b976154675f921db5e0 Mon Sep 17 00:00:00 2001 From: Kacper Date: Tue, 30 Dec 2025 15:18:45 +0100 Subject: [PATCH] refactor(server): Extract JSON extraction utility to shared module MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Created apps/server/src/lib/json-extractor.ts with reusable JSON extraction utilities for parsing AI responses: - extractJson(): Multi-strategy JSON extraction - extractJsonWithKey(): Extract with required key validation - extractJsonWithArray(): Extract with array property validation Strategies (tried in order): 1. JSON in ```json code block 2. JSON in ``` code block 3. Find JSON object by matching braces (with optional required key) 4. Find any JSON object by matching braces 5. First { to last } 6. Parse entire response Updated: - generate-suggestions.ts: Use extractJsonWithArray('suggestions') - validate-issue.ts: Use extractJson() Both files now use the shared utility instead of local implementations, following the DRY principle. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- apps/server/src/lib/json-extractor.ts | 211 ++++++++++++++++++ .../routes/github/routes/validate-issue.ts | 72 +----- .../suggestions/generate-suggestions.ts | 105 +-------- 3 files changed, 221 insertions(+), 167 deletions(-) create mode 100644 apps/server/src/lib/json-extractor.ts diff --git a/apps/server/src/lib/json-extractor.ts b/apps/server/src/lib/json-extractor.ts new file mode 100644 index 00000000..a1a97dd8 --- /dev/null +++ b/apps/server/src/lib/json-extractor.ts @@ -0,0 +1,211 @@ +/** + * JSON Extraction Utilities + * + * Robust JSON extraction from AI responses that may contain markdown, + * code blocks, or other text mixed with JSON content. + * + * Used by various routes that parse structured output from Cursor or + * Claude responses when structured output is not available. 
+ */ + +import { createLogger } from '@automaker/utils'; + +const logger = createLogger('JsonExtractor'); + +/** + * Logger interface for optional custom logging + */ +export interface JsonExtractorLogger { + debug: (message: string, ...args: unknown[]) => void; + warn?: (message: string, ...args: unknown[]) => void; +} + +/** + * Options for JSON extraction + */ +export interface ExtractJsonOptions { + /** Custom logger (defaults to internal logger) */ + logger?: JsonExtractorLogger; + /** Required key that must be present in the extracted JSON */ + requiredKey?: string; + /** Whether the required key's value must be an array */ + requireArray?: boolean; +} + +/** + * Extract JSON from response text using multiple strategies. + * + * Strategies tried in order: + * 1. JSON in ```json code block + * 2. JSON in ``` code block (no language) + * 3. Find JSON object by matching braces (starting with requiredKey if specified) + * 4. Find any JSON object by matching braces + * 5. Find JSON using first { to last } (may be less accurate); 6. Parse entire response as JSON + * + * @param responseText - The raw response text that may contain JSON + * @param options - Optional extraction options + * @returns Parsed JSON object or null if extraction fails + */ +export function extractJson>( + responseText: string, + options: ExtractJsonOptions = {} +): T | null { + const log = options.logger || logger; + const requiredKey = options.requiredKey; + const requireArray = options.requireArray ?? 
false; + + /** + * Validate that the result has the required key/structure + */ + const validateResult = (result: unknown): result is T => { + if (!result || typeof result !== 'object') return false; + if (requiredKey) { + const obj = result as Record; + if (!(requiredKey in obj)) return false; + if (requireArray && !Array.isArray(obj[requiredKey])) return false; + } + return true; + }; + + /** + * Find matching closing brace by counting brackets + */ + const findMatchingBrace = (text: string, startIdx: number): number => { + let depth = 0; + for (let i = startIdx; i < text.length; i++) { + if (text[i] === '{') depth++; + if (text[i] === '}') { + depth--; + if (depth === 0) { + return i + 1; + } + } + } + return -1; + }; + + const strategies = [ + // Strategy 1: JSON in ```json code block + () => { + const match = responseText.match(/```json\s*([\s\S]*?)```/); + if (match) { + log.debug('Extracting JSON from ```json code block'); + return JSON.parse(match[1].trim()); + } + return null; + }, + + // Strategy 2: JSON in ``` code block (no language specified) + () => { + const match = responseText.match(/```\s*([\s\S]*?)```/); + if (match) { + const content = match[1].trim(); + // Only try if it looks like JSON (starts with { or [) + if (content.startsWith('{') || content.startsWith('[')) { + log.debug('Extracting JSON from ``` code block'); + return JSON.parse(content); + } + } + return null; + }, + + // Strategy 3: Find JSON object containing the required key (if specified) + () => { + if (!requiredKey) return null; + + const searchPattern = `{"${requiredKey}"`; + const startIdx = responseText.indexOf(searchPattern); + if (startIdx === -1) return null; + + const endIdx = findMatchingBrace(responseText, startIdx); + if (endIdx > startIdx) { + log.debug(`Extracting JSON with required key "${requiredKey}"`); + return JSON.parse(responseText.slice(startIdx, endIdx)); + } + return null; + }, + + // Strategy 4: Find any JSON object by matching braces + () => { + const 
startIdx = responseText.indexOf('{'); + if (startIdx === -1) return null; + + const endIdx = findMatchingBrace(responseText, startIdx); + if (endIdx > startIdx) { + log.debug('Extracting JSON by brace matching'); + return JSON.parse(responseText.slice(startIdx, endIdx)); + } + return null; + }, + + // Strategy 5: Find JSON using first { to last } (may be less accurate) + () => { + const firstBrace = responseText.indexOf('{'); + const lastBrace = responseText.lastIndexOf('}'); + if (firstBrace !== -1 && lastBrace > firstBrace) { + log.debug('Extracting JSON from first { to last }'); + return JSON.parse(responseText.slice(firstBrace, lastBrace + 1)); + } + return null; + }, + + // Strategy 6: Try parsing the entire response as JSON + () => { + const trimmed = responseText.trim(); + if (trimmed.startsWith('{') || trimmed.startsWith('[')) { + log.debug('Parsing entire response as JSON'); + return JSON.parse(trimmed); + } + return null; + }, + ]; + + for (const strategy of strategies) { + try { + const result = strategy(); + if (validateResult(result)) { + log.debug('Successfully extracted JSON'); + return result as T; + } + } catch { + // Strategy failed, try next + } + } + + log.debug('Failed to extract JSON from response'); + return null; +} + +/** + * Extract JSON with a specific required key. + * Convenience wrapper around extractJson. + * + * @param responseText - The raw response text + * @param requiredKey - Key that must be present in the extracted JSON + * @param options - Additional options + * @returns Parsed JSON object or null + */ +export function extractJsonWithKey>( + responseText: string, + requiredKey: string, + options: Omit = {} +): T | null { + return extractJson(responseText, { ...options, requiredKey }); +} + +/** + * Extract JSON that has a required array property. + * Useful for extracting responses like { "suggestions": [...] 
} + * + * @param responseText - The raw response text + * @param arrayKey - Key that must contain an array + * @param options - Additional options + * @returns Parsed JSON object or null + */ +export function extractJsonWithArray>( + responseText: string, + arrayKey: string, + options: Omit = {} +): T | null { + return extractJson(responseText, { ...options, requiredKey: arrayKey, requireArray: true }); +} diff --git a/apps/server/src/routes/github/routes/validate-issue.ts b/apps/server/src/routes/github/routes/validate-issue.ts index 82bd6f7b..29f55ed2 100644 --- a/apps/server/src/routes/github/routes/validate-issue.ts +++ b/apps/server/src/routes/github/routes/validate-issue.ts @@ -17,6 +17,7 @@ import type { } from '@automaker/types'; import { isCursorModel } from '@automaker/types'; import { createSuggestionsOptions } from '../../../lib/sdk-options.js'; +import { extractJson } from '../../../lib/json-extractor.js'; import { writeValidation } from '../../../lib/validation-storage.js'; import { ProviderFactory } from '../../../providers/provider-factory.js'; import { @@ -37,73 +38,6 @@ import { getAutoLoadClaudeMdSetting } from '../../../lib/settings-helpers.js'; /** Valid Claude model values for validation */ const VALID_CLAUDE_MODELS: readonly ModelAlias[] = ['opus', 'sonnet', 'haiku'] as const; -/** - * Extract JSON from a response that may contain markdown code blocks or other text. - * Tries multiple extraction strategies in order of likelihood. 
- */ -function extractJsonFromResponse(responseText: string, log: typeof logger): T | null { - const strategies = [ - // Strategy 1: JSON in ```json code block - () => { - const match = responseText.match(/```json\s*([\s\S]*?)```/); - if (match) { - log.debug('Extracting JSON from ```json code block'); - return JSON.parse(match[1].trim()) as T; - } - return null; - }, - // Strategy 2: JSON in ``` code block (no language specified) - () => { - const match = responseText.match(/```\s*([\s\S]*?)```/); - if (match) { - const content = match[1].trim(); - // Only try if it looks like JSON (starts with { or [) - if (content.startsWith('{') || content.startsWith('[')) { - log.debug('Extracting JSON from ``` code block'); - return JSON.parse(content) as T; - } - } - return null; - }, - // Strategy 3: Find JSON object directly in text (first { to last }) - () => { - const firstBrace = responseText.indexOf('{'); - const lastBrace = responseText.lastIndexOf('}'); - if (firstBrace !== -1 && lastBrace > firstBrace) { - const jsonCandidate = responseText.slice(firstBrace, lastBrace + 1); - log.debug('Extracting JSON object from raw text'); - return JSON.parse(jsonCandidate) as T; - } - return null; - }, - // Strategy 4: Try parsing the entire response as JSON - () => { - const trimmed = responseText.trim(); - if (trimmed.startsWith('{') || trimmed.startsWith('[')) { - log.debug('Parsing entire response as JSON'); - return JSON.parse(trimmed) as T; - } - return null; - }, - ]; - - for (const strategy of strategies) { - try { - const result = strategy(); - if (result !== null) { - log.debug('Successfully parsed JSON from Cursor response:', result); - return result; - } - } catch { - // Strategy failed, try next one - } - } - - log.error('Failed to extract JSON from Cursor response after trying all strategies'); - log.debug('Raw response:', responseText.slice(0, 500) + (responseText.length > 500 ? '...' 
: '')); - return null; -} - /** * Request body for issue validation */ @@ -201,9 +135,9 @@ ${prompt}`; } } - // Parse JSON from the response text + // Parse JSON from the response text using shared utility if (responseText) { - validationResult = extractJsonFromResponse(responseText, logger); + validationResult = extractJson(responseText, { logger }); } } else { // Use Claude SDK for Claude models diff --git a/apps/server/src/routes/suggestions/generate-suggestions.ts b/apps/server/src/routes/suggestions/generate-suggestions.ts index 22855a89..a0eb60f1 100644 --- a/apps/server/src/routes/suggestions/generate-suggestions.ts +++ b/apps/server/src/routes/suggestions/generate-suggestions.ts @@ -11,6 +11,7 @@ import { createLogger } from '@automaker/utils'; import { DEFAULT_PHASE_MODELS, isCursorModel } from '@automaker/types'; import { resolveModelString } from '@automaker/model-resolver'; import { createSuggestionsOptions } from '../../lib/sdk-options.js'; +import { extractJsonWithArray } from '../../lib/json-extractor.js'; import { ProviderFactory } from '../../providers/provider-factory.js'; import { FeatureLoader } from '../../services/feature-loader.js'; import { getAppSpecPath } from '@automaker/platform'; @@ -289,9 +290,13 @@ ${JSON.stringify(suggestionsSchema, null, 2)}`; })), }); } else { - // Fallback: try to parse from text using multiple strategies + // Fallback: try to parse from text using shared extraction utility logger.warn('No structured output received, attempting to parse from text'); - const parsed = extractSuggestionsJson(responseText); + const parsed = extractJsonWithArray<{ suggestions: Array> }>( + responseText, + 'suggestions', + { logger } + ); if (parsed && parsed.suggestions) { events.emit('suggestions:event', { type: 'suggestions_complete', @@ -322,99 +327,3 @@ ${JSON.stringify(suggestionsSchema, null, 2)}`; }); } } - -/** - * Extract suggestions JSON from response text using multiple strategies. 
- * Handles various formats: markdown code blocks, raw JSON, etc. - */ -function extractSuggestionsJson( - responseText: string -): { suggestions: Array> } | null { - const strategies = [ - // Strategy 1: JSON in ```json code block - () => { - const match = responseText.match(/```json\s*([\s\S]*?)```/); - if (match) { - return JSON.parse(match[1].trim()); - } - return null; - }, - // Strategy 2: JSON in ``` code block (no language specified) - () => { - const match = responseText.match(/```\s*([\s\S]*?)```/); - if (match) { - const content = match[1].trim(); - if (content.startsWith('{') && content.includes('"suggestions"')) { - return JSON.parse(content); - } - } - return null; - }, - // Strategy 3: Find JSON object containing "suggestions" array - () => { - // Find the start of the JSON object - const startIdx = responseText.indexOf('{"suggestions"'); - if (startIdx === -1) return null; - - // Find matching closing brace by counting brackets - let depth = 0; - let endIdx = -1; - for (let i = startIdx; i < responseText.length; i++) { - if (responseText[i] === '{') depth++; - if (responseText[i] === '}') { - depth--; - if (depth === 0) { - endIdx = i + 1; - break; - } - } - } - - if (endIdx > startIdx) { - return JSON.parse(responseText.slice(startIdx, endIdx)); - } - return null; - }, - // Strategy 4: Find any JSON object with suggestions - () => { - const startIdx = responseText.indexOf('{'); - if (startIdx === -1) return null; - - // Find matching closing brace - let depth = 0; - let endIdx = -1; - for (let i = startIdx; i < responseText.length; i++) { - if (responseText[i] === '{') depth++; - if (responseText[i] === '}') { - depth--; - if (depth === 0) { - endIdx = i + 1; - break; - } - } - } - - if (endIdx > startIdx) { - const parsed = JSON.parse(responseText.slice(startIdx, endIdx)); - if (parsed.suggestions && Array.isArray(parsed.suggestions)) { - return parsed; - } - } - return null; - }, - ]; - - for (const strategy of strategies) { - try { - const 
result = strategy(); - if (result && result.suggestions && Array.isArray(result.suggestions)) { - logger.debug('Successfully extracted suggestions JSON'); - return result; - } - } catch { - // Strategy failed, try next - } - } - - return null; -}