/** * XML Extraction Utilities * * Robust XML parsing utilities for extracting and updating sections * from app_spec.txt XML content. Uses regex-based parsing which is * sufficient for our controlled XML structure. * * Note: If more complex XML parsing is needed in the future, consider * using a library like 'fast-xml-parser' or 'xml2js'. */ import { createLogger } from '@automaker/utils'; import type { SpecOutput } from '@automaker/types'; const logger = createLogger('XmlExtractor'); /** * Represents an implemented feature extracted from XML */ export interface ImplementedFeature { name: string; description: string; file_locations?: string[]; } /** * Logger interface for optional custom logging */ export interface XmlExtractorLogger { debug: (message: string, ...args: unknown[]) => void; warn?: (message: string, ...args: unknown[]) => void; } /** * Options for XML extraction operations */ export interface ExtractXmlOptions { /** Custom logger (defaults to internal logger) */ logger?: XmlExtractorLogger; } /** * Escape special XML characters * Handles undefined/null values by converting them to empty strings */ export function escapeXml(str: string | undefined | null): string { if (str == null) { return ''; } return str .replace(/&/g, '&') .replace(//g, '>') .replace(/"/g, '"') .replace(/'/g, '''); } /** * Unescape XML entities back to regular characters */ export function unescapeXml(str: string): string { return str .replace(/'/g, "'") .replace(/"/g, '"') .replace(/>/g, '>') .replace(/</g, '<') .replace(/&/g, '&'); } /** * Extract the content of a specific XML section * * @param xmlContent - The full XML content * @param tagName - The tag name to extract (e.g., 'implemented_features') * @param options - Optional extraction options * @returns The content between the tags, or null if not found */ export function extractXmlSection( xmlContent: string, tagName: string, options: ExtractXmlOptions = {} ): string | null { const log = options.logger || logger; const regex = new RegExp(`<${tagName}>([\\s\\S]*?)<\\/${tagName}>`, 'i'); const match = xmlContent.match(regex); if (match) { log.debug(`Extracted <${tagName}> section`); return match[1]; } log.debug(`Section <${tagName}> not found`); return null; } /** * Extract all values from repeated XML elements * * @param xmlContent - The XML content to search * @param tagName - The tag name to extract values from * @param options - Optional extraction options * @returns Array of extracted values (unescaped) */ export function extractXmlElements( xmlContent: string, tagName: string, options: ExtractXmlOptions = {} ): string[] { const log = options.logger || logger; const values: string[] = []; const regex = new RegExp(`<${tagName}>([\\s\\S]*?)<\\/${tagName}>`, 'g'); const matches = xmlContent.matchAll(regex); for (const match of matches) { values.push(unescapeXml(match[1].trim())); } log.debug(`Extracted ${values.length} <${tagName}> elements`); return values; } /** * Extract implemented features from app_spec.txt XML content * * @param specContent - The full XML content of app_spec.txt * @param options - Optional extraction options * @returns Array of implemented features with name, description, and optional file_locations */ export function extractImplementedFeatures( specContent: string, options: ExtractXmlOptions = {} ): ImplementedFeature[] { const log = options.logger || logger; const features: ImplementedFeature[] = []; // Match ... section const implementedSection = extractXmlSection(specContent, 'implemented_features', options); if (!implementedSection) { log.debug('No implemented_features section found'); return features; } // Extract individual feature blocks const featureRegex = /([\s\S]*?)<\/feature>/g; const featureMatches = implementedSection.matchAll(featureRegex); for (const featureMatch of featureMatches) { const featureContent = featureMatch[1]; // Extract name const nameMatch = featureContent.match(/([\s\S]*?)<\/name>/); const name = nameMatch ? unescapeXml(nameMatch[1].trim()) : ''; // Extract description const descMatch = featureContent.match(/([\s\S]*?)<\/description>/); const description = descMatch ? unescapeXml(descMatch[1].trim()) : ''; // Extract file_locations if present const locationsSection = extractXmlSection(featureContent, 'file_locations', options); const file_locations = locationsSection ? extractXmlElements(locationsSection, 'location', options) : undefined; if (name) { features.push({ name, description, ...(file_locations && file_locations.length > 0 ? { file_locations } : {}), }); } } log.debug(`Extracted ${features.length} implemented features`); return features; } /** * Extract only the feature names from implemented_features section * * @param specContent - The full XML content of app_spec.txt * @param options - Optional extraction options * @returns Array of feature names */ export function extractImplementedFeatureNames( specContent: string, options: ExtractXmlOptions = {} ): string[] { const features = extractImplementedFeatures(specContent, options); return features.map((f) => f.name); } /** * Generate XML for a single implemented feature * * @param feature - The feature to convert to XML * @param indent - The base indentation level (default: 2 spaces) * @returns XML string for the feature */ export function featureToXml(feature: ImplementedFeature, indent: string = ' '): string { const i2 = indent.repeat(2); const i3 = indent.repeat(3); const i4 = indent.repeat(4); let xml = `${i2} ${i3}${escapeXml(feature.name)} ${i3}${escapeXml(feature.description)}`; if (feature.file_locations && feature.file_locations.length > 0) { xml += ` ${i3} ${feature.file_locations.map((loc) => `${i4}${escapeXml(loc)}`).join('\n')} ${i3}`; } xml += ` ${i2}`; return xml; } /** * Generate XML for an array of implemented features * * @param features - Array of features to convert to XML * @param indent - The base indentation level (default: 2 spaces) * @returns XML string for the implemented_features section content */ export function featuresToXml(features: ImplementedFeature[], indent: string = ' '): string { return features.map((f) => featureToXml(f, indent)).join('\n'); } /** * Update the implemented_features section in XML content * * @param specContent - The full XML content * @param newFeatures - The new features to set * @param options - Optional extraction options * @returns Updated XML content with the new implemented_features section */ export function updateImplementedFeaturesSection( specContent: string, newFeatures: ImplementedFeature[], options: ExtractXmlOptions = {} ): string { const log = options.logger || logger; const indent = ' '; // Generate new section content const newSectionContent = featuresToXml(newFeatures, indent); // Build the new section const newSection = ` ${newSectionContent} ${indent}`; // Check if section exists const sectionRegex = /[\s\S]*?<\/implemented_features>/; if (sectionRegex.test(specContent)) { log.debug('Replacing existing implemented_features section'); return specContent.replace(sectionRegex, newSection); } // If section doesn't exist, try to insert after core_capabilities const coreCapabilitiesEnd = ''; const insertIndex = specContent.indexOf(coreCapabilitiesEnd); if (insertIndex !== -1) { const insertPosition = insertIndex + coreCapabilitiesEnd.length; log.debug('Inserting implemented_features after core_capabilities'); return ( specContent.slice(0, insertPosition) + '\n\n' + indent + newSection + specContent.slice(insertPosition) ); } // As a fallback, insert before const projectSpecEnd = ''; const fallbackIndex = specContent.indexOf(projectSpecEnd); if (fallbackIndex !== -1) { log.debug('Inserting implemented_features before '); return ( specContent.slice(0, fallbackIndex) + indent + newSection + '\n' + specContent.slice(fallbackIndex) ); } log.warn?.('Could not find appropriate insertion point for implemented_features'); log.debug('Could not find appropriate insertion point for implemented_features'); return specContent; } /** * Add a new feature to the implemented_features section * * @param specContent - The full XML content * @param newFeature - The feature to add * @param options - Optional extraction options * @returns Updated XML content with the new feature added */ export function addImplementedFeature( specContent: string, newFeature: ImplementedFeature, options: ExtractXmlOptions = {} ): string { const log = options.logger || logger; // Extract existing features const existingFeatures = extractImplementedFeatures(specContent, options); // Check for duplicates by name const isDuplicate = existingFeatures.some( (f) => f.name.toLowerCase() === newFeature.name.toLowerCase() ); if (isDuplicate) { log.debug(`Feature "${newFeature.name}" already exists, skipping`); return specContent; } // Add the new feature const updatedFeatures = [...existingFeatures, newFeature]; log.debug(`Adding feature "${newFeature.name}"`); return updateImplementedFeaturesSection(specContent, updatedFeatures, options); } /** * Remove a feature from the implemented_features section by name * * @param specContent - The full XML content * @param featureName - The name of the feature to remove * @param options - Optional extraction options * @returns Updated XML content with the feature removed */ export function removeImplementedFeature( specContent: string, featureName: string, options: ExtractXmlOptions = {} ): string { const log = options.logger || logger; // Extract existing features const existingFeatures = extractImplementedFeatures(specContent, options); // Filter out the feature to remove const updatedFeatures = existingFeatures.filter( (f) => f.name.toLowerCase() !== featureName.toLowerCase() ); if (updatedFeatures.length === existingFeatures.length) { log.debug(`Feature "${featureName}" not found, no changes made`); return specContent; } log.debug(`Removing feature "${featureName}"`); return updateImplementedFeaturesSection(specContent, updatedFeatures, options); } /** * Update an existing feature in the implemented_features section * * @param specContent - The full XML content * @param featureName - The name of the feature to update * @param updates - Partial updates to apply to the feature * @param options - Optional extraction options * @returns Updated XML content with the feature modified */ export function updateImplementedFeature( specContent: string, featureName: string, updates: Partial, options: ExtractXmlOptions = {} ): string { const log = options.logger || logger; // Extract existing features const existingFeatures = extractImplementedFeatures(specContent, options); // Find and update the feature let found = false; const updatedFeatures = existingFeatures.map((f) => { if (f.name.toLowerCase() === featureName.toLowerCase()) { found = true; return { ...f, ...updates, // Preserve the original name if not explicitly updated name: updates.name ?? f.name, }; } return f; }); if (!found) { log.debug(`Feature "${featureName}" not found, no changes made`); return specContent; } log.debug(`Updating feature "${featureName}"`); return updateImplementedFeaturesSection(specContent, updatedFeatures, options); } /** * Check if a feature exists in the implemented_features section * * @param specContent - The full XML content * @param featureName - The name of the feature to check * @param options - Optional extraction options * @returns True if the feature exists */ export function hasImplementedFeature( specContent: string, featureName: string, options: ExtractXmlOptions = {} ): boolean { const features = extractImplementedFeatures(specContent, options); return features.some((f) => f.name.toLowerCase() === featureName.toLowerCase()); } /** * Convert extracted features to SpecOutput.implemented_features format * * @param features - Array of extracted features * @returns Features in SpecOutput format */ export function toSpecOutputFeatures( features: ImplementedFeature[] ): SpecOutput['implemented_features'] { return features.map((f) => ({ name: f.name, description: f.description, ...(f.file_locations && f.file_locations.length > 0 ? { file_locations: f.file_locations } : {}), })); } /** * Convert SpecOutput.implemented_features to ImplementedFeature format * * @param specFeatures - Features from SpecOutput * @returns Features in ImplementedFeature format */ export function fromSpecOutputFeatures( specFeatures: SpecOutput['implemented_features'] ): ImplementedFeature[] { return specFeatures.map((f) => ({ name: f.name, description: f.description, ...(f.file_locations && f.file_locations.length > 0 ? { file_locations: f.file_locations } : {}), })); }