#!/usr/bin/env node

const { Command } = require('commander');
const fs = require('fs-extra');
const path = require('node:path');
const { glob } = require('glob');
const { minimatch } = require('minimatch');

/**
 * Recursively discover all files in a directory.
 *
 * Files matching the project's .gitignore or a built-in list of commonly
 * ignored paths are excluded during the walk. On any failure the error is
 * logged and an empty list is returned (deliberate best-effort).
 *
 * @param {string} rootDir - The root directory to scan
 * @returns {Promise<string[]>} Array of absolute file paths
 */
async function discoverFiles(rootDir) {
  try {
    const gitignorePath = path.join(rootDir, '.gitignore');
    const gitignorePatterns = await parseGitignore(gitignorePath);

    // Common gitignore patterns that should always be ignored
    const commonIgnorePatterns = [
      // Version control
      '.git/**',
      '.svn/**',
      '.hg/**',
      '.bzr/**',
      // Dependencies
      'node_modules/**',
      'bower_components/**',
      'vendor/**',
      'packages/**',
      // Build outputs
      'build/**',
      'dist/**',
      'out/**',
      'target/**',
      'bin/**',
      'obj/**',
      'release/**',
      'debug/**',
      // Environment and config
      '.env',
      '.env.*',
      '*.env',
      '.config',
      // Logs
      'logs/**',
      '*.log',
      'npm-debug.log*',
      'yarn-debug.log*',
      'yarn-error.log*',
      'lerna-debug.log*',
      // Coverage and testing
      'coverage/**',
      '.nyc_output/**',
      '.coverage/**',
      'test-results/**',
      'junit.xml',
      // Cache directories
      '.cache/**',
      '.tmp/**',
      '.temp/**',
      'tmp/**',
      'temp/**',
      '.sass-cache/**',
      '.eslintcache',
      '.stylelintcache',
      // OS generated files
      '.DS_Store',
      '.DS_Store?',
      '._*',
      '.Spotlight-V100',
      '.Trashes',
      'ehthumbs.db',
      'Thumbs.db',
      'desktop.ini',
      // IDE and editor files
      '.vscode/**',
      '.idea/**',
      '*.swp',
      '*.swo',
      '*~',
      '.project',
      '.classpath',
      '.settings/**',
      '*.sublime-project',
      '*.sublime-workspace',
      // Package manager files
      'package-lock.json',
      'yarn.lock',
      'pnpm-lock.yaml',
      'composer.lock',
      'Pipfile.lock',
      // Runtime and compiled files
      '*.pyc',
      '*.pyo',
      '*.pyd',
      '__pycache__/**',
      '*.class',
      '*.jar',
      '*.war',
      '*.ear',
      '*.o',
      '*.so',
      '*.dll',
      '*.exe',
      // Documentation build
      '_site/**',
      '.jekyll-cache/**',
      '.jekyll-metadata',
      // Flattener specific outputs
      'flattened-codebase.xml',
      'repomix-output.xml',
    ];

    // "!pattern" lines are un-ignore rules; they are not exclusion globs, so
    // they are kept out of the ignore list here and handled by filterFiles().
    // NOTE(review): files already pruned here by a positive pattern cannot be
    // re-included later, so negations only affect files that survive this walk.
    const positiveGitignorePatterns = gitignorePatterns.filter(
      (pattern) => !pattern.startsWith('!'),
    );

    const combinedIgnores = [...positiveGitignorePatterns, ...commonIgnorePatterns];

    // Use glob to recursively find all files, excluding common ignore patterns
    const files = await glob('**/*', {
      cwd: rootDir,
      nodir: true, // Only files, not directories
      dot: true, // Include hidden files
      follow: false, // Don't follow symbolic links
      ignore: combinedIgnores,
    });

    return files.map((file) => path.resolve(rootDir, file));
  } catch (error) {
    console.error('Error discovering files:', error.message);
    return [];
  }
}

/**
 * Parse a .gitignore file and return its patterns as glob patterns.
 *
 * Blank lines and comment lines are dropped; directory patterns (trailing
 * slash) are expanded to match everything below the directory. A missing or
 * unreadable file yields an empty list.
 *
 * @param {string} gitignorePath - Path to .gitignore file
 * @returns {Promise<string[]>} Array of ignore patterns
 */
async function parseGitignore(gitignorePath) {
  try {
    if (!(await fs.pathExists(gitignorePath))) {
      return [];
    }

    const content = await fs.readFile(gitignorePath, 'utf8');

    return content
      .split('\n')
      .map((line) => line.trim()) // also strips the "\r" of CRLF files
      .filter((line) => line && !line.startsWith('#')) // Remove empty lines and comments
      .map((pattern) => (pattern.endsWith('/') ? `${pattern}**` : pattern));
  } catch (error) {
    console.error('Error parsing .gitignore:', error.message);
    return [];
  }
}

/**
 * Check if a file is binary using its extension and a null-byte heuristic.
 *
 * @param {string} filePath - Path to the file
 * @returns {Promise<boolean>} True if file is binary; false for text, empty
 *   files, or when the file cannot be inspected
 */
async function isBinaryFile(filePath) {
  try {
    // First check by file extension
    const binaryExtensions = [
      '.jpg', '.jpeg', '.png', '.gif', '.bmp', '.ico', '.svg',
      '.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx',
      '.zip', '.tar', '.gz', '.rar', '.7z',
      '.exe', '.dll', '.so', '.dylib',
      '.mp3', '.mp4', '.avi', '.mov', '.wav',
      '.ttf', '.otf', '.woff', '.woff2',
      '.bin', '.dat', '.db', '.sqlite',
    ];

    const ext = path.extname(filePath).toLowerCase();
    if (binaryExtensions.includes(ext)) {
      return true;
    }

    // For files without clear extensions, try to read a small sample
    const stats = await fs.stat(filePath);
    if (stats.size === 0) {
      return false; // Empty files are considered text
    }

    // Read only the first 1024 bytes (not the whole file) to look for null bytes
    const sampleSize = Math.min(1024, stats.size);
    const fd = await fs.open(filePath, 'r');
    try {
      const sample = Buffer.alloc(sampleSize);
      const { bytesRead } = await fs.read(fd, sample, 0, sampleSize, 0);

      // If we find null bytes, it's likely binary
      return sample.subarray(0, bytesRead).includes(0);
    } finally {
      await fs.close(fd);
    }
  } catch (error) {
    console.warn(`Warning: Could not determine if file is binary: ${filePath} - ${error.message}`);
    return false; // Default to text if we can't determine
  }
}

/**
 * Read and aggregate content from text files.
 *
 * Binary files are recorded with their size only. Files that cannot be read
 * are recorded in `errors` and still counted as processed.
 *
 * @param {string[]} files - Array of absolute file paths
 * @param {string} rootDir - The root directory (used to compute relative paths)
 * @param {Object} spinner - Optional spinner instance for progress display
 * @returns {Promise<Object>} Object containing file contents and metadata:
 *   `textFiles`, `binaryFiles`, `errors`, `totalFiles`, `processedFiles`
 */
async function aggregateFileContents(files, rootDir, spinner = null) {
  const results = {
    textFiles: [],
    binaryFiles: [],
    errors: [],
    totalFiles: files.length,
    processedFiles: 0,
  };

  for (const filePath of files) {
    const relativePath = path.relative(rootDir, filePath);

    try {
      // Update progress indicator
      if (spinner) {
        spinner.text = `Processing file ${results.processedFiles + 1}/${results.totalFiles}: ${relativePath}`;
      }

      const isBinary = await isBinaryFile(filePath);

      if (isBinary) {
        results.binaryFiles.push({
          path: relativePath,
          absolutePath: filePath,
          size: (await fs.stat(filePath)).size,
        });
      } else {
        // Read text file content
        const content = await fs.readFile(filePath, 'utf8');
        results.textFiles.push({
          path: relativePath,
          absolutePath: filePath,
          content,
          // Size in bytes (not UTF-16 code units) so it can be summed with
          // the byte sizes of binary files in the statistics.
          size: Buffer.byteLength(content, 'utf8'),
          lines: content.split('\n').length,
        });
      }
    } catch (error) {
      results.errors.push({
        path: relativePath,
        absolutePath: filePath,
        error: error.message,
      });

      // Log warning without interfering with spinner
      const warning = `Warning: Could not read file ${relativePath}: ${error.message}`;
      if (spinner) {
        spinner.warn(warning);
      } else {
        console.warn(warning);
      }
    }

    // Failed files count as processed too
    results.processedFiles++;
  }

  return results;
}

/**
 * Generate XML output with aggregated file contents using streaming.
 *
 * Each text file becomes a `<file path="...">` element whose content is
 * wrapped in CDATA. Files are written one at a time and the writer pauses
 * whenever the stream signals backpressure.
 *
 * @param {Object} aggregatedContent - The aggregated content object
 * @param {string} outputPath - The output file path
 * @returns {Promise<void>} Promise that resolves when writing is complete
 */
async function generateXMLOutput(aggregatedContent, outputPath) {
  const { textFiles } = aggregatedContent;

  // Create write stream for efficient memory usage
  const writeStream = fs.createWriteStream(outputPath, { encoding: 'utf8' });

  return new Promise((resolve, reject) => {
    writeStream.on('error', reject);
    writeStream.on('finish', resolve);

    // Write XML header
    writeStream.write('<?xml version="1.0" encoding="UTF-8"?>\n');
    writeStream.write('<files>\n');

    // Process files one by one to minimize memory usage
    let fileIndex = 0;

    const writeNextFile = () => {
      if (fileIndex >= textFiles.length) {
        // All files processed, close XML and stream
        writeStream.write('</files>\n');
        writeStream.end();
        return;
      }

      const file = textFiles[fileIndex];
      fileIndex++;

      // File opening tag
      let xml = `  <file path="${escapeXml(file.path)}">`;

      // Use CDATA for code content. splitAndWrapCDATA() also handles content
      // containing "]]>" and is a plain CDATA wrap when it does not.
      if (file.content) {
        xml += splitAndWrapCDATA(indentFileContent(file.content));
      }

      // File closing tag
      xml += '</file>\n';

      // Respect backpressure: when the internal buffer is full, wait for
      // 'drain'; otherwise continue on the next tick to avoid deep recursion.
      if (writeStream.write(xml)) {
        setImmediate(writeNextFile);
      } else {
        writeStream.once('drain', writeNextFile);
      }
    };

    // Start processing files
    writeNextFile();
  });
}

/**
 * Escape XML special characters for attributes.
 *
 * @param {string} str - String to escape
 * @returns {string} Escaped string (non-strings are stringified unescaped)
 */
function escapeXml(str) {
  if (typeof str !== 'string') {
    return String(str);
  }

  // "&" must be replaced first so the entities added below are not re-escaped
  return str
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&apos;');
}

/**
 * Indent file content with 4 spaces for each line.
 *
 * @param {string} content - Content to indent
 * @returns {string} Indented content
 */
function indentFileContent(content) {
  if (typeof content !== 'string') {
    return String(content);
  }

  // Split content into lines and add 4 spaces of indentation to each line
  return content
    .split('\n')
    .map((line) => `    ${line}`)
    .join('\n');
}

/**
 * Wrap content in a CDATA section, splitting around any "]]>" it contains.
 *
 * @param {string} content - Content to process
 * @returns {string} Content with properly wrapped CDATA sections
 */
function splitAndWrapCDATA(content) {
  if (typeof content !== 'string') {
    return String(content);
  }

  // "]]>" would terminate the section early: end the section after "]]" and
  // reopen a new one before ">".
  const escapedContent = content.replace(/]]>/g, ']]]]><![CDATA[>');
  return `<![CDATA[\n${escapedContent}\n    ]]>`;
}

/**
 * Calculate statistics for the processed files.
 *
 * @param {Object} aggregatedContent - The aggregated content object
 * @param {number} xmlFileSize - The size of the generated XML file in bytes
 * @returns {Object} Statistics object with counts, human-readable sizes,
 *   total line count and a formatted token estimate
 */
function calculateStatistics(aggregatedContent, xmlFileSize) {
  const { textFiles, binaryFiles, errors } = aggregatedContent;

  // Calculate total file size in bytes
  const totalTextSize = textFiles.reduce((sum, file) => sum + file.size, 0);
  const totalBinarySize = binaryFiles.reduce((sum, file) => sum + file.size, 0);
  const totalSize = totalTextSize + totalBinarySize;

  // Calculate total lines of code
  const totalLines = textFiles.reduce((sum, file) => sum + file.lines, 0);

  // Estimate token count (rough approximation: 1 token ≈ 4 characters)
  const estimatedTokens = Math.ceil(xmlFileSize / 4);

  // Format file size
  const formatSize = (bytes) => {
    if (bytes < 1024) return `${bytes} B`;
    if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`;
    return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;
  };

  return {
    totalFiles: textFiles.length + binaryFiles.length,
    textFiles: textFiles.length,
    binaryFiles: binaryFiles.length,
    errorFiles: errors.length,
    totalSize: formatSize(totalSize),
    xmlSize: formatSize(xmlFileSize),
    totalLines,
    estimatedTokens: estimatedTokens.toLocaleString(),
  };
}

/**
 * Filter files based on .gitignore patterns.
 *
 * A file is dropped when it matches any ignore pattern, unless it also
 * matches any negated ("!pattern") entry.
 *
 * @param {string[]} files - Array of absolute file paths
 * @param {string} rootDir - The root directory
 * @returns {Promise<string[]>} Filtered array of absolute file paths
 */
async function filterFiles(files, rootDir) {
  const gitignorePath = path.join(rootDir, '.gitignore');
  const ignorePatterns = await parseGitignore(gitignorePath);

  if (ignorePatterns.length === 0) {
    return files;
  }

  // Separate positive and negative patterns
  const positivePatterns = ignorePatterns.filter((p) => !p.startsWith('!'));
  const negativePatterns = ignorePatterns.filter((p) => p.startsWith('!')).map((p) => p.slice(1));

  const matchesAny = (file, patterns) => patterns.some((pattern) => minimatch(file, pattern));

  return files
    // Convert absolute paths to relative for pattern matching
    .map((file) => path.relative(rootDir, file))
    // Keep files that are not ignored, or that are explicitly un-ignored
    .filter((file) => !matchesAny(file, positivePatterns) || matchesAny(file, negativePatterns))
    // Convert back to absolute paths
    .map((file) => path.resolve(rootDir, file));
}

const program = new Command();

program
  .name('bmad-flatten')
  .description('BMad-Method codebase flattener tool')
  .version('1.0.0')
  .option('-i, --input <path>', 'Input directory to flatten', process.cwd())
  .option('-o, --output <path>', 'Output file path', 'flattened-codebase.xml')
  .action(async (options) => {
    const inputDir = path.resolve(options.input);
    const outputPath = path.resolve(options.output);

    console.log(`Flattening codebase from: ${inputDir}`);
    console.log(`Output file: ${outputPath}`);

    try {
      // Verify input directory exists
      if (!(await fs.pathExists(inputDir))) {
        console.error(`❌ Error: Input directory does not exist: ${inputDir}`);
        process.exit(1);
      }

      // Import ora dynamically (loaded via import() rather than require())
      const { default: ora } = await import('ora');

      // Start file discovery with spinner
      const discoverySpinner = ora('🔍 Discovering files...').start();
      const files = await discoverFiles(inputDir);
      // Never flatten a previous output file into itself when a custom
      // output path lives inside the input directory.
      const filteredFiles = (await filterFiles(files, inputDir)).filter(
        (file) => file !== outputPath,
      );
      discoverySpinner.succeed(`📁 Found ${filteredFiles.length} files to include`);

      // Process files with progress tracking
      console.log('Reading file contents');
      const processingSpinner = ora('📄 Processing files...').start();
      const aggregatedContent = await aggregateFileContents(filteredFiles, inputDir, processingSpinner);
      processingSpinner.succeed(`✅ Processed ${aggregatedContent.processedFiles}/${filteredFiles.length} files`);

      // Log processing results for test validation
      console.log(`Processed ${aggregatedContent.processedFiles}/${filteredFiles.length} files`);
      if (aggregatedContent.errors.length > 0) {
        console.log(`Errors: ${aggregatedContent.errors.length}`);
      }
      console.log(`Text files: ${aggregatedContent.textFiles.length}`);
      if (aggregatedContent.binaryFiles.length > 0) {
        console.log(`Binary files: ${aggregatedContent.binaryFiles.length}`);
      }

      // Generate XML output using streaming
      const xmlSpinner = ora('🔧 Generating XML output...').start();
      await generateXMLOutput(aggregatedContent, outputPath);
      xmlSpinner.succeed('📝 XML generation completed');

      // Calculate and display statistics
      const outputStats = await fs.stat(outputPath);
      const stats = calculateStatistics(aggregatedContent, outputStats.size);

      // Display completion summary
      console.log('\n📊 Completion Summary:');
      console.log(`✅ Successfully processed ${filteredFiles.length} files into ${path.basename(outputPath)}`);
      console.log(`📁 Output file: ${outputPath}`);
      console.log(`📏 Total source size: ${stats.totalSize}`);
      console.log(`📄 Generated XML size: ${stats.xmlSize}`);
      console.log(`📝 Total lines of code: ${stats.totalLines.toLocaleString()}`);
      console.log(`🔢 Estimated tokens: ${stats.estimatedTokens}`);
      console.log(`📊 File breakdown: ${stats.textFiles} text, ${stats.binaryFiles} binary, ${stats.errorFiles} errors`);
    } catch (error) {
      console.error('❌ Critical error:', error.message);
      console.error('An unexpected error occurred.');
      process.exit(1);
    }
  });

// Only parse CLI arguments when run directly, not when required as a module
if (require.main === module) {
  program.parse();
}

module.exports = program;