#!/usr/bin/env node

/**
 * bmad-flatten — BMad-Method codebase flattener CLI.
 *
 * Discovers files under an input directory, filters them, aggregates text
 * content, streams an XML bundle to disk, and optionally prints and writes a
 * detailed statistics report. The heavy lifting lives in the modularized
 * components required below; this file only wires them into a commander CLI.
 */

const { Command } = require("commander");
const fs = require("fs-extra");
const path = require("node:path");
const process = require("node:process");

// Modularized components
const { findProjectRoot } = require("./projectRoot.js");
const { promptYesNo, promptPath } = require("./prompts.js");
const {
  discoverFiles,
  filterFiles,
  aggregateFileContents,
} = require("./files.js");
const { generateXMLOutput } = require("./xml.js");
const { calculateStatistics } = require("./stats.js");

/**
 * Check whether the user explicitly passed a CLI flag in short form, long
 * form, or `--long=value` form.
 * @param {string[]} argv - Argument vector without the node/script entries
 * @param {string} shortFlag - Short flag, e.g. "-i"
 * @param {string} longFlag - Long flag, e.g. "--input"
 * @returns {boolean} True if the flag appears in argv
 */
function flagProvided(argv, shortFlag, longFlag) {
  return argv.some(
    (a) => a === shortFlag || a === longFlag || a.startsWith(`${longFlag}=`),
  );
}

/**
 * Interactively ask the user for the input directory and output file path.
 * The output default is placed inside the chosen input directory.
 * @returns {Promise<{inputDir: string, outputPath: string}>}
 */
async function promptForPaths() {
  const inputDir = await promptPath(
    "Enter input directory path",
    process.cwd(),
  );
  const outputPath = await promptPath(
    "Enter output file path",
    path.join(inputDir, "flattened-codebase.xml"),
  );
  return { inputDir, outputPath };
}

/**
 * Print the high-level completion summary to the console.
 * @param {Object} stats - Statistics object from calculateStatistics
 * @param {number} fileCount - Number of files included in the XML
 * @param {string} outputPath - Path of the generated XML file
 */
function printCompletionSummary(stats, fileCount, outputPath) {
  console.log("\n๐Ÿ“Š Completion Summary:");
  console.log(
    `โœ… Successfully processed ${fileCount} files into ${path.basename(outputPath)}`,
  );
  console.log(`๐Ÿ“ Output file: ${outputPath}`);
  console.log(`๐Ÿ“ Total source size: ${stats.totalSize}`);
  console.log(`๐Ÿ“„ Generated XML size: ${stats.xmlSize}`);
  console.log(`๐Ÿ“ Total lines of code: ${stats.totalLines.toLocaleString()}`);
  console.log(`๐Ÿ”ข Estimated tokens: ${stats.estimatedTokens}`);
  console.log(
    `๐Ÿ“Š File breakdown: ${stats.textFiles} text, ${stats.binaryFiles} binary, ${stats.errorFiles} errors\n`,
  );
}

/**
 * Print the detailed statistics to the console. Each list-like section is
 * truncated to its top 2 entries with an ellipsis line for the remainder.
 * Every section is optional and skipped when absent from `stats`.
 * @param {Object} stats - Statistics object from calculateStatistics
 * @param {Object} aggregatedContent - Aggregated content (used to look up LOC
 *   for the largest text files)
 */
function printDetailedStats(stats, aggregatedContent) {
  console.log("\n๐Ÿ“ˆ Size Percentiles:");
  console.log(
    ` Avg: ${Math.round(stats.avgFileSize).toLocaleString()} B, Median: ${
      Math.round(stats.medianFileSize).toLocaleString()
    } B, p90: ${stats.p90.toLocaleString()} B, p95: ${stats.p95.toLocaleString()} B, p99: ${stats.p99.toLocaleString()} B`,
  );
  if (Array.isArray(stats.histogram) && stats.histogram.length) {
    console.log("\n๐Ÿงฎ Size Histogram:");
    for (const b of stats.histogram.slice(0, 2)) {
      console.log(
        ` ${b.label}: ${b.count} files, ${b.bytes.toLocaleString()} bytes`,
      );
    }
    if (stats.histogram.length > 2) {
      console.log(` โ€ฆ and ${stats.histogram.length - 2} more buckets`);
    }
  }
  if (Array.isArray(stats.byExtension) && stats.byExtension.length) {
    const topExt = stats.byExtension.slice(0, 2);
    console.log("\n๐Ÿ“ฆ Top Extensions:");
    for (const e of topExt) {
      // Guard against division by zero when no bytes were counted.
      const pct = stats.totalBytes ? ((e.bytes / stats.totalBytes) * 100) : 0;
      console.log(
        ` ${e.ext}: ${e.count} files, ${e.bytes.toLocaleString()} bytes (${
          pct.toFixed(2)
        }%)`,
      );
    }
    if (stats.byExtension.length > 2) {
      console.log(` โ€ฆ and ${stats.byExtension.length - 2} more extensions`);
    }
  }
  if (Array.isArray(stats.byDirectory) && stats.byDirectory.length) {
    const topDir = stats.byDirectory.slice(0, 2);
    console.log("\n๐Ÿ“‚ Top Directories:");
    for (const d of topDir) {
      const pct = stats.totalBytes ? ((d.bytes / stats.totalBytes) * 100) : 0;
      console.log(
        ` ${d.dir}: ${d.count} files, ${d.bytes.toLocaleString()} bytes (${
          pct.toFixed(2)
        }%)`,
      );
    }
    if (stats.byDirectory.length > 2) {
      console.log(` โ€ฆ and ${stats.byDirectory.length - 2} more directories`);
    }
  }
  if (
    Array.isArray(stats.depthDistribution) &&
    stats.depthDistribution.length
  ) {
    console.log("\n๐ŸŒณ Depth Distribution:");
    const dd = stats.depthDistribution.slice(0, 2);
    let line = " " + dd.map((d) => `${d.depth}:${d.count}`).join(" ");
    if (stats.depthDistribution.length > 2) {
      line += ` โ€ฆ +${stats.depthDistribution.length - 2} more`;
    }
    console.log(line);
  }
  if (Array.isArray(stats.longestPaths) && stats.longestPaths.length) {
    console.log("\n๐Ÿงต Longest Paths:");
    for (const p of stats.longestPaths.slice(0, 2)) {
      console.log(
        ` ${p.path} (${p.length} chars, ${p.size.toLocaleString()} bytes)`,
      );
    }
    if (stats.longestPaths.length > 2) {
      console.log(` โ€ฆ and ${stats.longestPaths.length - 2} more paths`);
    }
  }
  if (stats.temporal) {
    console.log("\nโฑ๏ธ Temporal:");
    if (stats.temporal.oldest) {
      console.log(
        ` Oldest: ${stats.temporal.oldest.path} (${stats.temporal.oldest.mtime})`,
      );
    }
    if (stats.temporal.newest) {
      console.log(
        ` Newest: ${stats.temporal.newest.path} (${stats.temporal.newest.mtime})`,
      );
    }
    if (Array.isArray(stats.temporal.ageBuckets)) {
      console.log(" Age buckets:");
      for (const b of stats.temporal.ageBuckets.slice(0, 2)) {
        console.log(
          ` ${b.label}: ${b.count} files, ${b.bytes.toLocaleString()} bytes`,
        );
      }
      if (stats.temporal.ageBuckets.length > 2) {
        console.log(
          ` โ€ฆ and ${stats.temporal.ageBuckets.length - 2} more buckets`,
        );
      }
    }
  }
  if (stats.quality) {
    console.log("\nโœ… Quality Signals:");
    console.log(` Zero-byte files: ${stats.quality.zeroByteFiles}`);
    console.log(` Empty text files: ${stats.quality.emptyTextFiles}`);
    console.log(` Hidden files: ${stats.quality.hiddenFiles}`);
    console.log(` Symlinks: ${stats.quality.symlinks}`);
    console.log(
      ` Large files (>= ${
        (stats.quality.largeThreshold / (1024 * 1024)).toFixed(0)
      } MB): ${stats.quality.largeFilesCount}`,
    );
    console.log(
      ` Suspiciously large files (>= 100 MB): ${stats.quality.suspiciousLargeFilesCount}`,
    );
  }
  if (
    Array.isArray(stats.duplicateCandidates) &&
    stats.duplicateCandidates.length
  ) {
    console.log("\n๐Ÿงฌ Duplicate Candidates:");
    for (const d of stats.duplicateCandidates.slice(0, 2)) {
      console.log(
        ` ${d.reason}: ${d.count} files @ ${d.size.toLocaleString()} bytes`,
      );
    }
    if (stats.duplicateCandidates.length > 2) {
      console.log(
        ` โ€ฆ and ${stats.duplicateCandidates.length - 2} more groups`,
      );
    }
  }
  if (typeof stats.compressibilityRatio === "number") {
    console.log(
      `\n๐Ÿ—œ๏ธ Compressibility ratio (sampled): ${
        (stats.compressibilityRatio * 100).toFixed(2)
      }%`,
    );
  }
  if (stats.git && stats.git.isRepo) {
    console.log("\n๐Ÿ”ง Git:");
    console.log(
      ` Tracked: ${stats.git.trackedCount} files, ${stats.git.trackedBytes.toLocaleString()} bytes`,
    );
    console.log(
      ` Untracked: ${stats.git.untrackedCount} files, ${stats.git.untrackedBytes.toLocaleString()} bytes`,
    );
    if (
      Array.isArray(stats.git.lfsCandidates) &&
      stats.git.lfsCandidates.length
    ) {
      console.log(" LFS candidates (top 2):");
      for (const f of stats.git.lfsCandidates.slice(0, 2)) {
        console.log(` ${f.path} (${f.size.toLocaleString()} bytes)`);
      }
      if (stats.git.lfsCandidates.length > 2) {
        console.log(` โ€ฆ and ${stats.git.lfsCandidates.length - 2} more`);
      }
    }
  }
  if (Array.isArray(stats.largestFiles) && stats.largestFiles.length) {
    console.log("\n๐Ÿ“š Largest Files (top 2):");
    for (const f of stats.largestFiles.slice(0, 2)) {
      // Show LOC for text files when available; omit ext and mtime
      let locStr = "";
      if (!f.isBinary && Array.isArray(aggregatedContent?.textFiles)) {
        const tf = aggregatedContent.textFiles.find((t) => t.path === f.path);
        if (tf && typeof tf.lines === "number") {
          locStr = `, LOC: ${tf.lines.toLocaleString()}`;
        }
      }
      console.log(
        ` ${f.path} โ€“ ${f.sizeFormatted} (${
          f.percentOfTotal.toFixed(2)
        }%)${locStr}`,
      );
    }
    if (stats.largestFiles.length > 2) {
      console.log(` โ€ฆ and ${stats.largestFiles.length - 2} more files`);
    }
  }
}

/**
 * Build the full markdown statistics report (untruncated tables, unlike the
 * console view). Optional sections are omitted when absent from `stats`.
 * @param {Object} stats - Statistics object from calculateStatistics
 * @param {Object} aggregatedContent - Aggregated content (LOC lookup)
 * @param {string} outputPath - XML output path (used in the title)
 * @returns {string} The complete markdown document
 */
function buildStatsMarkdown(stats, aggregatedContent, outputPath) {
  const pct = (num, den) => (den ? ((num / den) * 100) : 0);
  const md = [];
  md.push(`# ๐Ÿงพ Flatten Stats for ${path.basename(outputPath)}`);
  md.push("");
  md.push("## ๐Ÿ“Š Summary");
  md.push(`- Total source size: ${stats.totalSize}`);
  md.push(`- Generated XML size: ${stats.xmlSize}`);
  md.push(`- Total lines of code: ${stats.totalLines.toLocaleString()}`);
  md.push(`- Estimated tokens: ${stats.estimatedTokens}`);
  md.push(
    `- File breakdown: ${stats.textFiles} text, ${stats.binaryFiles} binary, ${stats.errorFiles} errors`,
  );
  md.push("");
  // Percentiles
  md.push("## ๐Ÿ“ˆ Size Percentiles");
  md.push(
    `Avg: ${Math.round(stats.avgFileSize).toLocaleString()} B, Median: ${
      Math.round(stats.medianFileSize).toLocaleString()
    } B, p90: ${stats.p90.toLocaleString()} B, p95: ${stats.p95.toLocaleString()} B, p99: ${stats.p99.toLocaleString()} B`,
  );
  md.push("");
  // Histogram
  if (Array.isArray(stats.histogram) && stats.histogram.length) {
    md.push("## ๐Ÿงฎ Size Histogram");
    md.push("| Bucket | Files | Bytes |");
    md.push("| --- | ---: | ---: |");
    for (const b of stats.histogram) {
      md.push(`| ${b.label} | ${b.count} | ${b.bytes.toLocaleString()} |`);
    }
    md.push("");
  }
  // Top Extensions
  if (Array.isArray(stats.byExtension) && stats.byExtension.length) {
    md.push("## ๐Ÿ“ฆ Top Extensions by Bytes (Top 20)");
    md.push("| Ext | Files | Bytes | % of total |");
    md.push("| --- | ---: | ---: | ---: |");
    for (const e of stats.byExtension.slice(0, 20)) {
      const p = pct(e.bytes, stats.totalBytes);
      md.push(
        `| ${e.ext} | ${e.count} | ${e.bytes.toLocaleString()} | ${
          p.toFixed(2)
        }% |`,
      );
    }
    md.push("");
  }
  // Top Directories
  if (Array.isArray(stats.byDirectory) && stats.byDirectory.length) {
    md.push("## ๐Ÿ“‚ Top Directories by Bytes (Top 20)");
    md.push("| Directory | Files | Bytes | % of total |");
    md.push("| --- | ---: | ---: | ---: |");
    for (const d of stats.byDirectory.slice(0, 20)) {
      const p = pct(d.bytes, stats.totalBytes);
      md.push(
        `| ${d.dir} | ${d.count} | ${d.bytes.toLocaleString()} | ${
          p.toFixed(2)
        }% |`,
      );
    }
    md.push("");
  }
  // Depth distribution
  if (
    Array.isArray(stats.depthDistribution) &&
    stats.depthDistribution.length
  ) {
    md.push("## ๐ŸŒณ Depth Distribution");
    md.push("| Depth | Count |");
    md.push("| ---: | ---: |");
    for (const d of stats.depthDistribution) {
      md.push(`| ${d.depth} | ${d.count} |`);
    }
    md.push("");
  }
  // Longest paths
  if (Array.isArray(stats.longestPaths) && stats.longestPaths.length) {
    md.push("## ๐Ÿงต Longest Paths (Top 25)");
    md.push("| Path | Length | Bytes |");
    md.push("| --- | ---: | ---: |");
    for (const pth of stats.longestPaths) {
      md.push(
        `| ${pth.path} | ${pth.length} | ${pth.size.toLocaleString()} |`,
      );
    }
    md.push("");
  }
  // Temporal
  if (stats.temporal) {
    md.push("## โฑ๏ธ Temporal");
    if (stats.temporal.oldest) {
      md.push(
        `- Oldest: ${stats.temporal.oldest.path} (${stats.temporal.oldest.mtime})`,
      );
    }
    if (stats.temporal.newest) {
      md.push(
        `- Newest: ${stats.temporal.newest.path} (${stats.temporal.newest.mtime})`,
      );
    }
    if (Array.isArray(stats.temporal.ageBuckets)) {
      md.push("");
      md.push("| Age | Files | Bytes |");
      md.push("| --- | ---: | ---: |");
      for (const b of stats.temporal.ageBuckets) {
        md.push(`| ${b.label} | ${b.count} | ${b.bytes.toLocaleString()} |`);
      }
    }
    md.push("");
  }
  // Quality signals
  if (stats.quality) {
    md.push("## โœ… Quality Signals");
    md.push(`- Zero-byte files: ${stats.quality.zeroByteFiles}`);
    md.push(`- Empty text files: ${stats.quality.emptyTextFiles}`);
    md.push(`- Hidden files: ${stats.quality.hiddenFiles}`);
    md.push(`- Symlinks: ${stats.quality.symlinks}`);
    md.push(
      `- Large files (>= ${
        (stats.quality.largeThreshold / (1024 * 1024)).toFixed(0)
      } MB): ${stats.quality.largeFilesCount}`,
    );
    md.push(
      `- Suspiciously large files (>= 100 MB): ${stats.quality.suspiciousLargeFilesCount}`,
    );
    md.push("");
  }
  // Duplicates
  if (
    Array.isArray(stats.duplicateCandidates) &&
    stats.duplicateCandidates.length
  ) {
    md.push("## ๐Ÿงฌ Duplicate Candidates");
    md.push("| Reason | Files | Size (bytes) |");
    md.push("| --- | ---: | ---: |");
    for (const d of stats.duplicateCandidates) {
      md.push(`| ${d.reason} | ${d.count} | ${d.size.toLocaleString()} |`);
    }
    md.push("");
    // Detailed listing of duplicate file names and locations
    md.push("### ๐Ÿงฌ Duplicate Groups Details");
    let dupIndex = 1;
    for (const d of stats.duplicateCandidates) {
      md.push(
        `#### Group ${dupIndex}: ${d.count} files @ ${d.size.toLocaleString()} bytes (${d.reason})`,
      );
      if (Array.isArray(d.files) && d.files.length) {
        for (const fp of d.files) {
          md.push(`- ${fp}`);
        }
      } else {
        md.push("- (file list unavailable)");
      }
      md.push("");
      dupIndex++;
    }
    md.push("");
  }
  // Compressibility
  if (typeof stats.compressibilityRatio === "number") {
    md.push("## ๐Ÿ—œ๏ธ Compressibility");
    md.push(
      `Sampled compressibility ratio: ${
        (stats.compressibilityRatio * 100).toFixed(2)
      }%`,
    );
    md.push("");
  }
  // Git
  if (stats.git && stats.git.isRepo) {
    md.push("## ๐Ÿ”ง Git");
    md.push(
      `- Tracked: ${stats.git.trackedCount} files, ${stats.git.trackedBytes.toLocaleString()} bytes`,
    );
    md.push(
      `- Untracked: ${stats.git.untrackedCount} files, ${stats.git.untrackedBytes.toLocaleString()} bytes`,
    );
    if (
      Array.isArray(stats.git.lfsCandidates) &&
      stats.git.lfsCandidates.length
    ) {
      md.push("");
      md.push("### ๐Ÿ“ฆ LFS Candidates (Top 20)");
      md.push("| Path | Bytes |");
      md.push("| --- | ---: |");
      for (const f of stats.git.lfsCandidates.slice(0, 20)) {
        md.push(`| ${f.path} | ${f.size.toLocaleString()} |`);
      }
    }
    md.push("");
  }
  // Largest Files
  if (Array.isArray(stats.largestFiles) && stats.largestFiles.length) {
    md.push("## ๐Ÿ“š Largest Files (Top 50)");
    md.push("| Path | Size | % of total | LOC |");
    md.push("| --- | ---: | ---: | ---: |");
    for (const f of stats.largestFiles) {
      let loc = "";
      if (!f.isBinary && Array.isArray(aggregatedContent?.textFiles)) {
        const tf = aggregatedContent.textFiles.find((t) => t.path === f.path);
        if (tf && typeof tf.lines === "number") {
          loc = tf.lines.toLocaleString();
        }
      }
      md.push(
        `| ${f.path} | ${f.sizeFormatted} | ${
          f.percentOfTotal.toFixed(2)
        }% | ${loc} |`,
      );
    }
    md.push("");
  }
  return md.join("\n");
}

/**
 * Write the markdown statistics report next to the XML output file
 * (`foo.xml` -> `foo.stats.md`). Best-effort: failures are reported as a
 * warning on the console and never thrown.
 * @param {Object} stats - Statistics object from calculateStatistics
 * @param {Object} aggregatedContent - Aggregated content (LOC lookup)
 * @param {string} outputPath - XML output path the report sits beside
 * @returns {Promise<void>}
 */
async function writeStatsReport(stats, aggregatedContent, outputPath) {
  const mdPath = outputPath.endsWith(".xml")
    ? outputPath.replace(/\.xml$/i, ".stats.md")
    : outputPath + ".stats.md";
  try {
    await fs.writeFile(
      mdPath,
      buildStatsMarkdown(stats, aggregatedContent, outputPath),
    );
    console.log(`\n๐Ÿงพ Detailed stats report written to: ${mdPath}`);
  } catch (e) {
    console.warn(`โš ๏ธ Failed to write stats markdown: ${e.message}`);
  }
}

const program = new Command();

program
  .name("bmad-flatten")
  .description("BMad-Method codebase flattener tool")
  .version("1.0.0")
  // FIX: the `<path>` placeholders are required; without them commander
  // treats -i/-o as boolean flags and discards the user-supplied values.
  .option("-i, --input <path>", "Input directory to flatten", process.cwd())
  .option("-o, --output <path>", "Output file path", "flattened-codebase.xml")
  .action(async (options) => {
    let inputDir = path.resolve(options.input);
    let outputPath = path.resolve(options.output);

    // Detect if user explicitly provided -i/--input or -o/--output
    const argv = process.argv.slice(2);
    const noPathArgs = !flagProvided(argv, "-i", "--input") &&
      !flagProvided(argv, "-o", "--output");

    if (noPathArgs) {
      // No explicit paths: try to auto-detect the project root and confirm.
      const detectedRoot = await findProjectRoot(process.cwd());
      if (detectedRoot) {
        const suggestedOutput = path.join(
          detectedRoot,
          "flattened-codebase.xml",
        );
        const useDefaults = await promptYesNo(
          `Detected project root at "${detectedRoot}". Use it as input and write output to "${suggestedOutput}"?`,
          true,
        );
        if (useDefaults) {
          inputDir = detectedRoot;
          outputPath = suggestedOutput;
        } else {
          ({ inputDir, outputPath } = await promptForPaths());
        }
      } else {
        console.log("Could not auto-detect a project root.");
        ({ inputDir, outputPath } = await promptForPaths());
      }
    }

    // Ensure output directory exists
    await fs.ensureDir(path.dirname(outputPath));

    try {
      // Verify input directory exists before doing any work.
      if (!(await fs.pathExists(inputDir))) {
        console.error(`โŒ Error: Input directory does not exist: ${inputDir}`);
        process.exit(1);
      }

      // ora is ESM-only, so it must be imported dynamically from CommonJS.
      const { default: ora } = await import("ora");

      // Start file discovery with spinner
      const discoverySpinner = ora("๐Ÿ” Discovering files...").start();
      const files = await discoverFiles(inputDir);
      const filteredFiles = await filterFiles(files, inputDir);
      discoverySpinner.succeed(
        `๐Ÿ“ Found ${filteredFiles.length} files to include`,
      );

      // Process files with progress tracking
      const processingSpinner = ora("๐Ÿ“„ Processing files...").start();
      const aggregatedContent = await aggregateFileContents(
        filteredFiles,
        inputDir,
        processingSpinner,
      );
      processingSpinner.succeed(
        `โœ… Processed ${aggregatedContent.processedFiles}/${filteredFiles.length} files`,
      );
      if (aggregatedContent.errors.length > 0) {
        console.log(`Errors: ${aggregatedContent.errors.length}`);
      }

      // Generate XML output using streaming
      const xmlSpinner = ora("๐Ÿ”ง Generating XML output...").start();
      await generateXMLOutput(aggregatedContent, outputPath);
      xmlSpinner.succeed("๐Ÿ“ XML generation completed");

      // Calculate and display statistics
      const outputStats = await fs.stat(outputPath);
      const stats = await calculateStatistics(
        aggregatedContent,
        outputStats.size,
        inputDir,
      );
      printCompletionSummary(stats, filteredFiles.length, outputPath);

      // Ask user if they want detailed stats + markdown report
      const generateDetailed = await promptYesNo(
        "Generate detailed stats (console + markdown) now?",
        true,
      );
      if (generateDetailed) {
        printDetailedStats(stats, aggregatedContent);
        await writeStatsReport(stats, aggregatedContent, outputPath);
      }
    } catch (error) {
      console.error("โŒ Critical error:", error.message);
      console.error("An unexpected error occurred.");
      process.exit(1);
    }
  });

if (require.main === module) {
  program.parse();
}

module.exports = program;