chore: add code formatting config and pre-commit hooks (#450)

This commit is contained in:
manjaroblack
2025-08-16 19:08:39 -05:00
committed by GitHub
parent 51284d6ecf
commit ed539432fb
130 changed files with 11886 additions and 10939 deletions

View File

@@ -1,20 +1,14 @@
#!/usr/bin/env node
const { Command } = require("commander");
const fs = require("fs-extra");
const path = require("node:path");
const process = require("node:process");
const { Command } = require('commander');
const fs = require('fs-extra');
const path = require('node:path');
const process = require('node:process');
// Modularized components
const { findProjectRoot } = require("./projectRoot.js");
const { promptYesNo, promptPath } = require("./prompts.js");
const {
discoverFiles,
filterFiles,
aggregateFileContents,
} = require("./files.js");
const { generateXMLOutput } = require("./xml.js");
const { calculateStatistics } = require("./stats.js");
const { findProjectRoot } = require('./projectRoot.js');
const { promptYesNo, promptPath } = require('./prompts.js');
const { discoverFiles, filterFiles, aggregateFileContents } = require('./files.js');
const { generateXMLOutput } = require('./xml.js');
const { calculateStatistics } = require('./stats.js');
/**
* Recursively discover all files in a directory
@@ -73,30 +67,30 @@ const { calculateStatistics } = require("./stats.js");
const program = new Command();
program
.name("bmad-flatten")
.description("BMad-Method codebase flattener tool")
.version("1.0.0")
.option("-i, --input <path>", "Input directory to flatten", process.cwd())
.option("-o, --output <path>", "Output file path", "flattened-codebase.xml")
.name('bmad-flatten')
.description('BMad-Method codebase flattener tool')
.version('1.0.0')
.option('-i, --input <path>', 'Input directory to flatten', process.cwd())
.option('-o, --output <path>', 'Output file path', 'flattened-codebase.xml')
.action(async (options) => {
let inputDir = path.resolve(options.input);
let outputPath = path.resolve(options.output);
// Detect if user explicitly provided -i/--input or -o/--output
const argv = process.argv.slice(2);
const userSpecifiedInput = argv.some((a) =>
a === "-i" || a === "--input" || a.startsWith("--input=")
const userSpecifiedInput = argv.some(
(a) => a === '-i' || a === '--input' || a.startsWith('--input='),
);
const userSpecifiedOutput = argv.some((a) =>
a === "-o" || a === "--output" || a.startsWith("--output=")
const userSpecifiedOutput = argv.some(
(a) => a === '-o' || a === '--output' || a.startsWith('--output='),
);
const noPathArgs = !userSpecifiedInput && !userSpecifiedOutput;
const noPathArguments = !userSpecifiedInput && !userSpecifiedOutput;
if (noPathArgs) {
if (noPathArguments) {
const detectedRoot = await findProjectRoot(process.cwd());
const suggestedOutput = detectedRoot
? path.join(detectedRoot, "flattened-codebase.xml")
: path.resolve("flattened-codebase.xml");
? path.join(detectedRoot, 'flattened-codebase.xml')
: path.resolve('flattened-codebase.xml');
if (detectedRoot) {
const useDefaults = await promptYesNo(
@@ -107,26 +101,25 @@ program
inputDir = detectedRoot;
outputPath = suggestedOutput;
} else {
inputDir = await promptPath(
"Enter input directory path",
process.cwd(),
);
inputDir = await promptPath('Enter input directory path', process.cwd());
outputPath = await promptPath(
"Enter output file path",
path.join(inputDir, "flattened-codebase.xml"),
'Enter output file path',
path.join(inputDir, 'flattened-codebase.xml'),
);
}
} else {
console.log("Could not auto-detect a project root.");
inputDir = await promptPath(
"Enter input directory path",
process.cwd(),
);
console.log('Could not auto-detect a project root.');
inputDir = await promptPath('Enter input directory path', process.cwd());
outputPath = await promptPath(
"Enter output file path",
path.join(inputDir, "flattened-codebase.xml"),
'Enter output file path',
path.join(inputDir, 'flattened-codebase.xml'),
);
}
} else {
console.error(
'Could not auto-detect a project root and no arguments were provided. Please specify -i/--input and -o/--output.',
);
process.exit(1);
}
// Ensure output directory exists
@@ -134,24 +127,23 @@ program
try {
// Verify input directory exists
if (!await fs.pathExists(inputDir)) {
if (!(await fs.pathExists(inputDir))) {
console.error(`❌ Error: Input directory does not exist: ${inputDir}`);
process.exit(1);
}
// Import ora dynamically
const { default: ora } = await import("ora");
const { default: ora } = await import('ora');
// Start file discovery with spinner
const discoverySpinner = ora("🔍 Discovering files...").start();
const discoverySpinner = ora('🔍 Discovering files...').start();
const files = await discoverFiles(inputDir);
const filteredFiles = await filterFiles(files, inputDir);
discoverySpinner.succeed(
`📁 Found ${filteredFiles.length} files to include`,
);
discoverySpinner.succeed(`📁 Found ${filteredFiles.length} files to include`);
// Process files with progress tracking
const processingSpinner = ora("📄 Processing files...").start();
console.log('Reading file contents');
const processingSpinner = ora('📄 Processing files...').start();
const aggregatedContent = await aggregateFileContents(
filteredFiles,
inputDir,
@@ -165,31 +157,23 @@ program
}
// Generate XML output using streaming
const xmlSpinner = ora("🔧 Generating XML output...").start();
const xmlSpinner = ora('🔧 Generating XML output...').start();
await generateXMLOutput(aggregatedContent, outputPath);
xmlSpinner.succeed("📝 XML generation completed");
xmlSpinner.succeed('📝 XML generation completed');
// Calculate and display statistics
const outputStats = await fs.stat(outputPath);
const stats = await calculateStatistics(
aggregatedContent,
outputStats.size,
inputDir,
);
const stats = await calculateStatistics(aggregatedContent, outputStats.size, inputDir);
// Display completion summary
console.log("\n📊 Completion Summary:");
console.log('\n📊 Completion Summary:');
console.log(
`✅ Successfully processed ${filteredFiles.length} files into ${
path.basename(outputPath)
}`,
`✅ Successfully processed ${filteredFiles.length} files into ${path.basename(outputPath)}`,
);
console.log(`📁 Output file: ${outputPath}`);
console.log(`📏 Total source size: ${stats.totalSize}`);
console.log(`📄 Generated XML size: ${stats.xmlSize}`);
console.log(
`📝 Total lines of code: ${stats.totalLines.toLocaleString()}`,
);
console.log(`📝 Total lines of code: ${stats.totalLines.toLocaleString()}`);
console.log(`🔢 Estimated tokens: ${stats.estimatedTokens}`);
console.log(
`📊 File breakdown: ${stats.textFiles} text, ${stats.binaryFiles} binary, ${stats.errorFiles} errors\n`,
@@ -197,92 +181,75 @@ program
// Ask user if they want detailed stats + markdown report
const generateDetailed = await promptYesNo(
"Generate detailed stats (console + markdown) now?",
'Generate detailed stats (console + markdown) now?',
true,
);
if (generateDetailed) {
// Additional detailed stats
console.log("\n📈 Size Percentiles:");
console.log('\n📈 Size Percentiles:');
console.log(
` Avg: ${
Math.round(stats.avgFileSize).toLocaleString()
} B, Median: ${
Math.round(stats.medianFileSize).toLocaleString()
} B, p90: ${stats.p90.toLocaleString()} B, p95: ${stats.p95.toLocaleString()} B, p99: ${stats.p99.toLocaleString()} B`,
` Avg: ${Math.round(stats.avgFileSize).toLocaleString()} B, Median: ${Math.round(
stats.medianFileSize,
).toLocaleString()} B, p90: ${stats.p90.toLocaleString()} B, p95: ${stats.p95.toLocaleString()} B, p99: ${stats.p99.toLocaleString()} B`,
);
if (Array.isArray(stats.histogram) && stats.histogram.length) {
console.log("\n🧮 Size Histogram:");
if (Array.isArray(stats.histogram) && stats.histogram.length > 0) {
console.log('\n🧮 Size Histogram:');
for (const b of stats.histogram.slice(0, 2)) {
console.log(
` ${b.label}: ${b.count} files, ${b.bytes.toLocaleString()} bytes`,
);
console.log(` ${b.label}: ${b.count} files, ${b.bytes.toLocaleString()} bytes`);
}
if (stats.histogram.length > 2) {
console.log(` … and ${stats.histogram.length - 2} more buckets`);
}
}
if (Array.isArray(stats.byExtension) && stats.byExtension.length) {
if (Array.isArray(stats.byExtension) && stats.byExtension.length > 0) {
const topExt = stats.byExtension.slice(0, 2);
console.log("\n📦 Top Extensions:");
console.log('\n📦 Top Extensions:');
for (const e of topExt) {
const pct = stats.totalBytes
? ((e.bytes / stats.totalBytes) * 100)
: 0;
const pct = stats.totalBytes ? (e.bytes / stats.totalBytes) * 100 : 0;
console.log(
` ${e.ext}: ${e.count} files, ${e.bytes.toLocaleString()} bytes (${
pct.toFixed(2)
}%)`,
` ${e.ext}: ${e.count} files, ${e.bytes.toLocaleString()} bytes (${pct.toFixed(
2,
)}%)`,
);
}
if (stats.byExtension.length > 2) {
console.log(
` … and ${stats.byExtension.length - 2} more extensions`,
);
console.log(` … and ${stats.byExtension.length - 2} more extensions`);
}
}
if (Array.isArray(stats.byDirectory) && stats.byDirectory.length) {
if (Array.isArray(stats.byDirectory) && stats.byDirectory.length > 0) {
const topDir = stats.byDirectory.slice(0, 2);
console.log("\n📂 Top Directories:");
console.log('\n📂 Top Directories:');
for (const d of topDir) {
const pct = stats.totalBytes
? ((d.bytes / stats.totalBytes) * 100)
: 0;
const pct = stats.totalBytes ? (d.bytes / stats.totalBytes) * 100 : 0;
console.log(
` ${d.dir}: ${d.count} files, ${d.bytes.toLocaleString()} bytes (${
pct.toFixed(2)
}%)`,
` ${d.dir}: ${d.count} files, ${d.bytes.toLocaleString()} bytes (${pct.toFixed(
2,
)}%)`,
);
}
if (stats.byDirectory.length > 2) {
console.log(
` … and ${stats.byDirectory.length - 2} more directories`,
);
console.log(` … and ${stats.byDirectory.length - 2} more directories`);
}
}
if (
Array.isArray(stats.depthDistribution) &&
stats.depthDistribution.length
) {
console.log("\n🌳 Depth Distribution:");
if (Array.isArray(stats.depthDistribution) && stats.depthDistribution.length > 0) {
console.log('\n🌳 Depth Distribution:');
const dd = stats.depthDistribution.slice(0, 2);
let line = " " + dd.map((d) => `${d.depth}:${d.count}`).join(" ");
let line = ' ' + dd.map((d) => `${d.depth}:${d.count}`).join(' ');
if (stats.depthDistribution.length > 2) {
line += ` … +${stats.depthDistribution.length - 2} more`;
}
console.log(line);
}
if (Array.isArray(stats.longestPaths) && stats.longestPaths.length) {
console.log("\n🧵 Longest Paths:");
if (Array.isArray(stats.longestPaths) && stats.longestPaths.length > 0) {
console.log('\n🧵 Longest Paths:');
for (const p of stats.longestPaths.slice(0, 2)) {
console.log(
` ${p.path} (${p.length} chars, ${p.size.toLocaleString()} bytes)`,
);
console.log(` ${p.path} (${p.length} chars, ${p.size.toLocaleString()} bytes)`);
}
if (stats.longestPaths.length > 2) {
console.log(` … and ${stats.longestPaths.length - 2} more paths`);
@@ -290,7 +257,7 @@ program
}
if (stats.temporal) {
console.log("\n⏱ Temporal:");
console.log('\n⏱ Temporal:');
if (stats.temporal.oldest) {
console.log(
` Oldest: ${stats.temporal.oldest.path} (${stats.temporal.oldest.mtime})`,
@@ -302,104 +269,82 @@ program
);
}
if (Array.isArray(stats.temporal.ageBuckets)) {
console.log(" Age buckets:");
console.log(' Age buckets:');
for (const b of stats.temporal.ageBuckets.slice(0, 2)) {
console.log(
` ${b.label}: ${b.count} files, ${b.bytes.toLocaleString()} bytes`,
);
console.log(` ${b.label}: ${b.count} files, ${b.bytes.toLocaleString()} bytes`);
}
if (stats.temporal.ageBuckets.length > 2) {
console.log(
` … and ${
stats.temporal.ageBuckets.length - 2
} more buckets`,
);
console.log(` … and ${stats.temporal.ageBuckets.length - 2} more buckets`);
}
}
}
if (stats.quality) {
console.log("\n✅ Quality Signals:");
console.log('\n✅ Quality Signals:');
console.log(` Zero-byte files: ${stats.quality.zeroByteFiles}`);
console.log(` Empty text files: ${stats.quality.emptyTextFiles}`);
console.log(` Hidden files: ${stats.quality.hiddenFiles}`);
console.log(` Symlinks: ${stats.quality.symlinks}`);
console.log(
` Large files (>= ${
(stats.quality.largeThreshold / (1024 * 1024)).toFixed(0)
} MB): ${stats.quality.largeFilesCount}`,
` Large files (>= ${(stats.quality.largeThreshold / (1024 * 1024)).toFixed(
0,
)} MB): ${stats.quality.largeFilesCount}`,
);
console.log(
` Suspiciously large files (>= 100 MB): ${stats.quality.suspiciousLargeFilesCount}`,
);
}
if (
Array.isArray(stats.duplicateCandidates) &&
stats.duplicateCandidates.length
) {
console.log("\n🧬 Duplicate Candidates:");
if (Array.isArray(stats.duplicateCandidates) && stats.duplicateCandidates.length > 0) {
console.log('\n🧬 Duplicate Candidates:');
for (const d of stats.duplicateCandidates.slice(0, 2)) {
console.log(
` ${d.reason}: ${d.count} files @ ${d.size.toLocaleString()} bytes`,
);
console.log(` ${d.reason}: ${d.count} files @ ${d.size.toLocaleString()} bytes`);
}
if (stats.duplicateCandidates.length > 2) {
console.log(
` … and ${stats.duplicateCandidates.length - 2} more groups`,
);
console.log(` … and ${stats.duplicateCandidates.length - 2} more groups`);
}
}
if (typeof stats.compressibilityRatio === "number") {
if (typeof stats.compressibilityRatio === 'number') {
console.log(
`\n🗜️ Compressibility ratio (sampled): ${
(stats.compressibilityRatio * 100).toFixed(2)
}%`,
`\n🗜️ Compressibility ratio (sampled): ${(stats.compressibilityRatio * 100).toFixed(
2,
)}%`,
);
}
if (stats.git && stats.git.isRepo) {
console.log("\n🔧 Git:");
console.log('\n🔧 Git:');
console.log(
` Tracked: ${stats.git.trackedCount} files, ${stats.git.trackedBytes.toLocaleString()} bytes`,
);
console.log(
` Untracked: ${stats.git.untrackedCount} files, ${stats.git.untrackedBytes.toLocaleString()} bytes`,
);
if (
Array.isArray(stats.git.lfsCandidates) &&
stats.git.lfsCandidates.length
) {
console.log(" LFS candidates (top 2):");
if (Array.isArray(stats.git.lfsCandidates) && stats.git.lfsCandidates.length > 0) {
console.log(' LFS candidates (top 2):');
for (const f of stats.git.lfsCandidates.slice(0, 2)) {
console.log(` ${f.path} (${f.size.toLocaleString()} bytes)`);
}
if (stats.git.lfsCandidates.length > 2) {
console.log(
` … and ${stats.git.lfsCandidates.length - 2} more`,
);
console.log(` … and ${stats.git.lfsCandidates.length - 2} more`);
}
}
}
if (Array.isArray(stats.largestFiles) && stats.largestFiles.length) {
console.log("\n📚 Largest Files (top 2):");
if (Array.isArray(stats.largestFiles) && stats.largestFiles.length > 0) {
console.log('\n📚 Largest Files (top 2):');
for (const f of stats.largestFiles.slice(0, 2)) {
// Show LOC for text files when available; omit ext and mtime
let locStr = "";
let locStr = '';
if (!f.isBinary && Array.isArray(aggregatedContent?.textFiles)) {
const tf = aggregatedContent.textFiles.find((t) =>
t.path === f.path
);
if (tf && typeof tf.lines === "number") {
const tf = aggregatedContent.textFiles.find((t) => t.path === f.path);
if (tf && typeof tf.lines === 'number') {
locStr = `, LOC: ${tf.lines.toLocaleString()}`;
}
}
console.log(
` ${f.path} ${f.sizeFormatted} (${
f.percentOfTotal.toFixed(2)
}%)${locStr}`,
` ${f.path} ${f.sizeFormatted} (${f.percentOfTotal.toFixed(2)}%)${locStr}`,
);
}
if (stats.largestFiles.length > 2) {
@@ -409,262 +354,214 @@ program
// Write a comprehensive markdown report next to the XML
{
const mdPath = outputPath.endsWith(".xml")
? outputPath.replace(/\.xml$/i, ".stats.md")
: outputPath + ".stats.md";
const mdPath = outputPath.endsWith('.xml')
? outputPath.replace(/\.xml$/i, '.stats.md')
: outputPath + '.stats.md';
try {
const pct = (num, den) => (den ? ((num / den) * 100) : 0);
const pct = (num, den) => (den ? (num / den) * 100 : 0);
const md = [];
md.push(`# 🧾 Flatten Stats for ${path.basename(outputPath)}`);
md.push("");
md.push("## 📊 Summary");
md.push(`- Total source size: ${stats.totalSize}`);
md.push(`- Generated XML size: ${stats.xmlSize}`);
md.push(
`# 🧾 Flatten Stats for ${path.basename(outputPath)}`,
'',
'## 📊 Summary',
`- Total source size: ${stats.totalSize}`,
`- Generated XML size: ${stats.xmlSize}`,
`- Total lines of code: ${stats.totalLines.toLocaleString()}`,
);
md.push(`- Estimated tokens: ${stats.estimatedTokens}`);
md.push(
`- Estimated tokens: ${stats.estimatedTokens}`,
`- File breakdown: ${stats.textFiles} text, ${stats.binaryFiles} binary, ${stats.errorFiles} errors`,
'',
'## 📈 Size Percentiles',
`Avg: ${Math.round(stats.avgFileSize).toLocaleString()} B, Median: ${Math.round(
stats.medianFileSize,
).toLocaleString()} B, p90: ${stats.p90.toLocaleString()} B, p95: ${stats.p95.toLocaleString()} B, p99: ${stats.p99.toLocaleString()} B`,
'',
);
md.push("");
// Percentiles
md.push("## 📈 Size Percentiles");
md.push(
`Avg: ${
Math.round(stats.avgFileSize).toLocaleString()
} B, Median: ${
Math.round(stats.medianFileSize).toLocaleString()
} B, p90: ${stats.p90.toLocaleString()} B, p95: ${stats.p95.toLocaleString()} B, p99: ${stats.p99.toLocaleString()} B`,
);
md.push("");
// Histogram
if (Array.isArray(stats.histogram) && stats.histogram.length) {
md.push("## 🧮 Size Histogram");
md.push("| Bucket | Files | Bytes |");
md.push("| --- | ---: | ---: |");
if (Array.isArray(stats.histogram) && stats.histogram.length > 0) {
md.push(
'## 🧮 Size Histogram',
'| Bucket | Files | Bytes |',
'| --- | ---: | ---: |',
);
for (const b of stats.histogram) {
md.push(
`| ${b.label} | ${b.count} | ${b.bytes.toLocaleString()} |`,
);
md.push(`| ${b.label} | ${b.count} | ${b.bytes.toLocaleString()} |`);
}
md.push("");
md.push('');
}
// Top Extensions
if (Array.isArray(stats.byExtension) && stats.byExtension.length) {
md.push("## 📦 Top Extensions by Bytes (Top 20)");
md.push("| Ext | Files | Bytes | % of total |");
md.push("| --- | ---: | ---: | ---: |");
if (Array.isArray(stats.byExtension) && stats.byExtension.length > 0) {
md.push(
'## 📦 Top Extensions by Bytes (Top 20)',
'| Ext | Files | Bytes | % of total |',
'| --- | ---: | ---: | ---: |',
);
for (const e of stats.byExtension.slice(0, 20)) {
const p = pct(e.bytes, stats.totalBytes);
md.push(
`| ${e.ext} | ${e.count} | ${e.bytes.toLocaleString()} | ${
p.toFixed(2)
}% |`,
`| ${e.ext} | ${e.count} | ${e.bytes.toLocaleString()} | ${p.toFixed(2)}% |`,
);
}
md.push("");
md.push('');
}
// Top Directories
if (Array.isArray(stats.byDirectory) && stats.byDirectory.length) {
md.push("## 📂 Top Directories by Bytes (Top 20)");
md.push("| Directory | Files | Bytes | % of total |");
md.push("| --- | ---: | ---: | ---: |");
if (Array.isArray(stats.byDirectory) && stats.byDirectory.length > 0) {
md.push(
'## 📂 Top Directories by Bytes (Top 20)',
'| Directory | Files | Bytes | % of total |',
'| --- | ---: | ---: | ---: |',
);
for (const d of stats.byDirectory.slice(0, 20)) {
const p = pct(d.bytes, stats.totalBytes);
md.push(
`| ${d.dir} | ${d.count} | ${d.bytes.toLocaleString()} | ${
p.toFixed(2)
}% |`,
`| ${d.dir} | ${d.count} | ${d.bytes.toLocaleString()} | ${p.toFixed(2)}% |`,
);
}
md.push("");
md.push('');
}
// Depth distribution
if (
Array.isArray(stats.depthDistribution) &&
stats.depthDistribution.length
) {
md.push("## 🌳 Depth Distribution");
md.push("| Depth | Count |");
md.push("| ---: | ---: |");
if (Array.isArray(stats.depthDistribution) && stats.depthDistribution.length > 0) {
md.push('## 🌳 Depth Distribution', '| Depth | Count |', '| ---: | ---: |');
for (const d of stats.depthDistribution) {
md.push(`| ${d.depth} | ${d.count} |`);
}
md.push("");
md.push('');
}
// Longest paths
if (
Array.isArray(stats.longestPaths) && stats.longestPaths.length
) {
md.push("## 🧵 Longest Paths (Top 25)");
md.push("| Path | Length | Bytes |");
md.push("| --- | ---: | ---: |");
if (Array.isArray(stats.longestPaths) && stats.longestPaths.length > 0) {
md.push(
'## 🧵 Longest Paths (Top 25)',
'| Path | Length | Bytes |',
'| --- | ---: | ---: |',
);
for (const pth of stats.longestPaths) {
md.push(
`| ${pth.path} | ${pth.length} | ${pth.size.toLocaleString()} |`,
);
md.push(`| ${pth.path} | ${pth.length} | ${pth.size.toLocaleString()} |`);
}
md.push("");
md.push('');
}
// Temporal
if (stats.temporal) {
md.push("## ⏱️ Temporal");
md.push('## ⏱️ Temporal');
if (stats.temporal.oldest) {
md.push(
`- Oldest: ${stats.temporal.oldest.path} (${stats.temporal.oldest.mtime})`,
);
md.push(`- Oldest: ${stats.temporal.oldest.path} (${stats.temporal.oldest.mtime})`);
}
if (stats.temporal.newest) {
md.push(
`- Newest: ${stats.temporal.newest.path} (${stats.temporal.newest.mtime})`,
);
md.push(`- Newest: ${stats.temporal.newest.path} (${stats.temporal.newest.mtime})`);
}
if (Array.isArray(stats.temporal.ageBuckets)) {
md.push("");
md.push("| Age | Files | Bytes |");
md.push("| --- | ---: | ---: |");
md.push('', '| Age | Files | Bytes |', '| --- | ---: | ---: |');
for (const b of stats.temporal.ageBuckets) {
md.push(
`| ${b.label} | ${b.count} | ${b.bytes.toLocaleString()} |`,
);
md.push(`| ${b.label} | ${b.count} | ${b.bytes.toLocaleString()} |`);
}
}
md.push("");
md.push('');
}
// Quality signals
if (stats.quality) {
md.push("## ✅ Quality Signals");
md.push(`- Zero-byte files: ${stats.quality.zeroByteFiles}`);
md.push(`- Empty text files: ${stats.quality.emptyTextFiles}`);
md.push(`- Hidden files: ${stats.quality.hiddenFiles}`);
md.push(`- Symlinks: ${stats.quality.symlinks}`);
md.push(
`- Large files (>= ${
(stats.quality.largeThreshold / (1024 * 1024)).toFixed(0)
} MB): ${stats.quality.largeFilesCount}`,
);
md.push(
'## ✅ Quality Signals',
`- Zero-byte files: ${stats.quality.zeroByteFiles}`,
`- Empty text files: ${stats.quality.emptyTextFiles}`,
`- Hidden files: ${stats.quality.hiddenFiles}`,
`- Symlinks: ${stats.quality.symlinks}`,
`- Large files (>= ${(stats.quality.largeThreshold / (1024 * 1024)).toFixed(0)} MB): ${stats.quality.largeFilesCount}`,
`- Suspiciously large files (>= 100 MB): ${stats.quality.suspiciousLargeFilesCount}`,
'',
);
md.push("");
}
// Duplicates
if (
Array.isArray(stats.duplicateCandidates) &&
stats.duplicateCandidates.length
) {
md.push("## 🧬 Duplicate Candidates");
md.push("| Reason | Files | Size (bytes) |");
md.push("| --- | ---: | ---: |");
if (Array.isArray(stats.duplicateCandidates) && stats.duplicateCandidates.length > 0) {
md.push(
'## 🧬 Duplicate Candidates',
'| Reason | Files | Size (bytes) |',
'| --- | ---: | ---: |',
);
for (const d of stats.duplicateCandidates) {
md.push(
`| ${d.reason} | ${d.count} | ${d.size.toLocaleString()} |`,
);
md.push(`| ${d.reason} | ${d.count} | ${d.size.toLocaleString()} |`);
}
md.push("");
// Detailed listing of duplicate file names and locations
md.push("### 🧬 Duplicate Groups Details");
md.push('', '### 🧬 Duplicate Groups Details');
let dupIndex = 1;
for (const d of stats.duplicateCandidates) {
md.push(
`#### Group ${dupIndex}: ${d.count} files @ ${d.size.toLocaleString()} bytes (${d.reason})`,
);
if (Array.isArray(d.files) && d.files.length) {
if (Array.isArray(d.files) && d.files.length > 0) {
for (const fp of d.files) {
md.push(`- ${fp}`);
}
} else {
md.push("- (file list unavailable)");
md.push('- (file list unavailable)');
}
md.push("");
md.push('');
dupIndex++;
}
md.push("");
md.push('');
}
// Compressibility
if (typeof stats.compressibilityRatio === "number") {
md.push("## 🗜️ Compressibility");
if (typeof stats.compressibilityRatio === 'number') {
md.push(
`Sampled compressibility ratio: ${
(stats.compressibilityRatio * 100).toFixed(2)
}%`,
'## 🗜️ Compressibility',
`Sampled compressibility ratio: ${(stats.compressibilityRatio * 100).toFixed(2)}%`,
'',
);
md.push("");
}
// Git
if (stats.git && stats.git.isRepo) {
md.push("## 🔧 Git");
md.push(
'## 🔧 Git',
`- Tracked: ${stats.git.trackedCount} files, ${stats.git.trackedBytes.toLocaleString()} bytes`,
);
md.push(
`- Untracked: ${stats.git.untrackedCount} files, ${stats.git.untrackedBytes.toLocaleString()} bytes`,
);
if (
Array.isArray(stats.git.lfsCandidates) &&
stats.git.lfsCandidates.length
) {
md.push("");
md.push("### 📦 LFS Candidates (Top 20)");
md.push("| Path | Bytes |");
md.push("| --- | ---: |");
if (Array.isArray(stats.git.lfsCandidates) && stats.git.lfsCandidates.length > 0) {
md.push('', '### 📦 LFS Candidates (Top 20)', '| Path | Bytes |', '| --- | ---: |');
for (const f of stats.git.lfsCandidates.slice(0, 20)) {
md.push(`| ${f.path} | ${f.size.toLocaleString()} |`);
}
}
md.push("");
md.push('');
}
// Largest Files
if (
Array.isArray(stats.largestFiles) && stats.largestFiles.length
) {
md.push("## 📚 Largest Files (Top 50)");
md.push("| Path | Size | % of total | LOC |");
md.push("| --- | ---: | ---: | ---: |");
if (Array.isArray(stats.largestFiles) && stats.largestFiles.length > 0) {
md.push(
'## 📚 Largest Files (Top 50)',
'| Path | Size | % of total | LOC |',
'| --- | ---: | ---: | ---: |',
);
for (const f of stats.largestFiles) {
let loc = "";
if (
!f.isBinary && Array.isArray(aggregatedContent?.textFiles)
) {
const tf = aggregatedContent.textFiles.find((t) =>
t.path === f.path
);
if (tf && typeof tf.lines === "number") {
let loc = '';
if (!f.isBinary && Array.isArray(aggregatedContent?.textFiles)) {
const tf = aggregatedContent.textFiles.find((t) => t.path === f.path);
if (tf && typeof tf.lines === 'number') {
loc = tf.lines.toLocaleString();
}
}
md.push(
`| ${f.path} | ${f.sizeFormatted} | ${
f.percentOfTotal.toFixed(2)
}% | ${loc} |`,
`| ${f.path} | ${f.sizeFormatted} | ${f.percentOfTotal.toFixed(2)}% | ${loc} |`,
);
}
md.push("");
md.push('');
}
await fs.writeFile(mdPath, md.join("\n"));
await fs.writeFile(mdPath, md.join('\n'));
console.log(`\n🧾 Detailed stats report written to: ${mdPath}`);
} catch (e) {
console.warn(`⚠️ Failed to write stats markdown: ${e.message}`);
} catch (error) {
console.warn(`⚠️ Failed to write stats markdown: ${error.message}`);
}
}
}
} catch (error) {
console.error("❌ Critical error:", error.message);
console.error("An unexpected error occurred.");
console.error('❌ Critical error:', error.message);
console.error('An unexpected error occurred.');
process.exit(1);
}
});