chore: add code formatting config and pre-commit hooks (#450)
This commit is contained in:
@@ -1,20 +1,14 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
const { Command } = require("commander");
|
||||
const fs = require("fs-extra");
|
||||
const path = require("node:path");
|
||||
const process = require("node:process");
|
||||
const { Command } = require('commander');
|
||||
const fs = require('fs-extra');
|
||||
const path = require('node:path');
|
||||
const process = require('node:process');
|
||||
|
||||
// Modularized components
|
||||
const { findProjectRoot } = require("./projectRoot.js");
|
||||
const { promptYesNo, promptPath } = require("./prompts.js");
|
||||
const {
|
||||
discoverFiles,
|
||||
filterFiles,
|
||||
aggregateFileContents,
|
||||
} = require("./files.js");
|
||||
const { generateXMLOutput } = require("./xml.js");
|
||||
const { calculateStatistics } = require("./stats.js");
|
||||
const { findProjectRoot } = require('./projectRoot.js');
|
||||
const { promptYesNo, promptPath } = require('./prompts.js');
|
||||
const { discoverFiles, filterFiles, aggregateFileContents } = require('./files.js');
|
||||
const { generateXMLOutput } = require('./xml.js');
|
||||
const { calculateStatistics } = require('./stats.js');
|
||||
|
||||
/**
|
||||
* Recursively discover all files in a directory
|
||||
@@ -73,30 +67,30 @@ const { calculateStatistics } = require("./stats.js");
|
||||
const program = new Command();
|
||||
|
||||
program
|
||||
.name("bmad-flatten")
|
||||
.description("BMad-Method codebase flattener tool")
|
||||
.version("1.0.0")
|
||||
.option("-i, --input <path>", "Input directory to flatten", process.cwd())
|
||||
.option("-o, --output <path>", "Output file path", "flattened-codebase.xml")
|
||||
.name('bmad-flatten')
|
||||
.description('BMad-Method codebase flattener tool')
|
||||
.version('1.0.0')
|
||||
.option('-i, --input <path>', 'Input directory to flatten', process.cwd())
|
||||
.option('-o, --output <path>', 'Output file path', 'flattened-codebase.xml')
|
||||
.action(async (options) => {
|
||||
let inputDir = path.resolve(options.input);
|
||||
let outputPath = path.resolve(options.output);
|
||||
|
||||
// Detect if user explicitly provided -i/--input or -o/--output
|
||||
const argv = process.argv.slice(2);
|
||||
const userSpecifiedInput = argv.some((a) =>
|
||||
a === "-i" || a === "--input" || a.startsWith("--input=")
|
||||
const userSpecifiedInput = argv.some(
|
||||
(a) => a === '-i' || a === '--input' || a.startsWith('--input='),
|
||||
);
|
||||
const userSpecifiedOutput = argv.some((a) =>
|
||||
a === "-o" || a === "--output" || a.startsWith("--output=")
|
||||
const userSpecifiedOutput = argv.some(
|
||||
(a) => a === '-o' || a === '--output' || a.startsWith('--output='),
|
||||
);
|
||||
const noPathArgs = !userSpecifiedInput && !userSpecifiedOutput;
|
||||
const noPathArguments = !userSpecifiedInput && !userSpecifiedOutput;
|
||||
|
||||
if (noPathArgs) {
|
||||
if (noPathArguments) {
|
||||
const detectedRoot = await findProjectRoot(process.cwd());
|
||||
const suggestedOutput = detectedRoot
|
||||
? path.join(detectedRoot, "flattened-codebase.xml")
|
||||
: path.resolve("flattened-codebase.xml");
|
||||
? path.join(detectedRoot, 'flattened-codebase.xml')
|
||||
: path.resolve('flattened-codebase.xml');
|
||||
|
||||
if (detectedRoot) {
|
||||
const useDefaults = await promptYesNo(
|
||||
@@ -107,26 +101,25 @@ program
|
||||
inputDir = detectedRoot;
|
||||
outputPath = suggestedOutput;
|
||||
} else {
|
||||
inputDir = await promptPath(
|
||||
"Enter input directory path",
|
||||
process.cwd(),
|
||||
);
|
||||
inputDir = await promptPath('Enter input directory path', process.cwd());
|
||||
outputPath = await promptPath(
|
||||
"Enter output file path",
|
||||
path.join(inputDir, "flattened-codebase.xml"),
|
||||
'Enter output file path',
|
||||
path.join(inputDir, 'flattened-codebase.xml'),
|
||||
);
|
||||
}
|
||||
} else {
|
||||
console.log("Could not auto-detect a project root.");
|
||||
inputDir = await promptPath(
|
||||
"Enter input directory path",
|
||||
process.cwd(),
|
||||
);
|
||||
console.log('Could not auto-detect a project root.');
|
||||
inputDir = await promptPath('Enter input directory path', process.cwd());
|
||||
outputPath = await promptPath(
|
||||
"Enter output file path",
|
||||
path.join(inputDir, "flattened-codebase.xml"),
|
||||
'Enter output file path',
|
||||
path.join(inputDir, 'flattened-codebase.xml'),
|
||||
);
|
||||
}
|
||||
} else {
|
||||
console.error(
|
||||
'Could not auto-detect a project root and no arguments were provided. Please specify -i/--input and -o/--output.',
|
||||
);
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
// Ensure output directory exists
|
||||
@@ -134,24 +127,23 @@ program
|
||||
|
||||
try {
|
||||
// Verify input directory exists
|
||||
if (!await fs.pathExists(inputDir)) {
|
||||
if (!(await fs.pathExists(inputDir))) {
|
||||
console.error(`❌ Error: Input directory does not exist: ${inputDir}`);
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
// Import ora dynamically
|
||||
const { default: ora } = await import("ora");
|
||||
const { default: ora } = await import('ora');
|
||||
|
||||
// Start file discovery with spinner
|
||||
const discoverySpinner = ora("🔍 Discovering files...").start();
|
||||
const discoverySpinner = ora('🔍 Discovering files...').start();
|
||||
const files = await discoverFiles(inputDir);
|
||||
const filteredFiles = await filterFiles(files, inputDir);
|
||||
discoverySpinner.succeed(
|
||||
`📁 Found ${filteredFiles.length} files to include`,
|
||||
);
|
||||
discoverySpinner.succeed(`📁 Found ${filteredFiles.length} files to include`);
|
||||
|
||||
// Process files with progress tracking
|
||||
const processingSpinner = ora("📄 Processing files...").start();
|
||||
console.log('Reading file contents');
|
||||
const processingSpinner = ora('📄 Processing files...').start();
|
||||
const aggregatedContent = await aggregateFileContents(
|
||||
filteredFiles,
|
||||
inputDir,
|
||||
@@ -165,31 +157,23 @@ program
|
||||
}
|
||||
|
||||
// Generate XML output using streaming
|
||||
const xmlSpinner = ora("🔧 Generating XML output...").start();
|
||||
const xmlSpinner = ora('🔧 Generating XML output...').start();
|
||||
await generateXMLOutput(aggregatedContent, outputPath);
|
||||
xmlSpinner.succeed("📝 XML generation completed");
|
||||
xmlSpinner.succeed('📝 XML generation completed');
|
||||
|
||||
// Calculate and display statistics
|
||||
const outputStats = await fs.stat(outputPath);
|
||||
const stats = await calculateStatistics(
|
||||
aggregatedContent,
|
||||
outputStats.size,
|
||||
inputDir,
|
||||
);
|
||||
const stats = await calculateStatistics(aggregatedContent, outputStats.size, inputDir);
|
||||
|
||||
// Display completion summary
|
||||
console.log("\n📊 Completion Summary:");
|
||||
console.log('\n📊 Completion Summary:');
|
||||
console.log(
|
||||
`✅ Successfully processed ${filteredFiles.length} files into ${
|
||||
path.basename(outputPath)
|
||||
}`,
|
||||
`✅ Successfully processed ${filteredFiles.length} files into ${path.basename(outputPath)}`,
|
||||
);
|
||||
console.log(`📁 Output file: ${outputPath}`);
|
||||
console.log(`📏 Total source size: ${stats.totalSize}`);
|
||||
console.log(`📄 Generated XML size: ${stats.xmlSize}`);
|
||||
console.log(
|
||||
`📝 Total lines of code: ${stats.totalLines.toLocaleString()}`,
|
||||
);
|
||||
console.log(`📝 Total lines of code: ${stats.totalLines.toLocaleString()}`);
|
||||
console.log(`🔢 Estimated tokens: ${stats.estimatedTokens}`);
|
||||
console.log(
|
||||
`📊 File breakdown: ${stats.textFiles} text, ${stats.binaryFiles} binary, ${stats.errorFiles} errors\n`,
|
||||
@@ -197,92 +181,75 @@ program
|
||||
|
||||
// Ask user if they want detailed stats + markdown report
|
||||
const generateDetailed = await promptYesNo(
|
||||
"Generate detailed stats (console + markdown) now?",
|
||||
'Generate detailed stats (console + markdown) now?',
|
||||
true,
|
||||
);
|
||||
|
||||
if (generateDetailed) {
|
||||
// Additional detailed stats
|
||||
console.log("\n📈 Size Percentiles:");
|
||||
console.log('\n📈 Size Percentiles:');
|
||||
console.log(
|
||||
` Avg: ${
|
||||
Math.round(stats.avgFileSize).toLocaleString()
|
||||
} B, Median: ${
|
||||
Math.round(stats.medianFileSize).toLocaleString()
|
||||
} B, p90: ${stats.p90.toLocaleString()} B, p95: ${stats.p95.toLocaleString()} B, p99: ${stats.p99.toLocaleString()} B`,
|
||||
` Avg: ${Math.round(stats.avgFileSize).toLocaleString()} B, Median: ${Math.round(
|
||||
stats.medianFileSize,
|
||||
).toLocaleString()} B, p90: ${stats.p90.toLocaleString()} B, p95: ${stats.p95.toLocaleString()} B, p99: ${stats.p99.toLocaleString()} B`,
|
||||
);
|
||||
|
||||
if (Array.isArray(stats.histogram) && stats.histogram.length) {
|
||||
console.log("\n🧮 Size Histogram:");
|
||||
if (Array.isArray(stats.histogram) && stats.histogram.length > 0) {
|
||||
console.log('\n🧮 Size Histogram:');
|
||||
for (const b of stats.histogram.slice(0, 2)) {
|
||||
console.log(
|
||||
` ${b.label}: ${b.count} files, ${b.bytes.toLocaleString()} bytes`,
|
||||
);
|
||||
console.log(` ${b.label}: ${b.count} files, ${b.bytes.toLocaleString()} bytes`);
|
||||
}
|
||||
if (stats.histogram.length > 2) {
|
||||
console.log(` … and ${stats.histogram.length - 2} more buckets`);
|
||||
}
|
||||
}
|
||||
|
||||
if (Array.isArray(stats.byExtension) && stats.byExtension.length) {
|
||||
if (Array.isArray(stats.byExtension) && stats.byExtension.length > 0) {
|
||||
const topExt = stats.byExtension.slice(0, 2);
|
||||
console.log("\n📦 Top Extensions:");
|
||||
console.log('\n📦 Top Extensions:');
|
||||
for (const e of topExt) {
|
||||
const pct = stats.totalBytes
|
||||
? ((e.bytes / stats.totalBytes) * 100)
|
||||
: 0;
|
||||
const pct = stats.totalBytes ? (e.bytes / stats.totalBytes) * 100 : 0;
|
||||
console.log(
|
||||
` ${e.ext}: ${e.count} files, ${e.bytes.toLocaleString()} bytes (${
|
||||
pct.toFixed(2)
|
||||
}%)`,
|
||||
` ${e.ext}: ${e.count} files, ${e.bytes.toLocaleString()} bytes (${pct.toFixed(
|
||||
2,
|
||||
)}%)`,
|
||||
);
|
||||
}
|
||||
if (stats.byExtension.length > 2) {
|
||||
console.log(
|
||||
` … and ${stats.byExtension.length - 2} more extensions`,
|
||||
);
|
||||
console.log(` … and ${stats.byExtension.length - 2} more extensions`);
|
||||
}
|
||||
}
|
||||
|
||||
if (Array.isArray(stats.byDirectory) && stats.byDirectory.length) {
|
||||
if (Array.isArray(stats.byDirectory) && stats.byDirectory.length > 0) {
|
||||
const topDir = stats.byDirectory.slice(0, 2);
|
||||
console.log("\n📂 Top Directories:");
|
||||
console.log('\n📂 Top Directories:');
|
||||
for (const d of topDir) {
|
||||
const pct = stats.totalBytes
|
||||
? ((d.bytes / stats.totalBytes) * 100)
|
||||
: 0;
|
||||
const pct = stats.totalBytes ? (d.bytes / stats.totalBytes) * 100 : 0;
|
||||
console.log(
|
||||
` ${d.dir}: ${d.count} files, ${d.bytes.toLocaleString()} bytes (${
|
||||
pct.toFixed(2)
|
||||
}%)`,
|
||||
` ${d.dir}: ${d.count} files, ${d.bytes.toLocaleString()} bytes (${pct.toFixed(
|
||||
2,
|
||||
)}%)`,
|
||||
);
|
||||
}
|
||||
if (stats.byDirectory.length > 2) {
|
||||
console.log(
|
||||
` … and ${stats.byDirectory.length - 2} more directories`,
|
||||
);
|
||||
console.log(` … and ${stats.byDirectory.length - 2} more directories`);
|
||||
}
|
||||
}
|
||||
|
||||
if (
|
||||
Array.isArray(stats.depthDistribution) &&
|
||||
stats.depthDistribution.length
|
||||
) {
|
||||
console.log("\n🌳 Depth Distribution:");
|
||||
if (Array.isArray(stats.depthDistribution) && stats.depthDistribution.length > 0) {
|
||||
console.log('\n🌳 Depth Distribution:');
|
||||
const dd = stats.depthDistribution.slice(0, 2);
|
||||
let line = " " + dd.map((d) => `${d.depth}:${d.count}`).join(" ");
|
||||
let line = ' ' + dd.map((d) => `${d.depth}:${d.count}`).join(' ');
|
||||
if (stats.depthDistribution.length > 2) {
|
||||
line += ` … +${stats.depthDistribution.length - 2} more`;
|
||||
}
|
||||
console.log(line);
|
||||
}
|
||||
|
||||
if (Array.isArray(stats.longestPaths) && stats.longestPaths.length) {
|
||||
console.log("\n🧵 Longest Paths:");
|
||||
if (Array.isArray(stats.longestPaths) && stats.longestPaths.length > 0) {
|
||||
console.log('\n🧵 Longest Paths:');
|
||||
for (const p of stats.longestPaths.slice(0, 2)) {
|
||||
console.log(
|
||||
` ${p.path} (${p.length} chars, ${p.size.toLocaleString()} bytes)`,
|
||||
);
|
||||
console.log(` ${p.path} (${p.length} chars, ${p.size.toLocaleString()} bytes)`);
|
||||
}
|
||||
if (stats.longestPaths.length > 2) {
|
||||
console.log(` … and ${stats.longestPaths.length - 2} more paths`);
|
||||
@@ -290,7 +257,7 @@ program
|
||||
}
|
||||
|
||||
if (stats.temporal) {
|
||||
console.log("\n⏱️ Temporal:");
|
||||
console.log('\n⏱️ Temporal:');
|
||||
if (stats.temporal.oldest) {
|
||||
console.log(
|
||||
` Oldest: ${stats.temporal.oldest.path} (${stats.temporal.oldest.mtime})`,
|
||||
@@ -302,104 +269,82 @@ program
|
||||
);
|
||||
}
|
||||
if (Array.isArray(stats.temporal.ageBuckets)) {
|
||||
console.log(" Age buckets:");
|
||||
console.log(' Age buckets:');
|
||||
for (const b of stats.temporal.ageBuckets.slice(0, 2)) {
|
||||
console.log(
|
||||
` ${b.label}: ${b.count} files, ${b.bytes.toLocaleString()} bytes`,
|
||||
);
|
||||
console.log(` ${b.label}: ${b.count} files, ${b.bytes.toLocaleString()} bytes`);
|
||||
}
|
||||
if (stats.temporal.ageBuckets.length > 2) {
|
||||
console.log(
|
||||
` … and ${
|
||||
stats.temporal.ageBuckets.length - 2
|
||||
} more buckets`,
|
||||
);
|
||||
console.log(` … and ${stats.temporal.ageBuckets.length - 2} more buckets`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (stats.quality) {
|
||||
console.log("\n✅ Quality Signals:");
|
||||
console.log('\n✅ Quality Signals:');
|
||||
console.log(` Zero-byte files: ${stats.quality.zeroByteFiles}`);
|
||||
console.log(` Empty text files: ${stats.quality.emptyTextFiles}`);
|
||||
console.log(` Hidden files: ${stats.quality.hiddenFiles}`);
|
||||
console.log(` Symlinks: ${stats.quality.symlinks}`);
|
||||
console.log(
|
||||
` Large files (>= ${
|
||||
(stats.quality.largeThreshold / (1024 * 1024)).toFixed(0)
|
||||
} MB): ${stats.quality.largeFilesCount}`,
|
||||
` Large files (>= ${(stats.quality.largeThreshold / (1024 * 1024)).toFixed(
|
||||
0,
|
||||
)} MB): ${stats.quality.largeFilesCount}`,
|
||||
);
|
||||
console.log(
|
||||
` Suspiciously large files (>= 100 MB): ${stats.quality.suspiciousLargeFilesCount}`,
|
||||
);
|
||||
}
|
||||
|
||||
if (
|
||||
Array.isArray(stats.duplicateCandidates) &&
|
||||
stats.duplicateCandidates.length
|
||||
) {
|
||||
console.log("\n🧬 Duplicate Candidates:");
|
||||
if (Array.isArray(stats.duplicateCandidates) && stats.duplicateCandidates.length > 0) {
|
||||
console.log('\n🧬 Duplicate Candidates:');
|
||||
for (const d of stats.duplicateCandidates.slice(0, 2)) {
|
||||
console.log(
|
||||
` ${d.reason}: ${d.count} files @ ${d.size.toLocaleString()} bytes`,
|
||||
);
|
||||
console.log(` ${d.reason}: ${d.count} files @ ${d.size.toLocaleString()} bytes`);
|
||||
}
|
||||
if (stats.duplicateCandidates.length > 2) {
|
||||
console.log(
|
||||
` … and ${stats.duplicateCandidates.length - 2} more groups`,
|
||||
);
|
||||
console.log(` … and ${stats.duplicateCandidates.length - 2} more groups`);
|
||||
}
|
||||
}
|
||||
|
||||
if (typeof stats.compressibilityRatio === "number") {
|
||||
if (typeof stats.compressibilityRatio === 'number') {
|
||||
console.log(
|
||||
`\n🗜️ Compressibility ratio (sampled): ${
|
||||
(stats.compressibilityRatio * 100).toFixed(2)
|
||||
}%`,
|
||||
`\n🗜️ Compressibility ratio (sampled): ${(stats.compressibilityRatio * 100).toFixed(
|
||||
2,
|
||||
)}%`,
|
||||
);
|
||||
}
|
||||
|
||||
if (stats.git && stats.git.isRepo) {
|
||||
console.log("\n🔧 Git:");
|
||||
console.log('\n🔧 Git:');
|
||||
console.log(
|
||||
` Tracked: ${stats.git.trackedCount} files, ${stats.git.trackedBytes.toLocaleString()} bytes`,
|
||||
);
|
||||
console.log(
|
||||
` Untracked: ${stats.git.untrackedCount} files, ${stats.git.untrackedBytes.toLocaleString()} bytes`,
|
||||
);
|
||||
if (
|
||||
Array.isArray(stats.git.lfsCandidates) &&
|
||||
stats.git.lfsCandidates.length
|
||||
) {
|
||||
console.log(" LFS candidates (top 2):");
|
||||
if (Array.isArray(stats.git.lfsCandidates) && stats.git.lfsCandidates.length > 0) {
|
||||
console.log(' LFS candidates (top 2):');
|
||||
for (const f of stats.git.lfsCandidates.slice(0, 2)) {
|
||||
console.log(` ${f.path} (${f.size.toLocaleString()} bytes)`);
|
||||
}
|
||||
if (stats.git.lfsCandidates.length > 2) {
|
||||
console.log(
|
||||
` … and ${stats.git.lfsCandidates.length - 2} more`,
|
||||
);
|
||||
console.log(` … and ${stats.git.lfsCandidates.length - 2} more`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (Array.isArray(stats.largestFiles) && stats.largestFiles.length) {
|
||||
console.log("\n📚 Largest Files (top 2):");
|
||||
if (Array.isArray(stats.largestFiles) && stats.largestFiles.length > 0) {
|
||||
console.log('\n📚 Largest Files (top 2):');
|
||||
for (const f of stats.largestFiles.slice(0, 2)) {
|
||||
// Show LOC for text files when available; omit ext and mtime
|
||||
let locStr = "";
|
||||
let locStr = '';
|
||||
if (!f.isBinary && Array.isArray(aggregatedContent?.textFiles)) {
|
||||
const tf = aggregatedContent.textFiles.find((t) =>
|
||||
t.path === f.path
|
||||
);
|
||||
if (tf && typeof tf.lines === "number") {
|
||||
const tf = aggregatedContent.textFiles.find((t) => t.path === f.path);
|
||||
if (tf && typeof tf.lines === 'number') {
|
||||
locStr = `, LOC: ${tf.lines.toLocaleString()}`;
|
||||
}
|
||||
}
|
||||
console.log(
|
||||
` ${f.path} – ${f.sizeFormatted} (${
|
||||
f.percentOfTotal.toFixed(2)
|
||||
}%)${locStr}`,
|
||||
` ${f.path} – ${f.sizeFormatted} (${f.percentOfTotal.toFixed(2)}%)${locStr}`,
|
||||
);
|
||||
}
|
||||
if (stats.largestFiles.length > 2) {
|
||||
@@ -409,262 +354,214 @@ program
|
||||
|
||||
// Write a comprehensive markdown report next to the XML
|
||||
{
|
||||
const mdPath = outputPath.endsWith(".xml")
|
||||
? outputPath.replace(/\.xml$/i, ".stats.md")
|
||||
: outputPath + ".stats.md";
|
||||
const mdPath = outputPath.endsWith('.xml')
|
||||
? outputPath.replace(/\.xml$/i, '.stats.md')
|
||||
: outputPath + '.stats.md';
|
||||
try {
|
||||
const pct = (num, den) => (den ? ((num / den) * 100) : 0);
|
||||
const pct = (num, den) => (den ? (num / den) * 100 : 0);
|
||||
const md = [];
|
||||
md.push(`# 🧾 Flatten Stats for ${path.basename(outputPath)}`);
|
||||
md.push("");
|
||||
md.push("## 📊 Summary");
|
||||
md.push(`- Total source size: ${stats.totalSize}`);
|
||||
md.push(`- Generated XML size: ${stats.xmlSize}`);
|
||||
md.push(
|
||||
`# 🧾 Flatten Stats for ${path.basename(outputPath)}`,
|
||||
'',
|
||||
'## 📊 Summary',
|
||||
`- Total source size: ${stats.totalSize}`,
|
||||
`- Generated XML size: ${stats.xmlSize}`,
|
||||
`- Total lines of code: ${stats.totalLines.toLocaleString()}`,
|
||||
);
|
||||
md.push(`- Estimated tokens: ${stats.estimatedTokens}`);
|
||||
md.push(
|
||||
`- Estimated tokens: ${stats.estimatedTokens}`,
|
||||
`- File breakdown: ${stats.textFiles} text, ${stats.binaryFiles} binary, ${stats.errorFiles} errors`,
|
||||
'',
|
||||
'## 📈 Size Percentiles',
|
||||
`Avg: ${Math.round(stats.avgFileSize).toLocaleString()} B, Median: ${Math.round(
|
||||
stats.medianFileSize,
|
||||
).toLocaleString()} B, p90: ${stats.p90.toLocaleString()} B, p95: ${stats.p95.toLocaleString()} B, p99: ${stats.p99.toLocaleString()} B`,
|
||||
'',
|
||||
);
|
||||
md.push("");
|
||||
|
||||
// Percentiles
|
||||
md.push("## 📈 Size Percentiles");
|
||||
md.push(
|
||||
`Avg: ${
|
||||
Math.round(stats.avgFileSize).toLocaleString()
|
||||
} B, Median: ${
|
||||
Math.round(stats.medianFileSize).toLocaleString()
|
||||
} B, p90: ${stats.p90.toLocaleString()} B, p95: ${stats.p95.toLocaleString()} B, p99: ${stats.p99.toLocaleString()} B`,
|
||||
);
|
||||
md.push("");
|
||||
|
||||
// Histogram
|
||||
if (Array.isArray(stats.histogram) && stats.histogram.length) {
|
||||
md.push("## 🧮 Size Histogram");
|
||||
md.push("| Bucket | Files | Bytes |");
|
||||
md.push("| --- | ---: | ---: |");
|
||||
if (Array.isArray(stats.histogram) && stats.histogram.length > 0) {
|
||||
md.push(
|
||||
'## 🧮 Size Histogram',
|
||||
'| Bucket | Files | Bytes |',
|
||||
'| --- | ---: | ---: |',
|
||||
);
|
||||
for (const b of stats.histogram) {
|
||||
md.push(
|
||||
`| ${b.label} | ${b.count} | ${b.bytes.toLocaleString()} |`,
|
||||
);
|
||||
md.push(`| ${b.label} | ${b.count} | ${b.bytes.toLocaleString()} |`);
|
||||
}
|
||||
md.push("");
|
||||
md.push('');
|
||||
}
|
||||
|
||||
// Top Extensions
|
||||
if (Array.isArray(stats.byExtension) && stats.byExtension.length) {
|
||||
md.push("## 📦 Top Extensions by Bytes (Top 20)");
|
||||
md.push("| Ext | Files | Bytes | % of total |");
|
||||
md.push("| --- | ---: | ---: | ---: |");
|
||||
if (Array.isArray(stats.byExtension) && stats.byExtension.length > 0) {
|
||||
md.push(
|
||||
'## 📦 Top Extensions by Bytes (Top 20)',
|
||||
'| Ext | Files | Bytes | % of total |',
|
||||
'| --- | ---: | ---: | ---: |',
|
||||
);
|
||||
for (const e of stats.byExtension.slice(0, 20)) {
|
||||
const p = pct(e.bytes, stats.totalBytes);
|
||||
md.push(
|
||||
`| ${e.ext} | ${e.count} | ${e.bytes.toLocaleString()} | ${
|
||||
p.toFixed(2)
|
||||
}% |`,
|
||||
`| ${e.ext} | ${e.count} | ${e.bytes.toLocaleString()} | ${p.toFixed(2)}% |`,
|
||||
);
|
||||
}
|
||||
md.push("");
|
||||
md.push('');
|
||||
}
|
||||
|
||||
// Top Directories
|
||||
if (Array.isArray(stats.byDirectory) && stats.byDirectory.length) {
|
||||
md.push("## 📂 Top Directories by Bytes (Top 20)");
|
||||
md.push("| Directory | Files | Bytes | % of total |");
|
||||
md.push("| --- | ---: | ---: | ---: |");
|
||||
if (Array.isArray(stats.byDirectory) && stats.byDirectory.length > 0) {
|
||||
md.push(
|
||||
'## 📂 Top Directories by Bytes (Top 20)',
|
||||
'| Directory | Files | Bytes | % of total |',
|
||||
'| --- | ---: | ---: | ---: |',
|
||||
);
|
||||
for (const d of stats.byDirectory.slice(0, 20)) {
|
||||
const p = pct(d.bytes, stats.totalBytes);
|
||||
md.push(
|
||||
`| ${d.dir} | ${d.count} | ${d.bytes.toLocaleString()} | ${
|
||||
p.toFixed(2)
|
||||
}% |`,
|
||||
`| ${d.dir} | ${d.count} | ${d.bytes.toLocaleString()} | ${p.toFixed(2)}% |`,
|
||||
);
|
||||
}
|
||||
md.push("");
|
||||
md.push('');
|
||||
}
|
||||
|
||||
// Depth distribution
|
||||
if (
|
||||
Array.isArray(stats.depthDistribution) &&
|
||||
stats.depthDistribution.length
|
||||
) {
|
||||
md.push("## 🌳 Depth Distribution");
|
||||
md.push("| Depth | Count |");
|
||||
md.push("| ---: | ---: |");
|
||||
if (Array.isArray(stats.depthDistribution) && stats.depthDistribution.length > 0) {
|
||||
md.push('## 🌳 Depth Distribution', '| Depth | Count |', '| ---: | ---: |');
|
||||
for (const d of stats.depthDistribution) {
|
||||
md.push(`| ${d.depth} | ${d.count} |`);
|
||||
}
|
||||
md.push("");
|
||||
md.push('');
|
||||
}
|
||||
|
||||
// Longest paths
|
||||
if (
|
||||
Array.isArray(stats.longestPaths) && stats.longestPaths.length
|
||||
) {
|
||||
md.push("## 🧵 Longest Paths (Top 25)");
|
||||
md.push("| Path | Length | Bytes |");
|
||||
md.push("| --- | ---: | ---: |");
|
||||
if (Array.isArray(stats.longestPaths) && stats.longestPaths.length > 0) {
|
||||
md.push(
|
||||
'## 🧵 Longest Paths (Top 25)',
|
||||
'| Path | Length | Bytes |',
|
||||
'| --- | ---: | ---: |',
|
||||
);
|
||||
for (const pth of stats.longestPaths) {
|
||||
md.push(
|
||||
`| ${pth.path} | ${pth.length} | ${pth.size.toLocaleString()} |`,
|
||||
);
|
||||
md.push(`| ${pth.path} | ${pth.length} | ${pth.size.toLocaleString()} |`);
|
||||
}
|
||||
md.push("");
|
||||
md.push('');
|
||||
}
|
||||
|
||||
// Temporal
|
||||
if (stats.temporal) {
|
||||
md.push("## ⏱️ Temporal");
|
||||
md.push('## ⏱️ Temporal');
|
||||
if (stats.temporal.oldest) {
|
||||
md.push(
|
||||
`- Oldest: ${stats.temporal.oldest.path} (${stats.temporal.oldest.mtime})`,
|
||||
);
|
||||
md.push(`- Oldest: ${stats.temporal.oldest.path} (${stats.temporal.oldest.mtime})`);
|
||||
}
|
||||
if (stats.temporal.newest) {
|
||||
md.push(
|
||||
`- Newest: ${stats.temporal.newest.path} (${stats.temporal.newest.mtime})`,
|
||||
);
|
||||
md.push(`- Newest: ${stats.temporal.newest.path} (${stats.temporal.newest.mtime})`);
|
||||
}
|
||||
if (Array.isArray(stats.temporal.ageBuckets)) {
|
||||
md.push("");
|
||||
md.push("| Age | Files | Bytes |");
|
||||
md.push("| --- | ---: | ---: |");
|
||||
md.push('', '| Age | Files | Bytes |', '| --- | ---: | ---: |');
|
||||
for (const b of stats.temporal.ageBuckets) {
|
||||
md.push(
|
||||
`| ${b.label} | ${b.count} | ${b.bytes.toLocaleString()} |`,
|
||||
);
|
||||
md.push(`| ${b.label} | ${b.count} | ${b.bytes.toLocaleString()} |`);
|
||||
}
|
||||
}
|
||||
md.push("");
|
||||
md.push('');
|
||||
}
|
||||
|
||||
// Quality signals
|
||||
if (stats.quality) {
|
||||
md.push("## ✅ Quality Signals");
|
||||
md.push(`- Zero-byte files: ${stats.quality.zeroByteFiles}`);
|
||||
md.push(`- Empty text files: ${stats.quality.emptyTextFiles}`);
|
||||
md.push(`- Hidden files: ${stats.quality.hiddenFiles}`);
|
||||
md.push(`- Symlinks: ${stats.quality.symlinks}`);
|
||||
md.push(
|
||||
`- Large files (>= ${
|
||||
(stats.quality.largeThreshold / (1024 * 1024)).toFixed(0)
|
||||
} MB): ${stats.quality.largeFilesCount}`,
|
||||
);
|
||||
md.push(
|
||||
'## ✅ Quality Signals',
|
||||
`- Zero-byte files: ${stats.quality.zeroByteFiles}`,
|
||||
`- Empty text files: ${stats.quality.emptyTextFiles}`,
|
||||
`- Hidden files: ${stats.quality.hiddenFiles}`,
|
||||
`- Symlinks: ${stats.quality.symlinks}`,
|
||||
`- Large files (>= ${(stats.quality.largeThreshold / (1024 * 1024)).toFixed(0)} MB): ${stats.quality.largeFilesCount}`,
|
||||
`- Suspiciously large files (>= 100 MB): ${stats.quality.suspiciousLargeFilesCount}`,
|
||||
'',
|
||||
);
|
||||
md.push("");
|
||||
}
|
||||
|
||||
// Duplicates
|
||||
if (
|
||||
Array.isArray(stats.duplicateCandidates) &&
|
||||
stats.duplicateCandidates.length
|
||||
) {
|
||||
md.push("## 🧬 Duplicate Candidates");
|
||||
md.push("| Reason | Files | Size (bytes) |");
|
||||
md.push("| --- | ---: | ---: |");
|
||||
if (Array.isArray(stats.duplicateCandidates) && stats.duplicateCandidates.length > 0) {
|
||||
md.push(
|
||||
'## 🧬 Duplicate Candidates',
|
||||
'| Reason | Files | Size (bytes) |',
|
||||
'| --- | ---: | ---: |',
|
||||
);
|
||||
for (const d of stats.duplicateCandidates) {
|
||||
md.push(
|
||||
`| ${d.reason} | ${d.count} | ${d.size.toLocaleString()} |`,
|
||||
);
|
||||
md.push(`| ${d.reason} | ${d.count} | ${d.size.toLocaleString()} |`);
|
||||
}
|
||||
md.push("");
|
||||
// Detailed listing of duplicate file names and locations
|
||||
md.push("### 🧬 Duplicate Groups Details");
|
||||
md.push('', '### 🧬 Duplicate Groups Details');
|
||||
let dupIndex = 1;
|
||||
for (const d of stats.duplicateCandidates) {
|
||||
md.push(
|
||||
`#### Group ${dupIndex}: ${d.count} files @ ${d.size.toLocaleString()} bytes (${d.reason})`,
|
||||
);
|
||||
if (Array.isArray(d.files) && d.files.length) {
|
||||
if (Array.isArray(d.files) && d.files.length > 0) {
|
||||
for (const fp of d.files) {
|
||||
md.push(`- ${fp}`);
|
||||
}
|
||||
} else {
|
||||
md.push("- (file list unavailable)");
|
||||
md.push('- (file list unavailable)');
|
||||
}
|
||||
md.push("");
|
||||
md.push('');
|
||||
dupIndex++;
|
||||
}
|
||||
md.push("");
|
||||
md.push('');
|
||||
}
|
||||
|
||||
// Compressibility
|
||||
if (typeof stats.compressibilityRatio === "number") {
|
||||
md.push("## 🗜️ Compressibility");
|
||||
if (typeof stats.compressibilityRatio === 'number') {
|
||||
md.push(
|
||||
`Sampled compressibility ratio: ${
|
||||
(stats.compressibilityRatio * 100).toFixed(2)
|
||||
}%`,
|
||||
'## 🗜️ Compressibility',
|
||||
`Sampled compressibility ratio: ${(stats.compressibilityRatio * 100).toFixed(2)}%`,
|
||||
'',
|
||||
);
|
||||
md.push("");
|
||||
}
|
||||
|
||||
// Git
|
||||
if (stats.git && stats.git.isRepo) {
|
||||
md.push("## 🔧 Git");
|
||||
md.push(
|
||||
'## 🔧 Git',
|
||||
`- Tracked: ${stats.git.trackedCount} files, ${stats.git.trackedBytes.toLocaleString()} bytes`,
|
||||
);
|
||||
md.push(
|
||||
`- Untracked: ${stats.git.untrackedCount} files, ${stats.git.untrackedBytes.toLocaleString()} bytes`,
|
||||
);
|
||||
if (
|
||||
Array.isArray(stats.git.lfsCandidates) &&
|
||||
stats.git.lfsCandidates.length
|
||||
) {
|
||||
md.push("");
|
||||
md.push("### 📦 LFS Candidates (Top 20)");
|
||||
md.push("| Path | Bytes |");
|
||||
md.push("| --- | ---: |");
|
||||
if (Array.isArray(stats.git.lfsCandidates) && stats.git.lfsCandidates.length > 0) {
|
||||
md.push('', '### 📦 LFS Candidates (Top 20)', '| Path | Bytes |', '| --- | ---: |');
|
||||
for (const f of stats.git.lfsCandidates.slice(0, 20)) {
|
||||
md.push(`| ${f.path} | ${f.size.toLocaleString()} |`);
|
||||
}
|
||||
}
|
||||
md.push("");
|
||||
md.push('');
|
||||
}
|
||||
|
||||
// Largest Files
|
||||
if (
|
||||
Array.isArray(stats.largestFiles) && stats.largestFiles.length
|
||||
) {
|
||||
md.push("## 📚 Largest Files (Top 50)");
|
||||
md.push("| Path | Size | % of total | LOC |");
|
||||
md.push("| --- | ---: | ---: | ---: |");
|
||||
if (Array.isArray(stats.largestFiles) && stats.largestFiles.length > 0) {
|
||||
md.push(
|
||||
'## 📚 Largest Files (Top 50)',
|
||||
'| Path | Size | % of total | LOC |',
|
||||
'| --- | ---: | ---: | ---: |',
|
||||
);
|
||||
for (const f of stats.largestFiles) {
|
||||
let loc = "";
|
||||
if (
|
||||
!f.isBinary && Array.isArray(aggregatedContent?.textFiles)
|
||||
) {
|
||||
const tf = aggregatedContent.textFiles.find((t) =>
|
||||
t.path === f.path
|
||||
);
|
||||
if (tf && typeof tf.lines === "number") {
|
||||
let loc = '';
|
||||
if (!f.isBinary && Array.isArray(aggregatedContent?.textFiles)) {
|
||||
const tf = aggregatedContent.textFiles.find((t) => t.path === f.path);
|
||||
if (tf && typeof tf.lines === 'number') {
|
||||
loc = tf.lines.toLocaleString();
|
||||
}
|
||||
}
|
||||
md.push(
|
||||
`| ${f.path} | ${f.sizeFormatted} | ${
|
||||
f.percentOfTotal.toFixed(2)
|
||||
}% | ${loc} |`,
|
||||
`| ${f.path} | ${f.sizeFormatted} | ${f.percentOfTotal.toFixed(2)}% | ${loc} |`,
|
||||
);
|
||||
}
|
||||
md.push("");
|
||||
md.push('');
|
||||
}
|
||||
|
||||
await fs.writeFile(mdPath, md.join("\n"));
|
||||
await fs.writeFile(mdPath, md.join('\n'));
|
||||
console.log(`\n🧾 Detailed stats report written to: ${mdPath}`);
|
||||
} catch (e) {
|
||||
console.warn(`⚠️ Failed to write stats markdown: ${e.message}`);
|
||||
} catch (error) {
|
||||
console.warn(`⚠️ Failed to write stats markdown: ${error.message}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
console.error("❌ Critical error:", error.message);
|
||||
console.error("An unexpected error occurred.");
|
||||
console.error('❌ Critical error:', error.message);
|
||||
console.error('An unexpected error occurred.');
|
||||
process.exit(1);
|
||||
}
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user