chore: add code formatting config and pre-commit hooks (#450)

This commit is contained in:
manjaroblack
2025-08-16 19:08:39 -05:00
committed by GitHub
parent 51284d6ecf
commit ed539432fb
130 changed files with 11886 additions and 10939 deletions

View File

@@ -1,7 +1,7 @@
const fs = require("fs-extra");
const path = require("node:path");
const os = require("node:os");
const { isBinaryFile } = require("./binary.js");
const fs = require('fs-extra');
const path = require('node:path');
const os = require('node:os');
const { isBinaryFile } = require('./binary.js');
/**
* Aggregate file contents with bounded concurrency.
@@ -22,7 +22,7 @@ async function aggregateFileContents(files, rootDir, spinner = null) {
// Automatic concurrency selection based on CPU count and workload size.
// - Base on 2x logical CPUs, clamped to [2, 64]
// - For very small workloads, avoid excessive parallelism
const cpuCount = (os.cpus && Array.isArray(os.cpus()) ? os.cpus().length : (os.cpus?.length || 4));
const cpuCount = os.cpus && Array.isArray(os.cpus()) ? os.cpus().length : os.cpus?.length || 4;
let concurrency = Math.min(64, Math.max(2, (Number(cpuCount) || 4) * 2));
if (files.length > 0 && files.length < concurrency) {
concurrency = Math.max(1, Math.min(concurrency, Math.ceil(files.length / 2)));
@@ -37,16 +37,16 @@ async function aggregateFileContents(files, rootDir, spinner = null) {
const binary = await isBinaryFile(filePath);
if (binary) {
const size = (await fs.stat(filePath)).size;
const { size } = await fs.stat(filePath);
results.binaryFiles.push({ path: relativePath, absolutePath: filePath, size });
} else {
const content = await fs.readFile(filePath, "utf8");
const content = await fs.readFile(filePath, 'utf8');
results.textFiles.push({
path: relativePath,
absolutePath: filePath,
content,
size: content.length,
lines: content.split("\n").length,
lines: content.split('\n').length,
});
}
} catch (error) {
@@ -63,8 +63,8 @@ async function aggregateFileContents(files, rootDir, spinner = null) {
}
}
for (let i = 0; i < files.length; i += concurrency) {
const slice = files.slice(i, i + concurrency);
for (let index = 0; index < files.length; index += concurrency) {
const slice = files.slice(index, index + concurrency);
await Promise.all(slice.map(processOne));
}

View File

@@ -1,6 +1,6 @@
const fsp = require("node:fs/promises");
const path = require("node:path");
const { Buffer } = require("node:buffer");
const fsp = require('node:fs/promises');
const path = require('node:path');
const { Buffer } = require('node:buffer');
/**
* Efficiently determine if a file is binary without reading the whole file.
@@ -13,25 +13,54 @@ async function isBinaryFile(filePath) {
try {
const stats = await fsp.stat(filePath);
if (stats.isDirectory()) {
throw new Error("EISDIR: illegal operation on a directory");
throw new Error('EISDIR: illegal operation on a directory');
}
const binaryExtensions = new Set([
".jpg", ".jpeg", ".png", ".gif", ".bmp", ".ico", ".svg",
".pdf", ".doc", ".docx", ".xls", ".xlsx", ".ppt", ".pptx",
".zip", ".tar", ".gz", ".rar", ".7z",
".exe", ".dll", ".so", ".dylib",
".mp3", ".mp4", ".avi", ".mov", ".wav",
".ttf", ".otf", ".woff", ".woff2",
".bin", ".dat", ".db", ".sqlite",
'.jpg',
'.jpeg',
'.png',
'.gif',
'.bmp',
'.ico',
'.svg',
'.pdf',
'.doc',
'.docx',
'.xls',
'.xlsx',
'.ppt',
'.pptx',
'.zip',
'.tar',
'.gz',
'.rar',
'.7z',
'.exe',
'.dll',
'.so',
'.dylib',
'.mp3',
'.mp4',
'.avi',
'.mov',
'.wav',
'.ttf',
'.otf',
'.woff',
'.woff2',
'.bin',
'.dat',
'.db',
'.sqlite',
]);
const ext = path.extname(filePath).toLowerCase();
if (binaryExtensions.has(ext)) return true;
const extension = path.extname(filePath).toLowerCase();
if (binaryExtensions.has(extension)) return true;
if (stats.size === 0) return false;
const sampleSize = Math.min(4096, stats.size);
const fd = await fsp.open(filePath, "r");
const fd = await fsp.open(filePath, 'r');
try {
const buffer = Buffer.allocUnsafe(sampleSize);
const { bytesRead } = await fd.read(buffer, 0, sampleSize, 0);
@@ -41,9 +70,7 @@ async function isBinaryFile(filePath) {
await fd.close();
}
} catch (error) {
console.warn(
`Warning: Could not determine if file is binary: ${filePath} - ${error.message}`,
);
console.warn(`Warning: Could not determine if file is binary: ${filePath} - ${error.message}`);
return false;
}
}

View File

@@ -1,18 +1,21 @@
const path = require("node:path");
const { execFile } = require("node:child_process");
const { promisify } = require("node:util");
const { glob } = require("glob");
const { loadIgnore } = require("./ignoreRules.js");
const path = require('node:path');
const { execFile } = require('node:child_process');
const { promisify } = require('node:util');
const { glob } = require('glob');
const { loadIgnore } = require('./ignoreRules.js');
const pExecFile = promisify(execFile);
async function isGitRepo(rootDir) {
try {
const { stdout } = await pExecFile("git", [
"rev-parse",
"--is-inside-work-tree",
], { cwd: rootDir });
return String(stdout || "").toString().trim() === "true";
const { stdout } = await pExecFile('git', ['rev-parse', '--is-inside-work-tree'], {
cwd: rootDir,
});
return (
String(stdout || '')
.toString()
.trim() === 'true'
);
} catch {
return false;
}
@@ -20,12 +23,10 @@ async function isGitRepo(rootDir) {
async function gitListFiles(rootDir) {
try {
const { stdout } = await pExecFile("git", [
"ls-files",
"-co",
"--exclude-standard",
], { cwd: rootDir });
return String(stdout || "")
const { stdout } = await pExecFile('git', ['ls-files', '-co', '--exclude-standard'], {
cwd: rootDir,
});
return String(stdout || '')
.split(/\r?\n/)
.map((s) => s.trim())
.filter(Boolean);
@@ -48,14 +49,14 @@ async function discoverFiles(rootDir, options = {}) {
const { filter } = await loadIgnore(rootDir);
// Try git first
if (preferGit && await isGitRepo(rootDir)) {
if (preferGit && (await isGitRepo(rootDir))) {
const relFiles = await gitListFiles(rootDir);
const filteredRel = relFiles.filter((p) => filter(p));
return filteredRel.map((p) => path.resolve(rootDir, p));
}
// Glob fallback
const globbed = await glob("**/*", {
const globbed = await glob('**/*', {
cwd: rootDir,
nodir: true,
dot: true,

View File

@@ -1,8 +1,8 @@
const path = require("node:path");
const discovery = require("./discovery.js");
const ignoreRules = require("./ignoreRules.js");
const { isBinaryFile } = require("./binary.js");
const { aggregateFileContents } = require("./aggregate.js");
const path = require('node:path');
const discovery = require('./discovery.js');
const ignoreRules = require('./ignoreRules.js');
const { isBinaryFile } = require('./binary.js');
const { aggregateFileContents } = require('./aggregate.js');
// Backward-compatible signature; delegate to central loader
async function parseGitignore(gitignorePath) {
@@ -14,7 +14,7 @@ async function discoverFiles(rootDir) {
// Delegate to discovery module which respects .gitignore and defaults
return await discovery.discoverFiles(rootDir, { preferGit: true });
} catch (error) {
console.error("Error discovering files:", error.message);
console.error('Error discovering files:', error.message);
return [];
}
}

View File

@@ -1,147 +1,147 @@
const fs = require("fs-extra");
const path = require("node:path");
const ignore = require("ignore");
const fs = require('fs-extra');
const path = require('node:path');
const ignore = require('ignore');
// Central default ignore patterns for discovery and filtering.
// These complement .gitignore and are applied regardless of VCS presence.
const DEFAULT_PATTERNS = [
// Project/VCS
"**/.bmad-core/**",
"**/.git/**",
"**/.svn/**",
"**/.hg/**",
"**/.bzr/**",
'**/.bmad-core/**',
'**/.git/**',
'**/.svn/**',
'**/.hg/**',
'**/.bzr/**',
// Package/build outputs
"**/node_modules/**",
"**/bower_components/**",
"**/vendor/**",
"**/packages/**",
"**/build/**",
"**/dist/**",
"**/out/**",
"**/target/**",
"**/bin/**",
"**/obj/**",
"**/release/**",
"**/debug/**",
'**/node_modules/**',
'**/bower_components/**',
'**/vendor/**',
'**/packages/**',
'**/build/**',
'**/dist/**',
'**/out/**',
'**/target/**',
'**/bin/**',
'**/obj/**',
'**/release/**',
'**/debug/**',
// Environments
"**/.venv/**",
"**/venv/**",
"**/.virtualenv/**",
"**/virtualenv/**",
"**/env/**",
'**/.venv/**',
'**/venv/**',
'**/.virtualenv/**',
'**/virtualenv/**',
'**/env/**',
// Logs & coverage
"**/*.log",
"**/npm-debug.log*",
"**/yarn-debug.log*",
"**/yarn-error.log*",
"**/lerna-debug.log*",
"**/coverage/**",
"**/.nyc_output/**",
"**/.coverage/**",
"**/test-results/**",
'**/*.log',
'**/npm-debug.log*',
'**/yarn-debug.log*',
'**/yarn-error.log*',
'**/lerna-debug.log*',
'**/coverage/**',
'**/.nyc_output/**',
'**/.coverage/**',
'**/test-results/**',
// Caches & temp
"**/.cache/**",
"**/.tmp/**",
"**/.temp/**",
"**/tmp/**",
"**/temp/**",
"**/.sass-cache/**",
'**/.cache/**',
'**/.tmp/**',
'**/.temp/**',
'**/tmp/**',
'**/temp/**',
'**/.sass-cache/**',
// IDE/editor
"**/.vscode/**",
"**/.idea/**",
"**/*.swp",
"**/*.swo",
"**/*~",
"**/.project",
"**/.classpath",
"**/.settings/**",
"**/*.sublime-project",
"**/*.sublime-workspace",
'**/.vscode/**',
'**/.idea/**',
'**/*.swp',
'**/*.swo',
'**/*~',
'**/.project',
'**/.classpath',
'**/.settings/**',
'**/*.sublime-project',
'**/*.sublime-workspace',
// Lockfiles
"**/package-lock.json",
"**/yarn.lock",
"**/pnpm-lock.yaml",
"**/composer.lock",
"**/Pipfile.lock",
'**/package-lock.json',
'**/yarn.lock',
'**/pnpm-lock.yaml',
'**/composer.lock',
'**/Pipfile.lock',
// Python/Java/compiled artifacts
"**/*.pyc",
"**/*.pyo",
"**/*.pyd",
"**/__pycache__/**",
"**/*.class",
"**/*.jar",
"**/*.war",
"**/*.ear",
"**/*.o",
"**/*.so",
"**/*.dll",
"**/*.exe",
'**/*.pyc',
'**/*.pyo',
'**/*.pyd',
'**/__pycache__/**',
'**/*.class',
'**/*.jar',
'**/*.war',
'**/*.ear',
'**/*.o',
'**/*.so',
'**/*.dll',
'**/*.exe',
// System junk
"**/lib64/**",
"**/.venv/lib64/**",
"**/venv/lib64/**",
"**/_site/**",
"**/.jekyll-cache/**",
"**/.jekyll-metadata",
"**/.DS_Store",
"**/.DS_Store?",
"**/._*",
"**/.Spotlight-V100/**",
"**/.Trashes/**",
"**/ehthumbs.db",
"**/Thumbs.db",
"**/desktop.ini",
'**/lib64/**',
'**/.venv/lib64/**',
'**/venv/lib64/**',
'**/_site/**',
'**/.jekyll-cache/**',
'**/.jekyll-metadata',
'**/.DS_Store',
'**/.DS_Store?',
'**/._*',
'**/.Spotlight-V100/**',
'**/.Trashes/**',
'**/ehthumbs.db',
'**/Thumbs.db',
'**/desktop.ini',
// XML outputs
"**/flattened-codebase.xml",
"**/repomix-output.xml",
'**/flattened-codebase.xml',
'**/repomix-output.xml',
// Images, media, fonts, archives, docs, dylibs
"**/*.jpg",
"**/*.jpeg",
"**/*.png",
"**/*.gif",
"**/*.bmp",
"**/*.ico",
"**/*.svg",
"**/*.pdf",
"**/*.doc",
"**/*.docx",
"**/*.xls",
"**/*.xlsx",
"**/*.ppt",
"**/*.pptx",
"**/*.zip",
"**/*.tar",
"**/*.gz",
"**/*.rar",
"**/*.7z",
"**/*.dylib",
"**/*.mp3",
"**/*.mp4",
"**/*.avi",
"**/*.mov",
"**/*.wav",
"**/*.ttf",
"**/*.otf",
"**/*.woff",
"**/*.woff2",
'**/*.jpg',
'**/*.jpeg',
'**/*.png',
'**/*.gif',
'**/*.bmp',
'**/*.ico',
'**/*.svg',
'**/*.pdf',
'**/*.doc',
'**/*.docx',
'**/*.xls',
'**/*.xlsx',
'**/*.ppt',
'**/*.pptx',
'**/*.zip',
'**/*.tar',
'**/*.gz',
'**/*.rar',
'**/*.7z',
'**/*.dylib',
'**/*.mp3',
'**/*.mp4',
'**/*.avi',
'**/*.mov',
'**/*.wav',
'**/*.ttf',
'**/*.otf',
'**/*.woff',
'**/*.woff2',
// Env files
"**/.env",
"**/.env.*",
"**/*.env",
'**/.env',
'**/.env.*',
'**/*.env',
// Misc
"**/junit.xml",
'**/junit.xml',
];
async function readIgnoreFile(filePath) {
try {
if (!await fs.pathExists(filePath)) return [];
const content = await fs.readFile(filePath, "utf8");
if (!(await fs.pathExists(filePath))) return [];
const content = await fs.readFile(filePath, 'utf8');
return content
.split("\n")
.split('\n')
.map((l) => l.trim())
.filter((l) => l && !l.startsWith("#"));
} catch (err) {
.filter((l) => l && !l.startsWith('#'));
} catch {
return [];
}
}
@@ -153,18 +153,18 @@ async function parseGitignore(gitignorePath) {
async function loadIgnore(rootDir, extraPatterns = []) {
const ig = ignore();
const gitignorePath = path.join(rootDir, ".gitignore");
const gitignorePath = path.join(rootDir, '.gitignore');
const patterns = [
...await readIgnoreFile(gitignorePath),
...(await readIgnoreFile(gitignorePath)),
...DEFAULT_PATTERNS,
...extraPatterns,
];
// De-duplicate
const unique = Array.from(new Set(patterns.map((p) => String(p))));
const unique = [...new Set(patterns.map(String))];
ig.add(unique);
// Include-only filter: return true if path should be included
const filter = (relativePath) => !ig.ignores(relativePath.replace(/\\/g, "/"));
const filter = (relativePath) => !ig.ignores(relativePath.replaceAll('\\', '/'));
return { ig, filter, patterns: unique };
}

View File

@@ -1,20 +1,14 @@
#!/usr/bin/env node
const { Command } = require("commander");
const fs = require("fs-extra");
const path = require("node:path");
const process = require("node:process");
const { Command } = require('commander');
const fs = require('fs-extra');
const path = require('node:path');
const process = require('node:process');
// Modularized components
const { findProjectRoot } = require("./projectRoot.js");
const { promptYesNo, promptPath } = require("./prompts.js");
const {
discoverFiles,
filterFiles,
aggregateFileContents,
} = require("./files.js");
const { generateXMLOutput } = require("./xml.js");
const { calculateStatistics } = require("./stats.js");
const { findProjectRoot } = require('./projectRoot.js');
const { promptYesNo, promptPath } = require('./prompts.js');
const { discoverFiles, filterFiles, aggregateFileContents } = require('./files.js');
const { generateXMLOutput } = require('./xml.js');
const { calculateStatistics } = require('./stats.js');
/**
* Recursively discover all files in a directory
@@ -73,30 +67,30 @@ const { calculateStatistics } = require("./stats.js");
const program = new Command();
program
.name("bmad-flatten")
.description("BMad-Method codebase flattener tool")
.version("1.0.0")
.option("-i, --input <path>", "Input directory to flatten", process.cwd())
.option("-o, --output <path>", "Output file path", "flattened-codebase.xml")
.name('bmad-flatten')
.description('BMad-Method codebase flattener tool')
.version('1.0.0')
.option('-i, --input <path>', 'Input directory to flatten', process.cwd())
.option('-o, --output <path>', 'Output file path', 'flattened-codebase.xml')
.action(async (options) => {
let inputDir = path.resolve(options.input);
let outputPath = path.resolve(options.output);
// Detect if user explicitly provided -i/--input or -o/--output
const argv = process.argv.slice(2);
const userSpecifiedInput = argv.some((a) =>
a === "-i" || a === "--input" || a.startsWith("--input=")
const userSpecifiedInput = argv.some(
(a) => a === '-i' || a === '--input' || a.startsWith('--input='),
);
const userSpecifiedOutput = argv.some((a) =>
a === "-o" || a === "--output" || a.startsWith("--output=")
const userSpecifiedOutput = argv.some(
(a) => a === '-o' || a === '--output' || a.startsWith('--output='),
);
const noPathArgs = !userSpecifiedInput && !userSpecifiedOutput;
const noPathArguments = !userSpecifiedInput && !userSpecifiedOutput;
if (noPathArgs) {
if (noPathArguments) {
const detectedRoot = await findProjectRoot(process.cwd());
const suggestedOutput = detectedRoot
? path.join(detectedRoot, "flattened-codebase.xml")
: path.resolve("flattened-codebase.xml");
? path.join(detectedRoot, 'flattened-codebase.xml')
: path.resolve('flattened-codebase.xml');
if (detectedRoot) {
const useDefaults = await promptYesNo(
@@ -107,26 +101,25 @@ program
inputDir = detectedRoot;
outputPath = suggestedOutput;
} else {
inputDir = await promptPath(
"Enter input directory path",
process.cwd(),
);
inputDir = await promptPath('Enter input directory path', process.cwd());
outputPath = await promptPath(
"Enter output file path",
path.join(inputDir, "flattened-codebase.xml"),
'Enter output file path',
path.join(inputDir, 'flattened-codebase.xml'),
);
}
} else {
console.log("Could not auto-detect a project root.");
inputDir = await promptPath(
"Enter input directory path",
process.cwd(),
);
console.log('Could not auto-detect a project root.');
inputDir = await promptPath('Enter input directory path', process.cwd());
outputPath = await promptPath(
"Enter output file path",
path.join(inputDir, "flattened-codebase.xml"),
'Enter output file path',
path.join(inputDir, 'flattened-codebase.xml'),
);
}
} else {
console.error(
'Could not auto-detect a project root and no arguments were provided. Please specify -i/--input and -o/--output.',
);
process.exit(1);
}
// Ensure output directory exists
@@ -134,24 +127,23 @@ program
try {
// Verify input directory exists
if (!await fs.pathExists(inputDir)) {
if (!(await fs.pathExists(inputDir))) {
console.error(`❌ Error: Input directory does not exist: ${inputDir}`);
process.exit(1);
}
// Import ora dynamically
const { default: ora } = await import("ora");
const { default: ora } = await import('ora');
// Start file discovery with spinner
const discoverySpinner = ora("🔍 Discovering files...").start();
const discoverySpinner = ora('🔍 Discovering files...').start();
const files = await discoverFiles(inputDir);
const filteredFiles = await filterFiles(files, inputDir);
discoverySpinner.succeed(
`📁 Found ${filteredFiles.length} files to include`,
);
discoverySpinner.succeed(`📁 Found ${filteredFiles.length} files to include`);
// Process files with progress tracking
const processingSpinner = ora("📄 Processing files...").start();
console.log('Reading file contents');
const processingSpinner = ora('📄 Processing files...').start();
const aggregatedContent = await aggregateFileContents(
filteredFiles,
inputDir,
@@ -165,31 +157,23 @@ program
}
// Generate XML output using streaming
const xmlSpinner = ora("🔧 Generating XML output...").start();
const xmlSpinner = ora('🔧 Generating XML output...').start();
await generateXMLOutput(aggregatedContent, outputPath);
xmlSpinner.succeed("📝 XML generation completed");
xmlSpinner.succeed('📝 XML generation completed');
// Calculate and display statistics
const outputStats = await fs.stat(outputPath);
const stats = await calculateStatistics(
aggregatedContent,
outputStats.size,
inputDir,
);
const stats = await calculateStatistics(aggregatedContent, outputStats.size, inputDir);
// Display completion summary
console.log("\n📊 Completion Summary:");
console.log('\n📊 Completion Summary:');
console.log(
`✅ Successfully processed ${filteredFiles.length} files into ${
path.basename(outputPath)
}`,
`✅ Successfully processed ${filteredFiles.length} files into ${path.basename(outputPath)}`,
);
console.log(`📁 Output file: ${outputPath}`);
console.log(`📏 Total source size: ${stats.totalSize}`);
console.log(`📄 Generated XML size: ${stats.xmlSize}`);
console.log(
`📝 Total lines of code: ${stats.totalLines.toLocaleString()}`,
);
console.log(`📝 Total lines of code: ${stats.totalLines.toLocaleString()}`);
console.log(`🔢 Estimated tokens: ${stats.estimatedTokens}`);
console.log(
`📊 File breakdown: ${stats.textFiles} text, ${stats.binaryFiles} binary, ${stats.errorFiles} errors\n`,
@@ -197,92 +181,75 @@ program
// Ask user if they want detailed stats + markdown report
const generateDetailed = await promptYesNo(
"Generate detailed stats (console + markdown) now?",
'Generate detailed stats (console + markdown) now?',
true,
);
if (generateDetailed) {
// Additional detailed stats
console.log("\n📈 Size Percentiles:");
console.log('\n📈 Size Percentiles:');
console.log(
` Avg: ${
Math.round(stats.avgFileSize).toLocaleString()
} B, Median: ${
Math.round(stats.medianFileSize).toLocaleString()
} B, p90: ${stats.p90.toLocaleString()} B, p95: ${stats.p95.toLocaleString()} B, p99: ${stats.p99.toLocaleString()} B`,
` Avg: ${Math.round(stats.avgFileSize).toLocaleString()} B, Median: ${Math.round(
stats.medianFileSize,
).toLocaleString()} B, p90: ${stats.p90.toLocaleString()} B, p95: ${stats.p95.toLocaleString()} B, p99: ${stats.p99.toLocaleString()} B`,
);
if (Array.isArray(stats.histogram) && stats.histogram.length) {
console.log("\n🧮 Size Histogram:");
if (Array.isArray(stats.histogram) && stats.histogram.length > 0) {
console.log('\n🧮 Size Histogram:');
for (const b of stats.histogram.slice(0, 2)) {
console.log(
` ${b.label}: ${b.count} files, ${b.bytes.toLocaleString()} bytes`,
);
console.log(` ${b.label}: ${b.count} files, ${b.bytes.toLocaleString()} bytes`);
}
if (stats.histogram.length > 2) {
console.log(` … and ${stats.histogram.length - 2} more buckets`);
}
}
if (Array.isArray(stats.byExtension) && stats.byExtension.length) {
if (Array.isArray(stats.byExtension) && stats.byExtension.length > 0) {
const topExt = stats.byExtension.slice(0, 2);
console.log("\n📦 Top Extensions:");
console.log('\n📦 Top Extensions:');
for (const e of topExt) {
const pct = stats.totalBytes
? ((e.bytes / stats.totalBytes) * 100)
: 0;
const pct = stats.totalBytes ? (e.bytes / stats.totalBytes) * 100 : 0;
console.log(
` ${e.ext}: ${e.count} files, ${e.bytes.toLocaleString()} bytes (${
pct.toFixed(2)
}%)`,
` ${e.ext}: ${e.count} files, ${e.bytes.toLocaleString()} bytes (${pct.toFixed(
2,
)}%)`,
);
}
if (stats.byExtension.length > 2) {
console.log(
` … and ${stats.byExtension.length - 2} more extensions`,
);
console.log(` … and ${stats.byExtension.length - 2} more extensions`);
}
}
if (Array.isArray(stats.byDirectory) && stats.byDirectory.length) {
if (Array.isArray(stats.byDirectory) && stats.byDirectory.length > 0) {
const topDir = stats.byDirectory.slice(0, 2);
console.log("\n📂 Top Directories:");
console.log('\n📂 Top Directories:');
for (const d of topDir) {
const pct = stats.totalBytes
? ((d.bytes / stats.totalBytes) * 100)
: 0;
const pct = stats.totalBytes ? (d.bytes / stats.totalBytes) * 100 : 0;
console.log(
` ${d.dir}: ${d.count} files, ${d.bytes.toLocaleString()} bytes (${
pct.toFixed(2)
}%)`,
` ${d.dir}: ${d.count} files, ${d.bytes.toLocaleString()} bytes (${pct.toFixed(
2,
)}%)`,
);
}
if (stats.byDirectory.length > 2) {
console.log(
` … and ${stats.byDirectory.length - 2} more directories`,
);
console.log(` … and ${stats.byDirectory.length - 2} more directories`);
}
}
if (
Array.isArray(stats.depthDistribution) &&
stats.depthDistribution.length
) {
console.log("\n🌳 Depth Distribution:");
if (Array.isArray(stats.depthDistribution) && stats.depthDistribution.length > 0) {
console.log('\n🌳 Depth Distribution:');
const dd = stats.depthDistribution.slice(0, 2);
let line = " " + dd.map((d) => `${d.depth}:${d.count}`).join(" ");
let line = ' ' + dd.map((d) => `${d.depth}:${d.count}`).join(' ');
if (stats.depthDistribution.length > 2) {
line += ` … +${stats.depthDistribution.length - 2} more`;
}
console.log(line);
}
if (Array.isArray(stats.longestPaths) && stats.longestPaths.length) {
console.log("\n🧵 Longest Paths:");
if (Array.isArray(stats.longestPaths) && stats.longestPaths.length > 0) {
console.log('\n🧵 Longest Paths:');
for (const p of stats.longestPaths.slice(0, 2)) {
console.log(
` ${p.path} (${p.length} chars, ${p.size.toLocaleString()} bytes)`,
);
console.log(` ${p.path} (${p.length} chars, ${p.size.toLocaleString()} bytes)`);
}
if (stats.longestPaths.length > 2) {
console.log(` … and ${stats.longestPaths.length - 2} more paths`);
@@ -290,7 +257,7 @@ program
}
if (stats.temporal) {
console.log("\n⏱ Temporal:");
console.log('\n⏱ Temporal:');
if (stats.temporal.oldest) {
console.log(
` Oldest: ${stats.temporal.oldest.path} (${stats.temporal.oldest.mtime})`,
@@ -302,104 +269,82 @@ program
);
}
if (Array.isArray(stats.temporal.ageBuckets)) {
console.log(" Age buckets:");
console.log(' Age buckets:');
for (const b of stats.temporal.ageBuckets.slice(0, 2)) {
console.log(
` ${b.label}: ${b.count} files, ${b.bytes.toLocaleString()} bytes`,
);
console.log(` ${b.label}: ${b.count} files, ${b.bytes.toLocaleString()} bytes`);
}
if (stats.temporal.ageBuckets.length > 2) {
console.log(
` … and ${
stats.temporal.ageBuckets.length - 2
} more buckets`,
);
console.log(` … and ${stats.temporal.ageBuckets.length - 2} more buckets`);
}
}
}
if (stats.quality) {
console.log("\n✅ Quality Signals:");
console.log('\n✅ Quality Signals:');
console.log(` Zero-byte files: ${stats.quality.zeroByteFiles}`);
console.log(` Empty text files: ${stats.quality.emptyTextFiles}`);
console.log(` Hidden files: ${stats.quality.hiddenFiles}`);
console.log(` Symlinks: ${stats.quality.symlinks}`);
console.log(
` Large files (>= ${
(stats.quality.largeThreshold / (1024 * 1024)).toFixed(0)
} MB): ${stats.quality.largeFilesCount}`,
` Large files (>= ${(stats.quality.largeThreshold / (1024 * 1024)).toFixed(
0,
)} MB): ${stats.quality.largeFilesCount}`,
);
console.log(
` Suspiciously large files (>= 100 MB): ${stats.quality.suspiciousLargeFilesCount}`,
);
}
if (
Array.isArray(stats.duplicateCandidates) &&
stats.duplicateCandidates.length
) {
console.log("\n🧬 Duplicate Candidates:");
if (Array.isArray(stats.duplicateCandidates) && stats.duplicateCandidates.length > 0) {
console.log('\n🧬 Duplicate Candidates:');
for (const d of stats.duplicateCandidates.slice(0, 2)) {
console.log(
` ${d.reason}: ${d.count} files @ ${d.size.toLocaleString()} bytes`,
);
console.log(` ${d.reason}: ${d.count} files @ ${d.size.toLocaleString()} bytes`);
}
if (stats.duplicateCandidates.length > 2) {
console.log(
` … and ${stats.duplicateCandidates.length - 2} more groups`,
);
console.log(` … and ${stats.duplicateCandidates.length - 2} more groups`);
}
}
if (typeof stats.compressibilityRatio === "number") {
if (typeof stats.compressibilityRatio === 'number') {
console.log(
`\n🗜️ Compressibility ratio (sampled): ${
(stats.compressibilityRatio * 100).toFixed(2)
}%`,
`\n🗜️ Compressibility ratio (sampled): ${(stats.compressibilityRatio * 100).toFixed(
2,
)}%`,
);
}
if (stats.git && stats.git.isRepo) {
console.log("\n🔧 Git:");
console.log('\n🔧 Git:');
console.log(
` Tracked: ${stats.git.trackedCount} files, ${stats.git.trackedBytes.toLocaleString()} bytes`,
);
console.log(
` Untracked: ${stats.git.untrackedCount} files, ${stats.git.untrackedBytes.toLocaleString()} bytes`,
);
if (
Array.isArray(stats.git.lfsCandidates) &&
stats.git.lfsCandidates.length
) {
console.log(" LFS candidates (top 2):");
if (Array.isArray(stats.git.lfsCandidates) && stats.git.lfsCandidates.length > 0) {
console.log(' LFS candidates (top 2):');
for (const f of stats.git.lfsCandidates.slice(0, 2)) {
console.log(` ${f.path} (${f.size.toLocaleString()} bytes)`);
}
if (stats.git.lfsCandidates.length > 2) {
console.log(
` … and ${stats.git.lfsCandidates.length - 2} more`,
);
console.log(` … and ${stats.git.lfsCandidates.length - 2} more`);
}
}
}
if (Array.isArray(stats.largestFiles) && stats.largestFiles.length) {
console.log("\n📚 Largest Files (top 2):");
if (Array.isArray(stats.largestFiles) && stats.largestFiles.length > 0) {
console.log('\n📚 Largest Files (top 2):');
for (const f of stats.largestFiles.slice(0, 2)) {
// Show LOC for text files when available; omit ext and mtime
let locStr = "";
let locStr = '';
if (!f.isBinary && Array.isArray(aggregatedContent?.textFiles)) {
const tf = aggregatedContent.textFiles.find((t) =>
t.path === f.path
);
if (tf && typeof tf.lines === "number") {
const tf = aggregatedContent.textFiles.find((t) => t.path === f.path);
if (tf && typeof tf.lines === 'number') {
locStr = `, LOC: ${tf.lines.toLocaleString()}`;
}
}
console.log(
` ${f.path} ${f.sizeFormatted} (${
f.percentOfTotal.toFixed(2)
}%)${locStr}`,
` ${f.path} ${f.sizeFormatted} (${f.percentOfTotal.toFixed(2)}%)${locStr}`,
);
}
if (stats.largestFiles.length > 2) {
@@ -409,262 +354,214 @@ program
// Write a comprehensive markdown report next to the XML
{
const mdPath = outputPath.endsWith(".xml")
? outputPath.replace(/\.xml$/i, ".stats.md")
: outputPath + ".stats.md";
const mdPath = outputPath.endsWith('.xml')
? outputPath.replace(/\.xml$/i, '.stats.md')
: outputPath + '.stats.md';
try {
const pct = (num, den) => (den ? ((num / den) * 100) : 0);
const pct = (num, den) => (den ? (num / den) * 100 : 0);
const md = [];
md.push(`# 🧾 Flatten Stats for ${path.basename(outputPath)}`);
md.push("");
md.push("## 📊 Summary");
md.push(`- Total source size: ${stats.totalSize}`);
md.push(`- Generated XML size: ${stats.xmlSize}`);
md.push(
`# 🧾 Flatten Stats for ${path.basename(outputPath)}`,
'',
'## 📊 Summary',
`- Total source size: ${stats.totalSize}`,
`- Generated XML size: ${stats.xmlSize}`,
`- Total lines of code: ${stats.totalLines.toLocaleString()}`,
);
md.push(`- Estimated tokens: ${stats.estimatedTokens}`);
md.push(
`- Estimated tokens: ${stats.estimatedTokens}`,
`- File breakdown: ${stats.textFiles} text, ${stats.binaryFiles} binary, ${stats.errorFiles} errors`,
'',
'## 📈 Size Percentiles',
`Avg: ${Math.round(stats.avgFileSize).toLocaleString()} B, Median: ${Math.round(
stats.medianFileSize,
).toLocaleString()} B, p90: ${stats.p90.toLocaleString()} B, p95: ${stats.p95.toLocaleString()} B, p99: ${stats.p99.toLocaleString()} B`,
'',
);
md.push("");
// Percentiles
md.push("## 📈 Size Percentiles");
md.push(
`Avg: ${
Math.round(stats.avgFileSize).toLocaleString()
} B, Median: ${
Math.round(stats.medianFileSize).toLocaleString()
} B, p90: ${stats.p90.toLocaleString()} B, p95: ${stats.p95.toLocaleString()} B, p99: ${stats.p99.toLocaleString()} B`,
);
md.push("");
// Histogram
if (Array.isArray(stats.histogram) && stats.histogram.length) {
md.push("## 🧮 Size Histogram");
md.push("| Bucket | Files | Bytes |");
md.push("| --- | ---: | ---: |");
if (Array.isArray(stats.histogram) && stats.histogram.length > 0) {
md.push(
'## 🧮 Size Histogram',
'| Bucket | Files | Bytes |',
'| --- | ---: | ---: |',
);
for (const b of stats.histogram) {
md.push(
`| ${b.label} | ${b.count} | ${b.bytes.toLocaleString()} |`,
);
md.push(`| ${b.label} | ${b.count} | ${b.bytes.toLocaleString()} |`);
}
md.push("");
md.push('');
}
// Top Extensions
if (Array.isArray(stats.byExtension) && stats.byExtension.length) {
md.push("## 📦 Top Extensions by Bytes (Top 20)");
md.push("| Ext | Files | Bytes | % of total |");
md.push("| --- | ---: | ---: | ---: |");
if (Array.isArray(stats.byExtension) && stats.byExtension.length > 0) {
md.push(
'## 📦 Top Extensions by Bytes (Top 20)',
'| Ext | Files | Bytes | % of total |',
'| --- | ---: | ---: | ---: |',
);
for (const e of stats.byExtension.slice(0, 20)) {
const p = pct(e.bytes, stats.totalBytes);
md.push(
`| ${e.ext} | ${e.count} | ${e.bytes.toLocaleString()} | ${
p.toFixed(2)
}% |`,
`| ${e.ext} | ${e.count} | ${e.bytes.toLocaleString()} | ${p.toFixed(2)}% |`,
);
}
md.push("");
md.push('');
}
// Top Directories
if (Array.isArray(stats.byDirectory) && stats.byDirectory.length) {
md.push("## 📂 Top Directories by Bytes (Top 20)");
md.push("| Directory | Files | Bytes | % of total |");
md.push("| --- | ---: | ---: | ---: |");
if (Array.isArray(stats.byDirectory) && stats.byDirectory.length > 0) {
md.push(
'## 📂 Top Directories by Bytes (Top 20)',
'| Directory | Files | Bytes | % of total |',
'| --- | ---: | ---: | ---: |',
);
for (const d of stats.byDirectory.slice(0, 20)) {
const p = pct(d.bytes, stats.totalBytes);
md.push(
`| ${d.dir} | ${d.count} | ${d.bytes.toLocaleString()} | ${
p.toFixed(2)
}% |`,
`| ${d.dir} | ${d.count} | ${d.bytes.toLocaleString()} | ${p.toFixed(2)}% |`,
);
}
md.push("");
md.push('');
}
// Depth distribution
if (
Array.isArray(stats.depthDistribution) &&
stats.depthDistribution.length
) {
md.push("## 🌳 Depth Distribution");
md.push("| Depth | Count |");
md.push("| ---: | ---: |");
if (Array.isArray(stats.depthDistribution) && stats.depthDistribution.length > 0) {
md.push('## 🌳 Depth Distribution', '| Depth | Count |', '| ---: | ---: |');
for (const d of stats.depthDistribution) {
md.push(`| ${d.depth} | ${d.count} |`);
}
md.push("");
md.push('');
}
// Longest paths
if (
Array.isArray(stats.longestPaths) && stats.longestPaths.length
) {
md.push("## 🧵 Longest Paths (Top 25)");
md.push("| Path | Length | Bytes |");
md.push("| --- | ---: | ---: |");
if (Array.isArray(stats.longestPaths) && stats.longestPaths.length > 0) {
md.push(
'## 🧵 Longest Paths (Top 25)',
'| Path | Length | Bytes |',
'| --- | ---: | ---: |',
);
for (const pth of stats.longestPaths) {
md.push(
`| ${pth.path} | ${pth.length} | ${pth.size.toLocaleString()} |`,
);
md.push(`| ${pth.path} | ${pth.length} | ${pth.size.toLocaleString()} |`);
}
md.push("");
md.push('');
}
// Temporal
if (stats.temporal) {
md.push("## ⏱️ Temporal");
md.push('## ⏱️ Temporal');
if (stats.temporal.oldest) {
md.push(
`- Oldest: ${stats.temporal.oldest.path} (${stats.temporal.oldest.mtime})`,
);
md.push(`- Oldest: ${stats.temporal.oldest.path} (${stats.temporal.oldest.mtime})`);
}
if (stats.temporal.newest) {
md.push(
`- Newest: ${stats.temporal.newest.path} (${stats.temporal.newest.mtime})`,
);
md.push(`- Newest: ${stats.temporal.newest.path} (${stats.temporal.newest.mtime})`);
}
if (Array.isArray(stats.temporal.ageBuckets)) {
md.push("");
md.push("| Age | Files | Bytes |");
md.push("| --- | ---: | ---: |");
md.push('', '| Age | Files | Bytes |', '| --- | ---: | ---: |');
for (const b of stats.temporal.ageBuckets) {
md.push(
`| ${b.label} | ${b.count} | ${b.bytes.toLocaleString()} |`,
);
md.push(`| ${b.label} | ${b.count} | ${b.bytes.toLocaleString()} |`);
}
}
md.push("");
md.push('');
}
// Quality signals
if (stats.quality) {
md.push("## ✅ Quality Signals");
md.push(`- Zero-byte files: ${stats.quality.zeroByteFiles}`);
md.push(`- Empty text files: ${stats.quality.emptyTextFiles}`);
md.push(`- Hidden files: ${stats.quality.hiddenFiles}`);
md.push(`- Symlinks: ${stats.quality.symlinks}`);
md.push(
`- Large files (>= ${
(stats.quality.largeThreshold / (1024 * 1024)).toFixed(0)
} MB): ${stats.quality.largeFilesCount}`,
);
md.push(
'## ✅ Quality Signals',
`- Zero-byte files: ${stats.quality.zeroByteFiles}`,
`- Empty text files: ${stats.quality.emptyTextFiles}`,
`- Hidden files: ${stats.quality.hiddenFiles}`,
`- Symlinks: ${stats.quality.symlinks}`,
`- Large files (>= ${(stats.quality.largeThreshold / (1024 * 1024)).toFixed(0)} MB): ${stats.quality.largeFilesCount}`,
`- Suspiciously large files (>= 100 MB): ${stats.quality.suspiciousLargeFilesCount}`,
'',
);
md.push("");
}
// Duplicates
if (
Array.isArray(stats.duplicateCandidates) &&
stats.duplicateCandidates.length
) {
md.push("## 🧬 Duplicate Candidates");
md.push("| Reason | Files | Size (bytes) |");
md.push("| --- | ---: | ---: |");
if (Array.isArray(stats.duplicateCandidates) && stats.duplicateCandidates.length > 0) {
md.push(
'## 🧬 Duplicate Candidates',
'| Reason | Files | Size (bytes) |',
'| --- | ---: | ---: |',
);
for (const d of stats.duplicateCandidates) {
md.push(
`| ${d.reason} | ${d.count} | ${d.size.toLocaleString()} |`,
);
md.push(`| ${d.reason} | ${d.count} | ${d.size.toLocaleString()} |`);
}
md.push("");
// Detailed listing of duplicate file names and locations
md.push("### 🧬 Duplicate Groups Details");
md.push('', '### 🧬 Duplicate Groups Details');
let dupIndex = 1;
for (const d of stats.duplicateCandidates) {
md.push(
`#### Group ${dupIndex}: ${d.count} files @ ${d.size.toLocaleString()} bytes (${d.reason})`,
);
if (Array.isArray(d.files) && d.files.length) {
if (Array.isArray(d.files) && d.files.length > 0) {
for (const fp of d.files) {
md.push(`- ${fp}`);
}
} else {
md.push("- (file list unavailable)");
md.push('- (file list unavailable)');
}
md.push("");
md.push('');
dupIndex++;
}
md.push("");
md.push('');
}
// Compressibility
if (typeof stats.compressibilityRatio === "number") {
md.push("## 🗜️ Compressibility");
if (typeof stats.compressibilityRatio === 'number') {
md.push(
`Sampled compressibility ratio: ${
(stats.compressibilityRatio * 100).toFixed(2)
}%`,
'## 🗜️ Compressibility',
`Sampled compressibility ratio: ${(stats.compressibilityRatio * 100).toFixed(2)}%`,
'',
);
md.push("");
}
// Git
if (stats.git && stats.git.isRepo) {
md.push("## 🔧 Git");
md.push(
'## 🔧 Git',
`- Tracked: ${stats.git.trackedCount} files, ${stats.git.trackedBytes.toLocaleString()} bytes`,
);
md.push(
`- Untracked: ${stats.git.untrackedCount} files, ${stats.git.untrackedBytes.toLocaleString()} bytes`,
);
if (
Array.isArray(stats.git.lfsCandidates) &&
stats.git.lfsCandidates.length
) {
md.push("");
md.push("### 📦 LFS Candidates (Top 20)");
md.push("| Path | Bytes |");
md.push("| --- | ---: |");
if (Array.isArray(stats.git.lfsCandidates) && stats.git.lfsCandidates.length > 0) {
md.push('', '### 📦 LFS Candidates (Top 20)', '| Path | Bytes |', '| --- | ---: |');
for (const f of stats.git.lfsCandidates.slice(0, 20)) {
md.push(`| ${f.path} | ${f.size.toLocaleString()} |`);
}
}
md.push("");
md.push('');
}
// Largest Files
if (
Array.isArray(stats.largestFiles) && stats.largestFiles.length
) {
md.push("## 📚 Largest Files (Top 50)");
md.push("| Path | Size | % of total | LOC |");
md.push("| --- | ---: | ---: | ---: |");
if (Array.isArray(stats.largestFiles) && stats.largestFiles.length > 0) {
md.push(
'## 📚 Largest Files (Top 50)',
'| Path | Size | % of total | LOC |',
'| --- | ---: | ---: | ---: |',
);
for (const f of stats.largestFiles) {
let loc = "";
if (
!f.isBinary && Array.isArray(aggregatedContent?.textFiles)
) {
const tf = aggregatedContent.textFiles.find((t) =>
t.path === f.path
);
if (tf && typeof tf.lines === "number") {
let loc = '';
if (!f.isBinary && Array.isArray(aggregatedContent?.textFiles)) {
const tf = aggregatedContent.textFiles.find((t) => t.path === f.path);
if (tf && typeof tf.lines === 'number') {
loc = tf.lines.toLocaleString();
}
}
md.push(
`| ${f.path} | ${f.sizeFormatted} | ${
f.percentOfTotal.toFixed(2)
}% | ${loc} |`,
`| ${f.path} | ${f.sizeFormatted} | ${f.percentOfTotal.toFixed(2)}% | ${loc} |`,
);
}
md.push("");
md.push('');
}
await fs.writeFile(mdPath, md.join("\n"));
await fs.writeFile(mdPath, md.join('\n'));
console.log(`\n🧾 Detailed stats report written to: ${mdPath}`);
} catch (e) {
console.warn(`⚠️ Failed to write stats markdown: ${e.message}`);
} catch (error) {
console.warn(`⚠️ Failed to write stats markdown: ${error.message}`);
}
}
}
} catch (error) {
console.error("❌ Critical error:", error.message);
console.error("An unexpected error occurred.");
console.error('❌ Critical error:', error.message);
console.error('An unexpected error occurred.');
process.exit(1);
}
});

View File

@@ -1,10 +1,10 @@
const fs = require("fs-extra");
const path = require("node:path");
const fs = require('fs-extra');
const path = require('node:path');
// Deno/Node compatibility: explicitly import process
const process = require("node:process");
const { execFile } = require("node:child_process");
const { promisify } = require("node:util");
const process = require('node:process');
const { execFile } = require('node:child_process');
const { promisify } = require('node:util');
const execFileAsync = promisify(execFile);
// Simple memoization across calls (keyed by realpath of startDir)
@@ -18,7 +18,7 @@ async function _tryRun(cmd, args, cwd, timeoutMs = 500) {
windowsHide: true,
maxBuffer: 1024 * 1024,
});
const out = String(stdout || "").trim();
const out = String(stdout || '').trim();
return out || null;
} catch {
return null;
@@ -27,15 +27,17 @@ async function _tryRun(cmd, args, cwd, timeoutMs = 500) {
async function _detectVcsTopLevel(startDir) {
// Run common VCS root queries in parallel; ignore failures
const gitP = _tryRun("git", ["rev-parse", "--show-toplevel"], startDir);
const hgP = _tryRun("hg", ["root"], startDir);
const gitP = _tryRun('git', ['rev-parse', '--show-toplevel'], startDir);
const hgP = _tryRun('hg', ['root'], startDir);
const svnP = (async () => {
const show = await _tryRun("svn", ["info", "--show-item", "wc-root"], startDir);
const show = await _tryRun('svn', ['info', '--show-item', 'wc-root'], startDir);
if (show) return show;
const info = await _tryRun("svn", ["info"], startDir);
const info = await _tryRun('svn', ['info'], startDir);
if (info) {
const line = info.split(/\r?\n/).find((l) => l.toLowerCase().startsWith("working copy root path:"));
if (line) return line.split(":").slice(1).join(":").trim();
const line = info
.split(/\r?\n/)
.find((l) => l.toLowerCase().startsWith('working copy root path:'));
if (line) return line.split(':').slice(1).join(':').trim();
}
return null;
})();
@@ -71,90 +73,92 @@ async function findProjectRoot(startDir) {
const checks = [];
const add = (rel, weight) => {
const makePath = (d) => Array.isArray(rel) ? path.join(d, ...rel) : path.join(d, rel);
const makePath = (d) => (Array.isArray(rel) ? path.join(d, ...rel) : path.join(d, rel));
checks.push({ makePath, weight });
};
// Highest priority: explicit sentinel markers
add(".project-root", 110);
add(".workspace-root", 110);
add(".repo-root", 110);
add('.project-root', 110);
add('.workspace-root', 110);
add('.repo-root', 110);
// Highest priority: VCS roots
add(".git", 100);
add(".hg", 95);
add(".svn", 95);
add('.git', 100);
add('.hg', 95);
add('.svn', 95);
// Monorepo/workspace indicators
add("pnpm-workspace.yaml", 90);
add("lerna.json", 90);
add("turbo.json", 90);
add("nx.json", 90);
add("rush.json", 90);
add("go.work", 90);
add("WORKSPACE", 90);
add("WORKSPACE.bazel", 90);
add("MODULE.bazel", 90);
add("pants.toml", 90);
add('pnpm-workspace.yaml', 90);
add('lerna.json', 90);
add('turbo.json', 90);
add('nx.json', 90);
add('rush.json', 90);
add('go.work', 90);
add('WORKSPACE', 90);
add('WORKSPACE.bazel', 90);
add('MODULE.bazel', 90);
add('pants.toml', 90);
// Lockfiles and package-manager/top-level locks
add("yarn.lock", 85);
add("pnpm-lock.yaml", 85);
add("package-lock.json", 85);
add("bun.lockb", 85);
add("Cargo.lock", 85);
add("composer.lock", 85);
add("poetry.lock", 85);
add("Pipfile.lock", 85);
add("Gemfile.lock", 85);
add('yarn.lock', 85);
add('pnpm-lock.yaml', 85);
add('package-lock.json', 85);
add('bun.lockb', 85);
add('Cargo.lock', 85);
add('composer.lock', 85);
add('poetry.lock', 85);
add('Pipfile.lock', 85);
add('Gemfile.lock', 85);
// Build-system root indicators
add("settings.gradle", 80);
add("settings.gradle.kts", 80);
add("gradlew", 80);
add("pom.xml", 80);
add("build.sbt", 80);
add(["project", "build.properties"], 80);
add('settings.gradle', 80);
add('settings.gradle.kts', 80);
add('gradlew', 80);
add('pom.xml', 80);
add('build.sbt', 80);
add(['project', 'build.properties'], 80);
// Language/project config markers
add("deno.json", 75);
add("deno.jsonc", 75);
add("pyproject.toml", 75);
add("Pipfile", 75);
add("requirements.txt", 75);
add("go.mod", 75);
add("Cargo.toml", 75);
add("composer.json", 75);
add("mix.exs", 75);
add("Gemfile", 75);
add("CMakeLists.txt", 75);
add("stack.yaml", 75);
add("cabal.project", 75);
add("rebar.config", 75);
add("pubspec.yaml", 75);
add("flake.nix", 75);
add("shell.nix", 75);
add("default.nix", 75);
add(".tool-versions", 75);
add("package.json", 74); // generic Node project (lower than lockfiles/workspaces)
add('deno.json', 75);
add('deno.jsonc', 75);
add('pyproject.toml', 75);
add('Pipfile', 75);
add('requirements.txt', 75);
add('go.mod', 75);
add('Cargo.toml', 75);
add('composer.json', 75);
add('mix.exs', 75);
add('Gemfile', 75);
add('CMakeLists.txt', 75);
add('stack.yaml', 75);
add('cabal.project', 75);
add('rebar.config', 75);
add('pubspec.yaml', 75);
add('flake.nix', 75);
add('shell.nix', 75);
add('default.nix', 75);
add('.tool-versions', 75);
add('package.json', 74); // generic Node project (lower than lockfiles/workspaces)
// Changesets
add([".changeset", "config.json"], 70);
add(".changeset", 70);
add(['.changeset', 'config.json'], 70);
add('.changeset', 70);
// Custom markers via env (comma-separated names)
if (process.env.PROJECT_ROOT_MARKERS) {
for (const name of process.env.PROJECT_ROOT_MARKERS.split(",").map((s) => s.trim()).filter(Boolean)) {
for (const name of process.env.PROJECT_ROOT_MARKERS.split(',')
.map((s) => s.trim())
.filter(Boolean)) {
add(name, 72);
}
}
/** Check for package.json with "workspaces" */
const hasWorkspacePackageJson = async (d) => {
const pkgPath = path.join(d, "package.json");
const pkgPath = path.join(d, 'package.json');
if (!(await exists(pkgPath))) return false;
try {
const raw = await fs.readFile(pkgPath, "utf8");
const raw = await fs.readFile(pkgPath, 'utf8');
const pkg = JSON.parse(raw);
return Boolean(pkg && pkg.workspaces);
} catch {
@@ -172,9 +176,8 @@ async function findProjectRoot(startDir) {
while (true) {
// Special check: package.json with "workspaces"
if (await hasWorkspacePackageJson(dir)) {
if (!best || 90 >= best.weight) best = { dir, weight: 90 };
}
if ((await hasWorkspacePackageJson(dir)) && (!best || 90 >= best.weight))
best = { dir, weight: 90 };
// Evaluate all other checks in parallel
const results = await Promise.all(
@@ -201,4 +204,3 @@ async function findProjectRoot(startDir) {
}
module.exports = { findProjectRoot };

View File

@@ -1,11 +1,11 @@
const os = require("node:os");
const path = require("node:path");
const readline = require("node:readline");
const process = require("node:process");
const os = require('node:os');
const path = require('node:path');
const readline = require('node:readline');
const process = require('node:process');
function expandHome(p) {
if (!p) return p;
if (p.startsWith("~")) return path.join(os.homedir(), p.slice(1));
if (p.startsWith('~')) return path.join(os.homedir(), p.slice(1));
return p;
}
@@ -27,16 +27,16 @@ function promptQuestion(question) {
}
async function promptYesNo(question, defaultYes = true) {
const suffix = defaultYes ? " [Y/n] " : " [y/N] ";
const suffix = defaultYes ? ' [Y/n] ' : ' [y/N] ';
const ans = (await promptQuestion(`${question}${suffix}`)).trim().toLowerCase();
if (!ans) return defaultYes;
if (["y", "yes"].includes(ans)) return true;
if (["n", "no"].includes(ans)) return false;
if (['y', 'yes'].includes(ans)) return true;
if (['n', 'no'].includes(ans)) return false;
return promptYesNo(question, defaultYes);
}
async function promptPath(question, defaultValue) {
const prompt = `${question}${defaultValue ? ` (default: ${defaultValue})` : ""}: `;
const prompt = `${question}${defaultValue ? ` (default: ${defaultValue})` : ''}: `;
const ans = (await promptQuestion(prompt)).trim();
return expandHome(ans || defaultValue);
}

View File

@@ -1,11 +1,11 @@
"use strict";
'use strict';
const fs = require("node:fs/promises");
const path = require("node:path");
const zlib = require("node:zlib");
const { Buffer } = require("node:buffer");
const crypto = require("node:crypto");
const cp = require("node:child_process");
const fs = require('node:fs/promises');
const path = require('node:path');
const zlib = require('node:zlib');
const { Buffer } = require('node:buffer');
const crypto = require('node:crypto');
const cp = require('node:child_process');
const KB = 1024;
const MB = 1024 * KB;
@@ -34,17 +34,19 @@ async function enrichAllFiles(textFiles, binaryFiles) {
const allFiles = [];
async function enrich(file, isBinary) {
const ext = (path.extname(file.path) || "").toLowerCase();
const dir = path.dirname(file.path) || ".";
const ext = (path.extname(file.path) || '').toLowerCase();
const dir = path.dirname(file.path) || '.';
const depth = file.path.split(path.sep).filter(Boolean).length;
const hidden = file.path.split(path.sep).some((seg) => seg.startsWith("."));
const hidden = file.path.split(path.sep).some((seg) => seg.startsWith('.'));
let mtimeMs = 0;
let isSymlink = false;
try {
const lst = await fs.lstat(file.absolutePath);
mtimeMs = lst.mtimeMs;
isSymlink = lst.isSymbolicLink();
} catch (_) { /* ignore lstat errors during enrichment */ }
} catch {
/* ignore lstat errors during enrichment */
}
allFiles.push({
path: file.path,
absolutePath: file.absolutePath,
@@ -67,18 +69,18 @@ async function enrichAllFiles(textFiles, binaryFiles) {
function buildHistogram(allFiles) {
const buckets = [
[1 * KB, "01KB"],
[10 * KB, "110KB"],
[100 * KB, "10100KB"],
[1 * MB, "100KB1MB"],
[10 * MB, "110MB"],
[100 * MB, "10100MB"],
[Infinity, ">=100MB"],
[1 * KB, '01KB'],
[10 * KB, '110KB'],
[100 * KB, '10100KB'],
[1 * MB, '100KB1MB'],
[10 * MB, '110MB'],
[100 * MB, '10100MB'],
[Infinity, '>=100MB'],
];
const histogram = buckets.map(([_, label]) => ({ label, count: 0, bytes: 0 }));
for (const f of allFiles) {
for (let i = 0; i < buckets.length; i++) {
if (f.size < buckets[i][0]) {
for (const [i, bucket] of buckets.entries()) {
if (f.size < bucket[0]) {
histogram[i].count++;
histogram[i].bytes += f.size;
break;
@@ -91,13 +93,13 @@ function buildHistogram(allFiles) {
function aggregateByExtension(allFiles) {
const byExtension = new Map();
for (const f of allFiles) {
const key = f.ext || "<none>";
const key = f.ext || '<none>';
const v = byExtension.get(key) || { ext: key, count: 0, bytes: 0 };
v.count++;
v.bytes += f.size;
byExtension.set(key, v);
}
return Array.from(byExtension.values()).sort((a, b) => b.bytes - a.bytes);
return [...byExtension.values()].sort((a, b) => b.bytes - a.bytes);
}
function aggregateByDirectory(allFiles) {
@@ -109,15 +111,15 @@ function aggregateByDirectory(allFiles) {
byDirectory.set(dir, v);
}
for (const f of allFiles) {
const parts = f.dir === "." ? [] : f.dir.split(path.sep);
let acc = "";
const parts = f.dir === '.' ? [] : f.dir.split(path.sep);
let acc = '';
for (let i = 0; i < parts.length; i++) {
acc = i === 0 ? parts[0] : acc + path.sep + parts[i];
addDirBytes(acc, f.size);
}
if (parts.length === 0) addDirBytes(".", f.size);
if (parts.length === 0) addDirBytes('.', f.size);
}
return Array.from(byDirectory.values()).sort((a, b) => b.bytes - a.bytes);
return [...byDirectory.values()].sort((a, b) => b.bytes - a.bytes);
}
function computeDepthAndLongest(allFiles) {
@@ -129,21 +131,22 @@ function computeDepthAndLongest(allFiles) {
.sort((a, b) => b.path.length - a.path.length)
.slice(0, 25)
.map((f) => ({ path: f.path, length: f.path.length, size: f.size }));
const depthDist = Array.from(depthDistribution.entries())
const depthDist = [...depthDistribution.entries()]
.sort((a, b) => a[0] - b[0])
.map(([depth, count]) => ({ depth, count }));
return { depthDist, longestPaths };
}
function computeTemporal(allFiles, nowMs) {
let oldest = null, newest = null;
let oldest = null,
newest = null;
const ageBuckets = [
{ label: "> 1 year", minDays: 365, maxDays: Infinity, count: 0, bytes: 0 },
{ label: "612 months", minDays: 180, maxDays: 365, count: 0, bytes: 0 },
{ label: "16 months", minDays: 30, maxDays: 180, count: 0, bytes: 0 },
{ label: "730 days", minDays: 7, maxDays: 30, count: 0, bytes: 0 },
{ label: "17 days", minDays: 1, maxDays: 7, count: 0, bytes: 0 },
{ label: "< 1 day", minDays: 0, maxDays: 1, count: 0, bytes: 0 },
{ label: '> 1 year', minDays: 365, maxDays: Infinity, count: 0, bytes: 0 },
{ label: '612 months', minDays: 180, maxDays: 365, count: 0, bytes: 0 },
{ label: '16 months', minDays: 30, maxDays: 180, count: 0, bytes: 0 },
{ label: '730 days', minDays: 7, maxDays: 30, count: 0, bytes: 0 },
{ label: '17 days', minDays: 1, maxDays: 7, count: 0, bytes: 0 },
{ label: '< 1 day', minDays: 0, maxDays: 1, count: 0, bytes: 0 },
];
for (const f of allFiles) {
const ageDays = Math.max(0, (nowMs - (f.mtimeMs || nowMs)) / (24 * 60 * 60 * 1000));
@@ -158,15 +161,21 @@ function computeTemporal(allFiles, nowMs) {
if (!newest || f.mtimeMs > newest.mtimeMs) newest = f;
}
return {
oldest: oldest ? { path: oldest.path, mtime: oldest.mtimeMs ? new Date(oldest.mtimeMs).toISOString() : null } : null,
newest: newest ? { path: newest.path, mtime: newest.mtimeMs ? new Date(newest.mtimeMs).toISOString() : null } : null,
oldest: oldest
? { path: oldest.path, mtime: oldest.mtimeMs ? new Date(oldest.mtimeMs).toISOString() : null }
: null,
newest: newest
? { path: newest.path, mtime: newest.mtimeMs ? new Date(newest.mtimeMs).toISOString() : null }
: null,
ageBuckets,
};
}
function computeQuality(allFiles, textFiles) {
const zeroByteFiles = allFiles.filter((f) => f.size === 0).length;
const emptyTextFiles = textFiles.filter((f) => (f.size || 0) === 0 || (f.lines || 0) === 0).length;
const emptyTextFiles = textFiles.filter(
(f) => (f.size || 0) === 0 || (f.lines || 0) === 0,
).length;
const hiddenFiles = allFiles.filter((f) => f.hidden).length;
const symlinks = allFiles.filter((f) => f.isSymlink).length;
const largeThreshold = 50 * MB;
@@ -201,18 +210,31 @@ function computeDuplicates(allFiles, textFiles) {
for (const tf of textGroup) {
try {
const src = textFiles.find((x) => x.absolutePath === tf.absolutePath);
const content = src ? src.content : "";
const h = crypto.createHash("sha1").update(content).digest("hex");
const content = src ? src.content : '';
const h = crypto.createHash('sha1').update(content).digest('hex');
const g = contentHashGroups.get(h) || [];
g.push(tf);
contentHashGroups.set(h, g);
} catch (_) { /* ignore hashing errors for duplicate detection */ }
} catch {
/* ignore hashing errors for duplicate detection */
}
}
for (const [_h, g] of contentHashGroups.entries()) {
if (g.length > 1) duplicateCandidates.push({ reason: "same-size+text-hash", size: Number(sizeKey), count: g.length, files: g.map((f) => f.path) });
if (g.length > 1)
duplicateCandidates.push({
reason: 'same-size+text-hash',
size: Number(sizeKey),
count: g.length,
files: g.map((f) => f.path),
});
}
if (otherGroup.length > 1) {
duplicateCandidates.push({ reason: "same-size", size: Number(sizeKey), count: otherGroup.length, files: otherGroup.map((f) => f.path) });
duplicateCandidates.push({
reason: 'same-size',
size: Number(sizeKey),
count: otherGroup.length,
files: otherGroup.map((f) => f.path),
});
}
}
return duplicateCandidates;
@@ -226,10 +248,12 @@ function estimateCompressibility(textFiles) {
const sampleLen = Math.min(256 * 1024, tf.size || 0);
if (sampleLen <= 0) continue;
const sample = tf.content.slice(0, sampleLen);
const gz = zlib.gzipSync(Buffer.from(sample, "utf8"));
const gz = zlib.gzipSync(Buffer.from(sample, 'utf8'));
compSampleBytes += sampleLen;
compCompressedBytes += gz.length;
} catch (_) { /* ignore compression errors during sampling */ }
} catch {
/* ignore compression errors during sampling */
}
}
return compSampleBytes > 0 ? compCompressedBytes / compSampleBytes : null;
}
@@ -245,20 +269,34 @@ function computeGitInfo(allFiles, rootDir, largeThreshold) {
};
try {
if (!rootDir) return info;
const top = cp.execFileSync("git", ["rev-parse", "--show-toplevel"], { cwd: rootDir, stdio: ["ignore", "pipe", "ignore"] }).toString().trim();
const top = cp
.execFileSync('git', ['rev-parse', '--show-toplevel'], {
cwd: rootDir,
stdio: ['ignore', 'pipe', 'ignore'],
})
.toString()
.trim();
if (!top) return info;
info.isRepo = true;
const out = cp.execFileSync("git", ["ls-files", "-z"], { cwd: rootDir, stdio: ["ignore", "pipe", "ignore"] });
const tracked = new Set(out.toString().split("\0").filter(Boolean));
let trackedBytes = 0, trackedCount = 0, untrackedBytes = 0, untrackedCount = 0;
const out = cp.execFileSync('git', ['ls-files', '-z'], {
cwd: rootDir,
stdio: ['ignore', 'pipe', 'ignore'],
});
const tracked = new Set(out.toString().split('\0').filter(Boolean));
let trackedBytes = 0,
trackedCount = 0,
untrackedBytes = 0,
untrackedCount = 0;
const lfsCandidates = [];
for (const f of allFiles) {
const isTracked = tracked.has(f.path);
if (isTracked) {
trackedCount++; trackedBytes += f.size;
trackedCount++;
trackedBytes += f.size;
if (f.size >= largeThreshold) lfsCandidates.push({ path: f.path, size: f.size });
} else {
untrackedCount++; untrackedBytes += f.size;
untrackedCount++;
untrackedBytes += f.size;
}
}
info.trackedCount = trackedCount;
@@ -266,7 +304,9 @@ function computeGitInfo(allFiles, rootDir, largeThreshold) {
info.untrackedCount = untrackedCount;
info.untrackedBytes = untrackedBytes;
info.lfsCandidates = lfsCandidates.sort((a, b) => b.size - a.size).slice(0, 50);
} catch (_) { /* git not available or not a repo, ignore */ }
} catch {
/* git not available or not a repo, ignore */
}
return info;
}
@@ -280,34 +320,58 @@ function computeLargestFiles(allFiles, totalBytes) {
size: f.size,
sizeFormatted: formatSize(f.size),
percentOfTotal: toPct(f.size, totalBytes),
ext: f.ext || "",
ext: f.ext || '',
isBinary: f.isBinary,
mtime: f.mtimeMs ? new Date(f.mtimeMs).toISOString() : null,
}));
}
function mdTable(rows, headers) {
const header = `| ${headers.join(" | ")} |`;
const sep = `| ${headers.map(() => "---").join(" | ")} |`;
const body = rows.map((r) => `| ${r.join(" | ")} |`).join("\n");
const header = `| ${headers.join(' | ')} |`;
const sep = `| ${headers.map(() => '---').join(' | ')} |`;
const body = rows.map((r) => `| ${r.join(' | ')} |`).join('\n');
return `${header}\n${sep}\n${body}`;
}
function buildMarkdownReport(largestFiles, byExtensionArr, byDirectoryArr, totalBytes) {
const toPct = (num, den) => (den === 0 ? 0 : (num / den) * 100);
const md = [];
md.push("\n### Top Largest Files (Top 50)\n");
md.push(mdTable(
largestFiles.map((f) => [f.path, f.sizeFormatted, `${f.percentOfTotal.toFixed(2)}%`, f.ext || "", f.isBinary ? "binary" : "text"]),
["Path", "Size", "% of total", "Ext", "Type"],
));
md.push("\n\n### Top Extensions by Bytes (Top 20)\n");
const topExtRows = byExtensionArr.slice(0, 20).map((e) => [e.ext, String(e.count), formatSize(e.bytes), `${toPct(e.bytes, totalBytes).toFixed(2)}%`]);
md.push(mdTable(topExtRows, ["Ext", "Count", "Bytes", "% of total"]));
md.push("\n\n### Top Directories by Bytes (Top 20)\n");
const topDirRows = byDirectoryArr.slice(0, 20).map((d) => [d.dir, String(d.count), formatSize(d.bytes), `${toPct(d.bytes, totalBytes).toFixed(2)}%`]);
md.push(mdTable(topDirRows, ["Directory", "Files", "Bytes", "% of total"]));
return md.join("\n");
md.push(
'\n### Top Largest Files (Top 50)\n',
mdTable(
largestFiles.map((f) => [
f.path,
f.sizeFormatted,
`${f.percentOfTotal.toFixed(2)}%`,
f.ext || '',
f.isBinary ? 'binary' : 'text',
]),
['Path', 'Size', '% of total', 'Ext', 'Type'],
),
'\n\n### Top Extensions by Bytes (Top 20)\n',
);
const topExtRows = byExtensionArr
.slice(0, 20)
.map((e) => [
e.ext,
String(e.count),
formatSize(e.bytes),
`${toPct(e.bytes, totalBytes).toFixed(2)}%`,
]);
md.push(
mdTable(topExtRows, ['Ext', 'Count', 'Bytes', '% of total']),
'\n\n### Top Directories by Bytes (Top 20)\n',
);
const topDirRows = byDirectoryArr
.slice(0, 20)
.map((d) => [
d.dir,
String(d.count),
formatSize(d.bytes),
`${toPct(d.bytes, totalBytes).toFixed(2)}%`,
]);
md.push(mdTable(topDirRows, ['Directory', 'Files', 'Bytes', '% of total']));
return md.join('\n');
}
module.exports = {

View File

@@ -1,4 +1,4 @@
const H = require("./stats.helpers.js");
const H = require('./stats.helpers.js');
async function calculateStatistics(aggregatedContent, xmlFileSize, rootDir) {
const { textFiles, binaryFiles, errors } = aggregatedContent;
@@ -10,8 +10,8 @@ async function calculateStatistics(aggregatedContent, xmlFileSize, rootDir) {
const allFiles = await H.enrichAllFiles(textFiles, binaryFiles);
const totalBytes = allFiles.reduce((s, f) => s + f.size, 0);
const sizes = allFiles.map((f) => f.size).sort((a, b) => a - b);
const avgSize = sizes.length ? totalBytes / sizes.length : 0;
const medianSize = sizes.length ? H.percentile(sizes, 50) : 0;
const avgSize = sizes.length > 0 ? totalBytes / sizes.length : 0;
const medianSize = sizes.length > 0 ? H.percentile(sizes, 50) : 0;
const p90 = H.percentile(sizes, 90);
const p95 = H.percentile(sizes, 95);
const p99 = H.percentile(sizes, 99);

View File

@@ -1,4 +1,3 @@
#!/usr/bin/env node
/* deno-lint-ignore-file */
/*
Automatic test matrix for project root detection.
@@ -6,65 +5,65 @@
No external options or flags required. Safe to run multiple times.
*/
const os = require("node:os");
const path = require("node:path");
const fs = require("fs-extra");
const { promisify } = require("node:util");
const { execFile } = require("node:child_process");
const process = require("node:process");
const os = require('node:os');
const path = require('node:path');
const fs = require('fs-extra');
const { promisify } = require('node:util');
const { execFile } = require('node:child_process');
const process = require('node:process');
const execFileAsync = promisify(execFile);
const { findProjectRoot } = require("./projectRoot.js");
const { findProjectRoot } = require('./projectRoot.js');
async function cmdAvailable(cmd) {
try {
await execFileAsync(cmd, ["--version"], { timeout: 500, windowsHide: true });
await execFileAsync(cmd, ['--version'], { timeout: 500, windowsHide: true });
return true;
} catch {
return false;
}
async function testSvnMarker() {
const root = await mkTmpDir("svn");
const nested = path.join(root, "proj", "code");
await fs.ensureDir(nested);
await fs.ensureDir(path.join(root, ".svn"));
const found = await findProjectRoot(nested);
assertEqual(found, root, ".svn marker should be detected");
return { name: "svn-marker", ok: true };
}
async function testSymlinkStart() {
const root = await mkTmpDir("symlink-start");
const nested = path.join(root, "a", "b");
await fs.ensureDir(nested);
await fs.writeFile(path.join(root, ".project-root"), "\n");
const tmp = await mkTmpDir("symlink-tmp");
const link = path.join(tmp, "link-to-b");
try {
await fs.symlink(nested, link);
} catch {
// symlink may not be permitted on some systems; skip
return { name: "symlink-start", ok: true, skipped: true };
async function testSvnMarker() {
const root = await mkTmpDir('svn');
const nested = path.join(root, 'proj', 'code');
await fs.ensureDir(nested);
await fs.ensureDir(path.join(root, '.svn'));
const found = await findProjectRoot(nested);
assertEqual(found, root, '.svn marker should be detected');
return { name: 'svn-marker', ok: true };
}
const found = await findProjectRoot(link);
assertEqual(found, root, "should resolve symlinked start to real root");
return { name: "symlink-start", ok: true };
}
async function testSubmoduleLikeInnerGitFile() {
const root = await mkTmpDir("submodule-like");
const mid = path.join(root, "mid");
const leaf = path.join(mid, "leaf");
await fs.ensureDir(leaf);
// outer repo
await fs.ensureDir(path.join(root, ".git"));
// inner submodule-like .git file
await fs.writeFile(path.join(mid, ".git"), "gitdir: ../.git/modules/mid\n");
const found = await findProjectRoot(leaf);
assertEqual(found, root, "outermost .git should win on tie weight");
return { name: "submodule-like-gitfile", ok: true };
}
async function testSymlinkStart() {
const root = await mkTmpDir('symlink-start');
const nested = path.join(root, 'a', 'b');
await fs.ensureDir(nested);
await fs.writeFile(path.join(root, '.project-root'), '\n');
const tmp = await mkTmpDir('symlink-tmp');
const link = path.join(tmp, 'link-to-b');
try {
await fs.symlink(nested, link);
} catch {
// symlink may not be permitted on some systems; skip
return { name: 'symlink-start', ok: true, skipped: true };
}
const found = await findProjectRoot(link);
assertEqual(found, root, 'should resolve symlinked start to real root');
return { name: 'symlink-start', ok: true };
}
async function testSubmoduleLikeInnerGitFile() {
const root = await mkTmpDir('submodule-like');
const mid = path.join(root, 'mid');
const leaf = path.join(mid, 'leaf');
await fs.ensureDir(leaf);
// outer repo
await fs.ensureDir(path.join(root, '.git'));
// inner submodule-like .git file
await fs.writeFile(path.join(mid, '.git'), 'gitdir: ../.git/modules/mid\n');
const found = await findProjectRoot(leaf);
assertEqual(found, root, 'outermost .git should win on tie weight');
return { name: 'submodule-like-gitfile', ok: true };
}
}
async function mkTmpDir(name) {
@@ -75,274 +74,283 @@ async function mkTmpDir(name) {
function assertEqual(actual, expected, msg) {
if (actual !== expected) {
throw new Error(`${msg}: expected=\"${expected}\" actual=\"${actual}\"`);
throw new Error(`${msg}: expected="${expected}" actual="${actual}"`);
}
}
async function testSentinel() {
const root = await mkTmpDir("sentinel");
const nested = path.join(root, "a", "b", "c");
const root = await mkTmpDir('sentinel');
const nested = path.join(root, 'a', 'b', 'c');
await fs.ensureDir(nested);
await fs.writeFile(path.join(root, ".project-root"), "\n");
await fs.writeFile(path.join(root, '.project-root'), '\n');
const found = await findProjectRoot(nested);
await assertEqual(found, root, "sentinel .project-root should win");
return { name: "sentinel", ok: true };
await assertEqual(found, root, 'sentinel .project-root should win');
return { name: 'sentinel', ok: true };
}
/**
 * `.workspace-root` and `.repo-root` sentinels should each pin the root.
 * Fix: remove duplicated pre-format diff lines (duplicate declarations).
 * @returns {Promise<{name: string, ok: boolean}>} test result
 */
async function testOtherSentinels() {
  const root = await mkTmpDir('other-sentinels');
  const nested = path.join(root, 'x', 'y');
  await fs.ensureDir(nested);
  await fs.writeFile(path.join(root, '.workspace-root'), '\n');
  const found1 = await findProjectRoot(nested);
  assertEqual(found1, root, 'sentinel .workspace-root should win');
  // Swap the sentinel and verify the alternative name is honored too.
  await fs.remove(path.join(root, '.workspace-root'));
  await fs.writeFile(path.join(root, '.repo-root'), '\n');
  const found2 = await findProjectRoot(nested);
  assertEqual(found2, root, 'sentinel .repo-root should win');
  return { name: 'other-sentinels', ok: true };
}
/**
 * A repository created with `git init` should be detected via the git CLI;
 * skipped when git is unavailable.
 * Fix: remove duplicated pre-format diff lines; drop needless `await` on
 * synchronous `assertEqual`.
 * @returns {Promise<{name: string, ok: boolean, skipped?: boolean}>} test result
 */
async function testGitCliAndMarker() {
  const hasGit = await cmdAvailable('git');
  if (!hasGit) return { name: 'git-cli', ok: true, skipped: true };
  const root = await mkTmpDir('git');
  const nested = path.join(root, 'pkg', 'src');
  await fs.ensureDir(nested);
  // Timeout keeps a hung git from stalling the whole suite.
  await execFileAsync('git', ['init'], { cwd: root, timeout: 2000 });
  const found = await findProjectRoot(nested);
  assertEqual(found, root, 'git toplevel should be detected');
  return { name: 'git-cli', ok: true };
}
/**
 * A bare `.hg` directory marker should be detected without needing the
 * Mercurial CLI installed.
 * Fix: remove duplicated pre-format diff lines; drop needless `await` on
 * synchronous `assertEqual`.
 * @returns {Promise<{name: string, ok: boolean}>} test result
 */
async function testHgMarkerOrCli() {
  // Prefer simple marker test to avoid requiring Mercurial install
  const root = await mkTmpDir('hg');
  const nested = path.join(root, 'lib');
  await fs.ensureDir(nested);
  await fs.ensureDir(path.join(root, '.hg'));
  const found = await findProjectRoot(nested);
  assertEqual(found, root, '.hg marker should be detected');
  return { name: 'hg-marker', ok: true };
}
/**
 * `pnpm-workspace.yaml` at the repo root should mark the workspace root.
 * Fix: remove duplicated pre-format diff lines; drop needless `await` on
 * synchronous `assertEqual`.
 * @returns {Promise<{name: string, ok: boolean}>} test result
 */
async function testWorkspacePnpm() {
  const root = await mkTmpDir('pnpm-workspace');
  const pkgA = path.join(root, 'packages', 'a');
  await fs.ensureDir(pkgA);
  await fs.writeFile(path.join(root, 'pnpm-workspace.yaml'), 'packages:\n  - packages/*\n');
  const found = await findProjectRoot(pkgA);
  assertEqual(found, root, 'pnpm-workspace.yaml should be detected');
  return { name: 'pnpm-workspace', ok: true };
}
/**
 * A root `package.json` with a `workspaces` field should mark the root.
 * Fix: remove duplicated pre-format diff lines; drop needless `await` on
 * synchronous `assertEqual`.
 * @returns {Promise<{name: string, ok: boolean}>} test result
 */
async function testPackageJsonWorkspaces() {
  const root = await mkTmpDir('package-workspaces');
  const pkgA = path.join(root, 'packages', 'a');
  await fs.ensureDir(pkgA);
  await fs.writeJson(
    path.join(root, 'package.json'),
    { private: true, workspaces: ['packages/*'] },
    { spaces: 2 },
  );
  const found = await findProjectRoot(pkgA);
  assertEqual(found, root, 'package.json workspaces should be detected');
  return { name: 'package.json-workspaces', ok: true };
}
/**
 * A `yarn.lock` at the repo root should mark the project root.
 * Fix: remove duplicated pre-format diff lines; drop needless `await` on
 * synchronous `assertEqual`.
 * @returns {Promise<{name: string, ok: boolean}>} test result
 */
async function testLockfiles() {
  const root = await mkTmpDir('lockfiles');
  const nested = path.join(root, 'src');
  await fs.ensureDir(nested);
  await fs.writeFile(path.join(root, 'yarn.lock'), '\n');
  const found = await findProjectRoot(nested);
  assertEqual(found, root, 'yarn.lock should be detected');
  return { name: 'lockfiles', ok: true };
}
/**
 * Language-level config files (here `pyproject.toml`) should mark the root.
 * Fix: remove duplicated pre-format diff lines; drop needless `await` on
 * synchronous `assertEqual`.
 * @returns {Promise<{name: string, ok: boolean}>} test result
 */
async function testLanguageConfigs() {
  const root = await mkTmpDir('lang-configs');
  const nested = path.join(root, 'x', 'y');
  await fs.ensureDir(nested);
  await fs.writeFile(path.join(root, 'pyproject.toml'), "[tool.poetry]\nname='tmp'\n");
  const found = await findProjectRoot(nested);
  assertEqual(found, root, 'pyproject.toml should be detected');
  return { name: 'language-configs', ok: true };
}
/**
 * When identical-weight markers exist at two depths, the outermost
 * directory must win.
 * Fix: remove duplicated pre-format diff lines; drop needless `await` on
 * synchronous `assertEqual`.
 * @returns {Promise<{name: string, ok: boolean}>} test result
 */
async function testPreferOuterOnTie() {
  const root = await mkTmpDir('tie');
  const mid = path.join(root, 'mid');
  const leaf = path.join(mid, 'leaf');
  await fs.ensureDir(leaf);
  // same weight marker at two levels
  await fs.writeFile(path.join(root, 'requirements.txt'), '\n');
  await fs.writeFile(path.join(mid, 'requirements.txt'), '\n');
  const found = await findProjectRoot(leaf);
  assertEqual(found, root, 'outermost directory should win on equal weight');
  return { name: 'prefer-outermost-tie', ok: true };
}
// Additional coverage: Bazel, Nx/Turbo/Rush, Go workspaces, Deno, Java/Scala, PHP, Rust, Nix, Changesets, env markers,
// and priority interaction between package.json and lockfiles.
/**
 * A Bazel `WORKSPACE` file should mark the project root.
 * Fix: remove duplicated pre-format diff lines; drop needless `await` on
 * synchronous `assertEqual`.
 * @returns {Promise<{name: string, ok: boolean}>} test result
 */
async function testBazelWorkspace() {
  const root = await mkTmpDir('bazel');
  const nested = path.join(root, 'apps', 'svc');
  await fs.ensureDir(nested);
  await fs.writeFile(path.join(root, 'WORKSPACE'), 'workspace(name="tmp")\n');
  const found = await findProjectRoot(nested);
  assertEqual(found, root, 'Bazel WORKSPACE should be detected');
  return { name: 'bazel-workspace', ok: true };
}
/**
 * An `nx.json` at the repo root should mark the monorepo root.
 * Fix: remove duplicated pre-format diff lines; drop needless `await` on
 * synchronous `assertEqual`.
 * @returns {Promise<{name: string, ok: boolean}>} test result
 */
async function testNx() {
  const root = await mkTmpDir('nx');
  const nested = path.join(root, 'apps', 'web');
  await fs.ensureDir(nested);
  await fs.writeJson(path.join(root, 'nx.json'), { npmScope: 'tmp' }, { spaces: 2 });
  const found = await findProjectRoot(nested);
  assertEqual(found, root, 'nx.json should be detected');
  return { name: 'nx', ok: true };
}
/**
 * A `turbo.json` at the repo root should mark the monorepo root.
 * Fix: remove duplicated pre-format diff lines; drop needless `await` on
 * synchronous `assertEqual`.
 * @returns {Promise<{name: string, ok: boolean}>} test result
 */
async function testTurbo() {
  const root = await mkTmpDir('turbo');
  const nested = path.join(root, 'packages', 'x');
  await fs.ensureDir(nested);
  await fs.writeJson(path.join(root, 'turbo.json'), { pipeline: {} }, { spaces: 2 });
  const found = await findProjectRoot(nested);
  assertEqual(found, root, 'turbo.json should be detected');
  return { name: 'turbo', ok: true };
}
/**
 * A `rush.json` at the repo root should mark the monorepo root.
 * Fix: remove duplicated pre-format diff lines; drop needless `await` on
 * synchronous `assertEqual`.
 * @returns {Promise<{name: string, ok: boolean}>} test result
 */
async function testRush() {
  const root = await mkTmpDir('rush');
  const nested = path.join(root, 'apps', 'a');
  await fs.ensureDir(nested);
  await fs.writeJson(path.join(root, 'rush.json'), { projectFolderMinDepth: 1 }, { spaces: 2 });
  const found = await findProjectRoot(nested);
  assertEqual(found, root, 'rush.json should be detected');
  return { name: 'rush', ok: true };
}
/**
 * A `go.work` at the top should outrank a module-level `go.mod` beneath it.
 * Fix: remove duplicated pre-format diff lines; drop needless `await` on
 * synchronous `assertEqual`.
 * @returns {Promise<{name: string, ok: boolean}>} test result
 */
async function testGoWorkAndMod() {
  const root = await mkTmpDir('gowork');
  const mod = path.join(root, 'modA');
  const nested = path.join(mod, 'pkg');
  await fs.ensureDir(nested);
  await fs.writeFile(path.join(root, 'go.work'), 'go 1.22\nuse ./modA\n');
  await fs.writeFile(path.join(mod, 'go.mod'), 'module example.com/a\ngo 1.22\n');
  const found = await findProjectRoot(nested);
  assertEqual(found, root, 'go.work should define the workspace root');
  return { name: 'go-work', ok: true };
}
/**
 * A `deno.json` at the repo root should mark the project root.
 * Fix: remove duplicated pre-format diff lines; drop needless `await` on
 * synchronous `assertEqual`.
 * @returns {Promise<{name: string, ok: boolean}>} test result
 */
async function testDenoJson() {
  const root = await mkTmpDir('deno');
  const nested = path.join(root, 'src');
  await fs.ensureDir(nested);
  await fs.writeJson(path.join(root, 'deno.json'), { tasks: {} }, { spaces: 2 });
  const found = await findProjectRoot(nested);
  assertEqual(found, root, 'deno.json should be detected');
  return { name: 'deno-json', ok: true };
}
/**
 * A `settings.gradle` at the repo root should mark the project root.
 * Fix: remove duplicated pre-format diff lines; drop needless `await` on
 * synchronous `assertEqual`.
 * @returns {Promise<{name: string, ok: boolean}>} test result
 */
async function testGradleSettings() {
  const root = await mkTmpDir('gradle');
  const nested = path.join(root, 'app');
  await fs.ensureDir(nested);
  await fs.writeFile(path.join(root, 'settings.gradle'), "rootProject.name='tmp'\n");
  const found = await findProjectRoot(nested);
  assertEqual(found, root, 'settings.gradle should be detected');
  return { name: 'gradle-settings', ok: true };
}
/**
 * A `pom.xml` at the repo root should mark the project root.
 * Fix: remove duplicated pre-format diff lines; drop needless `await` on
 * synchronous `assertEqual`.
 * @returns {Promise<{name: string, ok: boolean}>} test result
 */
async function testMavenPom() {
  const root = await mkTmpDir('maven');
  const nested = path.join(root, 'module');
  await fs.ensureDir(nested);
  await fs.writeFile(path.join(root, 'pom.xml'), '<project></project>\n');
  const found = await findProjectRoot(nested);
  assertEqual(found, root, 'pom.xml should be detected');
  return { name: 'maven-pom', ok: true };
}
/**
 * A `build.sbt` at the repo root should mark the project root.
 * Fix: remove duplicated pre-format diff lines; drop needless `await` on
 * synchronous `assertEqual`.
 * @returns {Promise<{name: string, ok: boolean}>} test result
 */
async function testSbtBuild() {
  const root = await mkTmpDir('sbt');
  const nested = path.join(root, 'sub');
  await fs.ensureDir(nested);
  await fs.writeFile(path.join(root, 'build.sbt'), 'name := "tmp"\n');
  const found = await findProjectRoot(nested);
  assertEqual(found, root, 'build.sbt should be detected');
  return { name: 'sbt-build', ok: true };
}
/**
 * PHP `composer.json`/`composer.lock` at the root should mark the root.
 * Fix: remove duplicated pre-format diff lines; drop needless `await` on
 * synchronous `assertEqual`.
 * @returns {Promise<{name: string, ok: boolean}>} test result
 */
async function testComposer() {
  const root = await mkTmpDir('composer');
  const nested = path.join(root, 'src');
  await fs.ensureDir(nested);
  await fs.writeJson(path.join(root, 'composer.json'), { name: 'tmp/pkg' }, { spaces: 2 });
  await fs.writeFile(path.join(root, 'composer.lock'), '{}\n');
  const found = await findProjectRoot(nested);
  assertEqual(found, root, 'composer.{json,lock} should be detected');
  return { name: 'composer', ok: true };
}
/**
 * A Rust `Cargo.toml` at the repo root should mark the project root.
 * Fix: remove duplicated pre-format diff lines; drop needless `await` on
 * synchronous `assertEqual`.
 * @returns {Promise<{name: string, ok: boolean}>} test result
 */
async function testCargo() {
  const root = await mkTmpDir('cargo');
  const nested = path.join(root, 'src');
  await fs.ensureDir(nested);
  await fs.writeFile(path.join(root, 'Cargo.toml'), "[package]\nname='tmp'\nversion='0.0.0'\n");
  const found = await findProjectRoot(nested);
  assertEqual(found, root, 'Cargo.toml should be detected');
  return { name: 'cargo', ok: true };
}
/**
 * A `flake.nix` at the repo root should mark the project root.
 * Fix: remove duplicated pre-format diff lines; drop needless `await` on
 * synchronous `assertEqual`.
 * @returns {Promise<{name: string, ok: boolean}>} test result
 */
async function testNixFlake() {
  const root = await mkTmpDir('nix');
  const nested = path.join(root, 'work');
  await fs.ensureDir(nested);
  await fs.writeFile(path.join(root, 'flake.nix'), '{ }\n');
  const found = await findProjectRoot(nested);
  assertEqual(found, root, 'flake.nix should be detected');
  return { name: 'nix-flake', ok: true };
}
/**
 * A `.changeset/config.json` at the repo root should mark the root.
 * Fix: remove duplicated pre-format diff lines; drop needless `await` on
 * synchronous `assertEqual`.
 * @returns {Promise<{name: string, ok: boolean}>} test result
 */
async function testChangesetConfig() {
  const root = await mkTmpDir('changeset');
  const nested = path.join(root, 'pkg');
  await fs.ensureDir(nested);
  await fs.ensureDir(path.join(root, '.changeset'));
  await fs.writeJson(
    path.join(root, '.changeset', 'config.json'),
    { $schema: 'https://unpkg.com/@changesets/config@2.3.1/schema.json' },
    { spaces: 2 },
  );
  const found = await findProjectRoot(nested);
  assertEqual(found, root, '.changeset/config.json should be detected');
  return { name: 'changesets', ok: true };
}
/**
 * A marker named via the PROJECT_ROOT_MARKERS env var should be honored;
 * the previous env value is restored in `finally` even if the lookup throws.
 * Fix: remove duplicated pre-format diff lines; drop needless `await` on
 * synchronous `assertEqual`.
 * @returns {Promise<{name: string, ok: boolean}>} test result
 */
async function testEnvCustomMarker() {
  const root = await mkTmpDir('env-marker');
  const nested = path.join(root, 'dir');
  await fs.ensureDir(nested);
  await fs.writeFile(path.join(root, 'MY_ROOT'), '\n');
  const prev = process.env.PROJECT_ROOT_MARKERS;
  process.env.PROJECT_ROOT_MARKERS = 'MY_ROOT';
  try {
    const found = await findProjectRoot(nested);
    assertEqual(found, root, 'custom env marker should be honored');
  } finally {
    // Restore the caller's environment exactly as it was.
    if (prev === undefined) delete process.env.PROJECT_ROOT_MARKERS;
    else process.env.PROJECT_ROOT_MARKERS = prev;
  }
  return { name: 'env-custom-marker', ok: true };
}
/**
 * A root-level lockfile should outrank a nested plain `package.json`.
 * Fix: remove duplicated pre-format diff lines; drop needless `await` on
 * synchronous `assertEqual`.
 * @returns {Promise<{name: string, ok: boolean}>} test result
 */
async function testPackageLowPriorityVsLock() {
  const root = await mkTmpDir('pkg-vs-lock');
  const nested = path.join(root, 'nested');
  await fs.ensureDir(path.join(nested, 'deep'));
  await fs.writeJson(path.join(nested, 'package.json'), { name: 'nested' }, { spaces: 2 });
  await fs.writeFile(path.join(root, 'yarn.lock'), '\n');
  const found = await findProjectRoot(path.join(nested, 'deep'));
  assertEqual(found, root, 'lockfile at root should outrank nested package.json');
  return { name: 'package-vs-lock-priority', ok: true };
}
async function run() {
@@ -381,25 +389,25 @@ async function run() {
try {
const r = await t();
results.push({ ...r, ok: true });
console.log(`${r.name}${r.skipped ? " (skipped)" : ""}`);
} catch (err) {
console.error(`${t.name}:`, err && err.message ? err.message : err);
results.push({ name: t.name, ok: false, error: String(err) });
console.log(`${r.name}${r.skipped ? ' (skipped)' : ''}`);
} catch (error) {
console.error(`${t.name}:`, error && error.message ? error.message : error);
results.push({ name: t.name, ok: false, error: String(error) });
}
}
const failed = results.filter((r) => !r.ok);
console.log("\nSummary:");
console.log('\nSummary:');
for (const r of results) {
console.log(`- ${r.name}: ${r.ok ? "ok" : "FAIL"}${r.skipped ? " (skipped)" : ""}`);
console.log(`- ${r.name}: ${r.ok ? 'ok' : 'FAIL'}${r.skipped ? ' (skipped)' : ''}`);
}
if (failed.length) {
if (failed.length > 0) {
process.exitCode = 1;
}
}
run().catch((e) => {
console.error("Fatal error:", e);
run().catch((error) => {
console.error('Fatal error:', error);
process.exit(1);
});

View File

@@ -1,49 +1,44 @@
const fs = require("fs-extra");
const fs = require('fs-extra');
/**
 * Escape a value for use inside a single-quoted XML attribute.
 * Only `&`, `<`, and `'` are replaced — sufficient here because the writer
 * emits attributes as path='...'; `>` and `"` are left untouched.
 * Fix: the diff-merge left both the old and new function bodies in place
 * (duplicate `function escapeXml` declarations); keep the single-pass
 * `replaceAll` version.
 * @param {*} string_ - value to escape; non-strings are stringified as-is
 * @returns {string} escaped attribute-safe text
 */
function escapeXml(string_) {
  if (typeof string_ !== 'string') {
    return String(string_);
  }
  return string_.replaceAll('&', '&amp;').replaceAll('<', '&lt;').replaceAll("'", '&apos;');
}
/**
 * Split text into lines, each prefixed with a leading-space indent.
 * Note: returns an ARRAY of indented lines (not a joined string);
 * non-string input is stringified and returned unchanged.
 * Fix: the diff-merge left duplicate type-check/return lines; keep one copy.
 * @param {*} content - file content to indent
 * @returns {string[]|string} indented lines, or `String(content)` for non-strings
 */
function indentFileContent(content) {
  if (typeof content !== 'string') {
    return String(content);
  }
  return content.split('\n').map((line) => ` ${line}`);
}
function generateXMLOutput(aggregatedContent, outputPath) {
const { textFiles } = aggregatedContent;
const writeStream = fs.createWriteStream(outputPath, { encoding: "utf8" });
const writeStream = fs.createWriteStream(outputPath, { encoding: 'utf8' });
return new Promise((resolve, reject) => {
writeStream.on("error", reject);
writeStream.on("finish", resolve);
writeStream.on('error', reject);
writeStream.on('finish', resolve);
writeStream.write('<?xml version="1.0" encoding="UTF-8"?>\n');
writeStream.write("<files>\n");
writeStream.write('<files>\n');
// Sort files by path for deterministic order
const filesSorted = [...textFiles].sort((a, b) =>
a.path.localeCompare(b.path)
);
const filesSorted = [...textFiles].sort((a, b) => a.path.localeCompare(b.path));
let index = 0;
const writeNext = () => {
if (index >= filesSorted.length) {
writeStream.write("</files>\n");
writeStream.write('</files>\n');
writeStream.end();
return;
}
const file = filesSorted[index++];
const p = escapeXml(file.path);
const content = typeof file.content === "string" ? file.content : "";
const content = typeof file.content === 'string' ? file.content : '';
if (content.length === 0) {
writeStream.write(`\t<file path='${p}'/>\n`);
@@ -51,27 +46,34 @@ function generateXMLOutput(aggregatedContent, outputPath) {
return;
}
const needsCdata = content.includes("<") || content.includes("&") ||
content.includes("]]>");
const needsCdata = content.includes('<') || content.includes('&') || content.includes(']]>');
if (needsCdata) {
// Open tag and CDATA on their own line with tab indent; content lines indented with two tabs
writeStream.write(`\t<file path='${p}'><![CDATA[\n`);
// Safely split any occurrences of "]]>" inside content, trim trailing newlines, indent each line with two tabs
const safe = content.replace(/]]>/g, "]]]]><![CDATA[>");
const trimmed = safe.replace(/[\r\n]+$/, "");
const indented = trimmed.length > 0
? trimmed.split("\n").map((line) => `\t\t${line}`).join("\n")
: "";
const safe = content.replaceAll(']]>', ']]]]><![CDATA[>');
const trimmed = safe.replace(/[\r\n]+$/, '');
const indented =
trimmed.length > 0
? trimmed
.split('\n')
.map((line) => `\t\t${line}`)
.join('\n')
: '';
writeStream.write(indented);
// Close CDATA and attach closing tag directly after the last content line
writeStream.write("]]></file>\n");
writeStream.write(']]></file>\n');
} else {
// Write opening tag then newline; indent content with two tabs; attach closing tag directly after last content char
writeStream.write(`\t<file path='${p}'>\n`);
const trimmed = content.replace(/[\r\n]+$/, "");
const indented = trimmed.length > 0
? trimmed.split("\n").map((line) => `\t\t${line}`).join("\n")
: "";
const trimmed = content.replace(/[\r\n]+$/, '');
const indented =
trimmed.length > 0
? trimmed
.split('\n')
.map((line) => `\t\t${line}`)
.join('\n')
: '';
writeStream.write(indented);
writeStream.write(`</file>\n`);
}