diff --git a/.gitignore b/.gitignore index 1407a3f5..972b3b7c 100644 --- a/.gitignore +++ b/.gitignore @@ -44,4 +44,4 @@ CLAUDE.md test-project-install/* sample-project/* flattened-codebase.xml - +*.stats.md diff --git a/tools/flattener/main.js b/tools/flattener/main.js index 5076c552..abed992c 100644 --- a/tools/flattener/main.js +++ b/tools/flattener/main.js @@ -127,19 +127,11 @@ program path.join(inputDir, "flattened-codebase.xml"), ); } - } else { - console.error( - "Could not auto-detect a project root and no arguments were provided. Please specify -i/--input and -o/--output.", - ); - process.exit(1); } // Ensure output directory exists await fs.ensureDir(path.dirname(outputPath)); - console.log(`Flattening codebase from: ${inputDir}`); - console.log(`Output file: ${outputPath}`); - try { // Verify input directory exists if (!await fs.pathExists(inputDir)) { @@ -159,7 +151,6 @@ program ); // Process files with progress tracking - console.log("Reading file contents"); const processingSpinner = ora("📄 Processing files...").start(); const aggregatedContent = await aggregateFileContents( filteredFiles, @@ -172,10 +163,6 @@ program if (aggregatedContent.errors.length > 0) { console.log(`Errors: ${aggregatedContent.errors.length}`); } - console.log(`Text files: ${aggregatedContent.textFiles.length}`); - if (aggregatedContent.binaryFiles.length > 0) { - console.log(`Binary files: ${aggregatedContent.binaryFiles.length}`); - } // Generate XML output using streaming const xmlSpinner = ora("🔧 Generating XML output...").start(); @@ -184,7 +171,11 @@ program // Calculate and display statistics const outputStats = await fs.stat(outputPath); - const stats = calculateStatistics(aggregatedContent, outputStats.size); + const stats = await calculateStatistics( + aggregatedContent, + outputStats.size, + inputDir, + ); // Display completion summary console.log("\n📊 Completion Summary:"); @@ -201,8 +192,476 @@ program ); console.log(`🔢 Estimated tokens: ${stats.estimatedTokens}`); console.log( - `📊 File breakdown: ${stats.textFiles} text, ${stats.binaryFiles} binary, ${stats.errorFiles} errors`, + `📊 File breakdown: ${stats.textFiles} text, ${stats.binaryFiles} binary, ${stats.errorFiles} errors\n`, ); + + // Ask user if they want detailed stats + markdown report + const generateDetailed = await promptYesNo( + "Generate detailed stats (console + markdown) now?", + true, + ); + + if (generateDetailed) { + // Additional detailed stats + console.log("\n📈 Size Percentiles:"); + console.log( + ` Avg: ${ + Math.round(stats.avgFileSize).toLocaleString() + } B, Median: ${ + Math.round(stats.medianFileSize).toLocaleString() + } B, p90: ${stats.p90.toLocaleString()} B, p95: ${stats.p95.toLocaleString()} B, p99: ${stats.p99.toLocaleString()} B`, + ); + + if (Array.isArray(stats.histogram) && stats.histogram.length) { + console.log("\n🧮 Size Histogram:"); + for (const b of stats.histogram.slice(0, 2)) { + console.log( + ` ${b.label}: ${b.count} files, ${b.bytes.toLocaleString()} bytes`, + ); + } + if (stats.histogram.length > 2) { + console.log(` … and ${stats.histogram.length - 2} more buckets`); + } + } + + if (Array.isArray(stats.byExtension) && stats.byExtension.length) { + const topExt = stats.byExtension.slice(0, 2); + console.log("\n📦 Top Extensions:"); + for (const e of topExt) { + const pct = stats.totalBytes + ? 
((e.bytes / stats.totalBytes) * 100) + : 0; + console.log( + ` ${e.ext}: ${e.count} files, ${e.bytes.toLocaleString()} bytes (${ + pct.toFixed(2) + }%)`, + ); + } + if (stats.byExtension.length > 2) { + console.log( + ` … and ${stats.byExtension.length - 2} more extensions`, + ); + } + } + + if (Array.isArray(stats.byDirectory) && stats.byDirectory.length) { + const topDir = stats.byDirectory.slice(0, 2); + console.log("\n📂 Top Directories:"); + for (const d of topDir) { + const pct = stats.totalBytes + ? ((d.bytes / stats.totalBytes) * 100) + : 0; + console.log( + ` ${d.dir}: ${d.count} files, ${d.bytes.toLocaleString()} bytes (${ + pct.toFixed(2) + }%)`, + ); + } + if (stats.byDirectory.length > 2) { + console.log( + ` … and ${stats.byDirectory.length - 2} more directories`, + ); + } + } + + if ( + Array.isArray(stats.depthDistribution) && + stats.depthDistribution.length + ) { + console.log("\n🌳 Depth Distribution:"); + const dd = stats.depthDistribution.slice(0, 2); + let line = " " + dd.map((d) => `${d.depth}:${d.count}`).join(" "); + if (stats.depthDistribution.length > 2) { + line += ` … +${stats.depthDistribution.length - 2} more`; + } + console.log(line); + } + + if (Array.isArray(stats.longestPaths) && stats.longestPaths.length) { + console.log("\n🧵 Longest Paths:"); + for (const p of stats.longestPaths.slice(0, 2)) { + console.log( + ` ${p.path} (${p.length} chars, ${p.size.toLocaleString()} bytes)`, + ); + } + if (stats.longestPaths.length > 2) { + console.log(` … and ${stats.longestPaths.length - 2} more paths`); + } + } + + if (stats.temporal) { + console.log("\n⏱️ Temporal:"); + if (stats.temporal.oldest) { + console.log( + ` Oldest: ${stats.temporal.oldest.path} (${stats.temporal.oldest.mtime})`, + ); + } + if (stats.temporal.newest) { + console.log( + ` Newest: ${stats.temporal.newest.path} (${stats.temporal.newest.mtime})`, + ); + } + if (Array.isArray(stats.temporal.ageBuckets)) { + console.log(" Age buckets:"); + for (const b of stats.temporal.ageBuckets.slice(0, 2)) { + console.log( + ` ${b.label}: ${b.count} files, ${b.bytes.toLocaleString()} bytes`, + ); + } + if (stats.temporal.ageBuckets.length > 2) { + console.log( + ` … and ${ + stats.temporal.ageBuckets.length - 2 + } more buckets`, + ); + } + } + } + + if (stats.quality) { + console.log("\n✅ Quality Signals:"); + console.log(` Zero-byte files: ${stats.quality.zeroByteFiles}`); + console.log(` Empty text files: ${stats.quality.emptyTextFiles}`); + console.log(` Hidden files: ${stats.quality.hiddenFiles}`); + console.log(` Symlinks: ${stats.quality.symlinks}`); + console.log( + ` Large files (>= ${ + (stats.quality.largeThreshold / (1024 * 1024)).toFixed(0) + } MB): ${stats.quality.largeFilesCount}`, + ); + console.log( + ` Suspiciously large files (>= 100 MB): ${stats.quality.suspiciousLargeFilesCount}`, + ); + } + + if ( + Array.isArray(stats.duplicateCandidates) && + stats.duplicateCandidates.length + ) { + console.log("\n🧬 Duplicate Candidates:"); + for (const d of stats.duplicateCandidates.slice(0, 2)) { + console.log( + ` ${d.reason}: ${d.count} files @ ${d.size.toLocaleString()} bytes`, + ); + } + if (stats.duplicateCandidates.length > 2) { + console.log( + ` … and ${stats.duplicateCandidates.length - 2} more groups`, + ); + } + } + + if (typeof stats.compressibilityRatio === "number") { + console.log( + `\n🗜️ Compressibility ratio (sampled): ${ + (stats.compressibilityRatio * 100).toFixed(2) + }%`, + ); + } + + if (stats.git && stats.git.isRepo) { + console.log("\n🔧 
Git:"); + console.log( + ` Tracked: ${stats.git.trackedCount} files, ${stats.git.trackedBytes.toLocaleString()} bytes`, + ); + console.log( + ` Untracked: ${stats.git.untrackedCount} files, ${stats.git.untrackedBytes.toLocaleString()} bytes`, + ); + if ( + Array.isArray(stats.git.lfsCandidates) && + stats.git.lfsCandidates.length + ) { + console.log(" LFS candidates (top 2):"); + for (const f of stats.git.lfsCandidates.slice(0, 2)) { + console.log(` ${f.path} (${f.size.toLocaleString()} bytes)`); + } + if (stats.git.lfsCandidates.length > 2) { + console.log( + ` โ€ฆ and ${stats.git.lfsCandidates.length - 2} more`, + ); + } + } + } + + if (Array.isArray(stats.largestFiles) && stats.largestFiles.length) { + console.log("\n๐Ÿ“š Largest Files (top 2):"); + for (const f of stats.largestFiles.slice(0, 2)) { + // Show LOC for text files when available; omit ext and mtime + let locStr = ""; + if (!f.isBinary && Array.isArray(aggregatedContent?.textFiles)) { + const tf = aggregatedContent.textFiles.find((t) => + t.path === f.path + ); + if (tf && typeof tf.lines === "number") { + locStr = `, LOC: ${tf.lines.toLocaleString()}`; + } + } + console.log( + ` ${f.path} โ€“ ${f.sizeFormatted} (${ + f.percentOfTotal.toFixed(2) + }%)${locStr}`, + ); + } + if (stats.largestFiles.length > 2) { + console.log(` โ€ฆ and ${stats.largestFiles.length - 2} more files`); + } + } + + // Write a comprehensive markdown report next to the XML + { + const mdPath = outputPath.endsWith(".xml") + ? outputPath.replace(/\.xml$/i, ".stats.md") + : outputPath + ".stats.md"; + try { + const pct = (num, den) => (den ? ((num / den) * 100) : 0); + const md = []; + md.push(`# ๐Ÿงพ Flatten Stats for ${path.basename(outputPath)}`); + md.push(""); + md.push("## ๐Ÿ“Š Summary"); + md.push(`- Total source size: ${stats.totalSize}`); + md.push(`- Generated XML size: ${stats.xmlSize}`); + md.push( + `- Total lines of code: ${stats.totalLines.toLocaleString()}`, + ); + md.push(`- Estimated tokens: ${stats.estimatedTokens}`); + md.push( + `- File breakdown: ${stats.textFiles} text, ${stats.binaryFiles} binary, ${stats.errorFiles} errors`, + ); + md.push(""); + + // Percentiles + md.push("## ๐Ÿ“ˆ Size Percentiles"); + md.push( + `Avg: ${ + Math.round(stats.avgFileSize).toLocaleString() + } B, Median: ${ + Math.round(stats.medianFileSize).toLocaleString() + } B, p90: ${stats.p90.toLocaleString()} B, p95: ${stats.p95.toLocaleString()} B, p99: ${stats.p99.toLocaleString()} B`, + ); + md.push(""); + + // Histogram + if (Array.isArray(stats.histogram) && stats.histogram.length) { + md.push("## ๐Ÿงฎ Size Histogram"); + md.push("| Bucket | Files | Bytes |"); + md.push("| --- | ---: | ---: |"); + for (const b of stats.histogram) { + md.push( + `| ${b.label} | ${b.count} | ${b.bytes.toLocaleString()} |`, + ); + } + md.push(""); + } + + // Top Extensions + if (Array.isArray(stats.byExtension) && stats.byExtension.length) { + md.push("## ๐Ÿ“ฆ Top Extensions by Bytes (Top 20)"); + md.push("| Ext | Files | Bytes | % of total |"); + md.push("| --- | ---: | ---: | ---: |"); + for (const e of stats.byExtension.slice(0, 20)) { + const p = pct(e.bytes, stats.totalBytes); + md.push( + `| ${e.ext} | ${e.count} | ${e.bytes.toLocaleString()} | ${ + p.toFixed(2) + }% |`, + ); + } + md.push(""); + } + + // Top Directories + if (Array.isArray(stats.byDirectory) && stats.byDirectory.length) { + md.push("## ๐Ÿ“‚ Top Directories by Bytes (Top 20)"); + md.push("| Directory | Files | Bytes | % of total |"); + md.push("| --- | ---: | ---: | ---: |"); + for (const d of 
stats.byDirectory.slice(0, 20)) { + const p = pct(d.bytes, stats.totalBytes); + md.push( + `| ${d.dir} | ${d.count} | ${d.bytes.toLocaleString()} | ${ + p.toFixed(2) + }% |`, + ); + } + md.push(""); + } + + // Depth distribution + if ( + Array.isArray(stats.depthDistribution) && + stats.depthDistribution.length + ) { + md.push("## 🌳 Depth Distribution"); + md.push("| Depth | Count |"); + md.push("| ---: | ---: |"); + for (const d of stats.depthDistribution) { + md.push(`| ${d.depth} | ${d.count} |`); + } + md.push(""); + } + + // Longest paths + if ( + Array.isArray(stats.longestPaths) && stats.longestPaths.length + ) { + md.push("## 🧵 Longest Paths (Top 25)"); + md.push("| Path | Length | Bytes |"); + md.push("| --- | ---: | ---: |"); + for (const pth of stats.longestPaths) { + md.push( + `| ${pth.path} | ${pth.length} | ${pth.size.toLocaleString()} |`, + ); + } + md.push(""); + } + + // Temporal + if (stats.temporal) { + md.push("## ⏱️ Temporal"); + if (stats.temporal.oldest) { + md.push( + `- Oldest: ${stats.temporal.oldest.path} (${stats.temporal.oldest.mtime})`, + ); + } + if (stats.temporal.newest) { + md.push( + `- Newest: ${stats.temporal.newest.path} (${stats.temporal.newest.mtime})`, + ); + } + if (Array.isArray(stats.temporal.ageBuckets)) { + md.push(""); + md.push("| Age | Files | Bytes |"); + md.push("| --- | ---: | ---: |"); + for (const b of stats.temporal.ageBuckets) { + md.push( + `| ${b.label} | ${b.count} | ${b.bytes.toLocaleString()} |`, + ); + } + } + md.push(""); + } + + // Quality signals + if (stats.quality) { + md.push("## ✅ Quality Signals"); + md.push(`- Zero-byte files: ${stats.quality.zeroByteFiles}`); + md.push(`- Empty text files: ${stats.quality.emptyTextFiles}`); + md.push(`- Hidden files: ${stats.quality.hiddenFiles}`); + md.push(`- Symlinks: ${stats.quality.symlinks}`); + md.push( + `- Large files (>= ${ + (stats.quality.largeThreshold / (1024 * 1024)).toFixed(0) + } MB): ${stats.quality.largeFilesCount}`, + ); + md.push( + `- Suspiciously large files (>= 100 MB): ${stats.quality.suspiciousLargeFilesCount}`, + ); + md.push(""); + } + + // Duplicates + if ( + Array.isArray(stats.duplicateCandidates) && + stats.duplicateCandidates.length + ) { + md.push("## 🧬 Duplicate Candidates"); + md.push("| Reason | Files | Size (bytes) |"); + md.push("| --- | ---: | ---: |"); + for (const d of stats.duplicateCandidates) { + md.push( + `| ${d.reason} | ${d.count} | ${d.size.toLocaleString()} |`, + ); + } + md.push(""); + // Detailed listing of duplicate file names and locations + md.push("### 🧬 Duplicate Groups Details"); + let dupIndex = 1; + for (const d of stats.duplicateCandidates) { + md.push( + `#### Group ${dupIndex}: ${d.count} files @ ${d.size.toLocaleString()} bytes (${d.reason})`, + ); + if (Array.isArray(d.files) && d.files.length) { + for (const fp of d.files) { + md.push(`- ${fp}`); + } + } else { + md.push("- (file list unavailable)"); + } + md.push(""); + dupIndex++; + } + md.push(""); + } + + // Compressibility + if (typeof stats.compressibilityRatio === "number") { + md.push("## 🗜️ Compressibility"); + md.push( + `Sampled compressibility ratio: ${ + (stats.compressibilityRatio * 100).toFixed(2) + }%`, + ); + md.push(""); + } + + // Git + if (stats.git && stats.git.isRepo) { + md.push("## 🔧 Git"); + md.push( + `- Tracked: ${stats.git.trackedCount} files, ${stats.git.trackedBytes.toLocaleString()} bytes`, + ); + md.push( + `- Untracked: ${stats.git.untrackedCount} files, ${stats.git.untrackedBytes.toLocaleString()} bytes`, + ); 
+ if ( + Array.isArray(stats.git.lfsCandidates) && + stats.git.lfsCandidates.length + ) { + md.push(""); + md.push("### 📦 LFS Candidates (Top 20)"); + md.push("| Path | Bytes |"); + md.push("| --- | ---: |"); + for (const f of stats.git.lfsCandidates.slice(0, 20)) { + md.push(`| ${f.path} | ${f.size.toLocaleString()} |`); + } + } + md.push(""); + } + + // Largest Files + if ( + Array.isArray(stats.largestFiles) && stats.largestFiles.length + ) { + md.push("## 📚 Largest Files (Top 50)"); + md.push("| Path | Size | % of total | LOC |"); + md.push("| --- | ---: | ---: | ---: |"); + for (const f of stats.largestFiles) { + let loc = ""; + if ( + !f.isBinary && Array.isArray(aggregatedContent?.textFiles) + ) { + const tf = aggregatedContent.textFiles.find((t) => + t.path === f.path + ); + if (tf && typeof tf.lines === "number") { + loc = tf.lines.toLocaleString(); + } + } + md.push( + `| ${f.path} | ${f.sizeFormatted} | ${ + f.percentOfTotal.toFixed(2) + }% | ${loc} |`, + ); + } + md.push(""); + } + + await fs.writeFile(mdPath, md.join("\n")); + console.log(`\n🧾 Detailed stats report written to: ${mdPath}`); + } catch (e) { + console.warn(`⚠️ Failed to write stats markdown: ${e.message}`); + } + } + } } catch (error) { console.error("❌ Critical error:", error.message); console.error("An unexpected error occurred."); diff --git a/tools/flattener/projectRoot.js b/tools/flattener/projectRoot.js index bba2c368..27f3a1eb 100644 --- a/tools/flattener/projectRoot.js +++ b/tools/flattener/projectRoot.js @@ -1,45 +1,204 @@ const fs = require("fs-extra"); const path = require("node:path"); +// Deno/Node compatibility: explicitly import process +const process = require("node:process"); +const { execFile } = require("node:child_process"); +const { promisify } = require("node:util"); +const execFileAsync = promisify(execFile); + +// Simple memoization across calls (keyed by realpath of startDir) +const _cache = new Map(); + +async function _tryRun(cmd, args, cwd, timeoutMs = 500) { + try { + const { stdout } = await execFileAsync(cmd, args, { + cwd, + timeout: timeoutMs, + windowsHide: true, + maxBuffer: 1024 * 1024, + }); + const out = String(stdout || "").trim(); + return out || null; + } catch { + return null; + } +} + +async function _detectVcsTopLevel(startDir) { + // Run common VCS root queries in parallel; ignore failures + const gitP = _tryRun("git", ["rev-parse", "--show-toplevel"], startDir); + const hgP = _tryRun("hg", ["root"], startDir); + const svnP = (async () => { + const show = await _tryRun("svn", ["info", "--show-item", "wc-root"], startDir); + if (show) return show; + const info = await _tryRun("svn", ["info"], startDir); + if (info) { + const line = info.split(/\r?\n/).find((l) => l.toLowerCase().startsWith("working copy root path:")); + if (line) return line.split(":").slice(1).join(":").trim(); + } + return null; + })(); + const [git, hg, svn] = await Promise.all([gitP, hgP, svnP]); + return git || hg || svn || null; +} + /** - * Attempt to find the project root by walking up from startDir - * Looks for common project markers like .git, package.json, pyproject.toml, etc. + * Attempt to find the project root by walking up from startDir. + * Uses a robust, prioritized set of ecosystem markers (VCS > workspaces/monorepo > lock/build > language config). + * Also recognizes package.json with "workspaces" as a workspace root. + * You can augment markers via env PROJECT_ROOT_MARKERS as a comma-separated list of file/dir names. 
* @param {string} startDir + * @returns {Promise<string|null>} project root directory or null if not found */ async function findProjectRoot(startDir) { try { + // Resolve symlinks for robustness (e.g., when invoked from a symlinked path) let dir = path.resolve(startDir); - const root = path.parse(dir).root; - const markers = [ - ".git", - "package.json", - "pnpm-workspace.yaml", - "yarn.lock", - "pnpm-lock.yaml", - "pyproject.toml", - "requirements.txt", - "go.mod", - "Cargo.toml", - "composer.json", - ".hg", - ".svn", - ]; + try { + dir = await fs.realpath(dir); + } catch { + // ignore if realpath fails; continue with resolved path + } + const startKey = dir; // preserve starting point for caching + if (_cache.has(startKey)) return _cache.get(startKey); + const fsRoot = path.parse(dir).root; + + // Helper to safely check for existence + const exists = (p) => fs.pathExists(p); + + // Build checks: an array of { makePath: (dir) => string, weight } + const checks = []; + + const add = (rel, weight) => { + const makePath = (d) => Array.isArray(rel) ? path.join(d, ...rel) : path.join(d, rel); + checks.push({ makePath, weight }); + }; + + // Highest priority: explicit sentinel markers + add(".project-root", 110); + add(".workspace-root", 110); + add(".repo-root", 110); + + // Next highest: VCS roots + add(".git", 100); + add(".hg", 95); + add(".svn", 95); + + // Monorepo/workspace indicators + add("pnpm-workspace.yaml", 90); + add("lerna.json", 90); + add("turbo.json", 90); + add("nx.json", 90); + add("rush.json", 90); + add("go.work", 90); + add("WORKSPACE", 90); + add("WORKSPACE.bazel", 90); + add("MODULE.bazel", 90); + add("pants.toml", 90); + + // Lockfiles and package-manager/top-level locks + add("yarn.lock", 85); + add("pnpm-lock.yaml", 85); + add("package-lock.json", 85); + add("bun.lockb", 85); + add("Cargo.lock", 85); + add("composer.lock", 85); + add("poetry.lock", 85); + add("Pipfile.lock", 85); + add("Gemfile.lock", 85); + + // Build-system root indicators + add("settings.gradle", 80); + add("settings.gradle.kts", 80); + add("gradlew", 80); + add("pom.xml", 80); + add("build.sbt", 80); + add(["project", "build.properties"], 80); + + // Language/project config markers + add("deno.json", 75); + add("deno.jsonc", 75); + add("pyproject.toml", 75); + add("Pipfile", 75); + add("requirements.txt", 75); + add("go.mod", 75); + add("Cargo.toml", 75); + add("composer.json", 75); + add("mix.exs", 75); + add("Gemfile", 75); + add("CMakeLists.txt", 75); + add("stack.yaml", 75); + add("cabal.project", 75); + add("rebar.config", 75); + add("pubspec.yaml", 75); + add("flake.nix", 75); + add("shell.nix", 75); + add("default.nix", 75); + add(".tool-versions", 75); + add("package.json", 74); // generic Node project (lower than lockfiles/workspaces) + + // Changesets + add([".changeset", "config.json"], 70); + add(".changeset", 70); + + // Custom markers via env (comma-separated names) + if (process.env.PROJECT_ROOT_MARKERS) { + for (const name of process.env.PROJECT_ROOT_MARKERS.split(",").map((s) => s.trim()).filter(Boolean)) { + add(name, 72); + } + } + + /** Check for package.json with "workspaces" */ + const hasWorkspacePackageJson = async (d) => { + const pkgPath = path.join(d, "package.json"); + if (!(await exists(pkgPath))) return false; + try { + const raw = await fs.readFile(pkgPath, "utf8"); + const pkg = JSON.parse(raw); + return Boolean(pkg && pkg.workspaces); + } catch { + return false; + } + }; + + let best = null; // { dir, weight } + + // Try to detect VCS toplevel once up-front; treat as 
authoritative slightly above .git marker + const vcsTop = await _detectVcsTopLevel(dir); + if (vcsTop) { + best = { dir: vcsTop, weight: 101 }; + } while (true) { - const exists = await Promise.all( - markers.map((m) => fs.pathExists(path.join(dir, m))), - ); - if (exists.some(Boolean)) { - return dir; + // Special check: package.json with "workspaces" + if (await hasWorkspacePackageJson(dir)) { + if (!best || 90 >= best.weight) best = { dir, weight: 90 }; } - if (dir === root) break; + + // Evaluate all other checks in parallel + const results = await Promise.all( + checks.map(async (c) => ({ c, ok: await exists(c.makePath(dir)) })), + ); + + for (const { c, ok } of results) { + if (!ok) continue; + if (!best || c.weight >= best.weight) { + best = { dir, weight: c.weight }; + } + } + + if (dir === fsRoot) break; dir = path.dirname(dir); } - return null; + + const out = best ? best.dir : null; + _cache.set(startKey, out); + return out; } catch { return null; } } module.exports = { findProjectRoot }; + diff --git a/tools/flattener/stats.helpers.js b/tools/flattener/stats.helpers.js new file mode 100644 index 00000000..bab08526 --- /dev/null +++ b/tools/flattener/stats.helpers.js @@ -0,0 +1,331 @@ +"use strict"; + +const fs = require("node:fs/promises"); +const path = require("node:path"); +const zlib = require("node:zlib"); +const { Buffer } = require("node:buffer"); +const crypto = require("node:crypto"); +const cp = require("node:child_process"); + +const KB = 1024; +const MB = 1024 * KB; + +const formatSize = (bytes) => { + if (bytes < 1024) return `${bytes} B`; + if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`; + if (bytes < 1024 * 1024 * 1024) return `${(bytes / (1024 * 1024)).toFixed(1)} MB`; + return `${(bytes / (1024 * 1024 * 1024)).toFixed(2)} GB`; +}; + +const percentile = (sorted, p) => { + if (sorted.length === 0) return 0; + const idx = Math.min(sorted.length - 1, Math.max(0, Math.ceil((p / 100) * sorted.length) - 1)); + return sorted[idx]; +}; + +async function processWithLimit(items, fn, concurrency = 64) { + for (let i = 0; i < items.length; i += concurrency) { + await Promise.all(items.slice(i, i + concurrency).map(fn)); + } +} + +async function enrichAllFiles(textFiles, binaryFiles) { + /** @type {Array<{ path: string; absolutePath: string; size: number; lines?: number; isBinary: boolean; ext: string; dir: string; depth: number; hidden: boolean; mtimeMs: number; isSymlink: boolean; }>} */ + const allFiles = []; + + async function enrich(file, isBinary) { + const ext = (path.extname(file.path) || "").toLowerCase(); + const dir = path.dirname(file.path) || "."; + const depth = file.path.split(path.sep).filter(Boolean).length; + const hidden = file.path.split(path.sep).some((seg) => seg.startsWith(".")); + let mtimeMs = 0; + let isSymlink = false; + try { + const lst = await fs.lstat(file.absolutePath); + mtimeMs = lst.mtimeMs; + isSymlink = lst.isSymbolicLink(); + } catch (_) { /* ignore lstat errors during enrichment */ } + allFiles.push({ + path: file.path, + absolutePath: file.absolutePath, + size: file.size || 0, + lines: file.lines, + isBinary, + ext, + dir, + depth, + hidden, + mtimeMs, + isSymlink, + }); + } + + await processWithLimit(textFiles, (f) => enrich(f, false)); + await processWithLimit(binaryFiles, (f) => enrich(f, true)); + return allFiles; +} + +function buildHistogram(allFiles) { + const buckets = [ + [1 * KB, "0–1KB"], + [10 * KB, "1–10KB"], + [100 * KB, "10–100KB"], + [1 * MB, "100KB–1MB"], + [10 * MB, "1–10MB"], + [100 * MB, 
"10โ€“100MB"], + [Infinity, ">=100MB"], + ]; + const histogram = buckets.map(([_, label]) => ({ label, count: 0, bytes: 0 })); + for (const f of allFiles) { + for (let i = 0; i < buckets.length; i++) { + if (f.size < buckets[i][0]) { + histogram[i].count++; + histogram[i].bytes += f.size; + break; + } + } + } + return histogram; +} + +function aggregateByExtension(allFiles) { + const byExtension = new Map(); + for (const f of allFiles) { + const key = f.ext || ""; + const v = byExtension.get(key) || { ext: key, count: 0, bytes: 0 }; + v.count++; + v.bytes += f.size; + byExtension.set(key, v); + } + return Array.from(byExtension.values()).sort((a, b) => b.bytes - a.bytes); +} + +function aggregateByDirectory(allFiles) { + const byDirectory = new Map(); + function addDirBytes(dir, bytes) { + const v = byDirectory.get(dir) || { dir, count: 0, bytes: 0 }; + v.count++; + v.bytes += bytes; + byDirectory.set(dir, v); + } + for (const f of allFiles) { + const parts = f.dir === "." ? [] : f.dir.split(path.sep); + let acc = ""; + for (let i = 0; i < parts.length; i++) { + acc = i === 0 ? parts[0] : acc + path.sep + parts[i]; + addDirBytes(acc, f.size); + } + if (parts.length === 0) addDirBytes(".", f.size); + } + return Array.from(byDirectory.values()).sort((a, b) => b.bytes - a.bytes); +} + +function computeDepthAndLongest(allFiles) { + const depthDistribution = new Map(); + for (const f of allFiles) { + depthDistribution.set(f.depth, (depthDistribution.get(f.depth) || 0) + 1); + } + const longestPaths = [...allFiles] + .sort((a, b) => b.path.length - a.path.length) + .slice(0, 25) + .map((f) => ({ path: f.path, length: f.path.length, size: f.size })); + const depthDist = Array.from(depthDistribution.entries()) + .sort((a, b) => a[0] - b[0]) + .map(([depth, count]) => ({ depth, count })); + return { depthDist, longestPaths }; +} + +function computeTemporal(allFiles, nowMs) { + let oldest = null, newest = null; + const ageBuckets = [ + { label: "> 1 year", minDays: 365, maxDays: Infinity, count: 0, bytes: 0 }, + { label: "6โ€“12 months", minDays: 180, maxDays: 365, count: 0, bytes: 0 }, + { label: "1โ€“6 months", minDays: 30, maxDays: 180, count: 0, bytes: 0 }, + { label: "7โ€“30 days", minDays: 7, maxDays: 30, count: 0, bytes: 0 }, + { label: "1โ€“7 days", minDays: 1, maxDays: 7, count: 0, bytes: 0 }, + { label: "< 1 day", minDays: 0, maxDays: 1, count: 0, bytes: 0 }, + ]; + for (const f of allFiles) { + const ageDays = Math.max(0, (nowMs - (f.mtimeMs || nowMs)) / (24 * 60 * 60 * 1000)); + for (const b of ageBuckets) { + if (ageDays >= b.minDays && ageDays < b.maxDays) { + b.count++; + b.bytes += f.size; + break; + } + } + if (!oldest || f.mtimeMs < oldest.mtimeMs) oldest = f; + if (!newest || f.mtimeMs > newest.mtimeMs) newest = f; + } + return { + oldest: oldest ? { path: oldest.path, mtime: oldest.mtimeMs ? new Date(oldest.mtimeMs).toISOString() : null } : null, + newest: newest ? { path: newest.path, mtime: newest.mtimeMs ? 
new Date(newest.mtimeMs).toISOString() : null } : null, + ageBuckets, + }; +} + +function computeQuality(allFiles, textFiles) { + const zeroByteFiles = allFiles.filter((f) => f.size === 0).length; + const emptyTextFiles = textFiles.filter((f) => (f.size || 0) === 0 || (f.lines || 0) === 0).length; + const hiddenFiles = allFiles.filter((f) => f.hidden).length; + const symlinks = allFiles.filter((f) => f.isSymlink).length; + const largeThreshold = 50 * MB; + const suspiciousThreshold = 100 * MB; + const largeFilesCount = allFiles.filter((f) => f.size >= largeThreshold).length; + const suspiciousLargeFilesCount = allFiles.filter((f) => f.size >= suspiciousThreshold).length; + return { + zeroByteFiles, + emptyTextFiles, + hiddenFiles, + symlinks, + largeFilesCount, + suspiciousLargeFilesCount, + largeThreshold, + }; +} + +function computeDuplicates(allFiles, textFiles) { + const duplicatesBySize = new Map(); + for (const f of allFiles) { + const key = String(f.size); + const arr = duplicatesBySize.get(key) || []; + arr.push(f); + duplicatesBySize.set(key, arr); + } + const duplicateCandidates = []; + for (const [sizeKey, arr] of duplicatesBySize.entries()) { + if (arr.length < 2) continue; + const textGroup = arr.filter((f) => !f.isBinary); + const otherGroup = arr.filter((f) => f.isBinary); + const contentHashGroups = new Map(); + for (const tf of textGroup) { + try { + const src = textFiles.find((x) => x.absolutePath === tf.absolutePath); + const content = src ? src.content : ""; + const h = crypto.createHash("sha1").update(content).digest("hex"); + const g = contentHashGroups.get(h) || []; + g.push(tf); + contentHashGroups.set(h, g); + } catch (_) { /* ignore hashing errors for duplicate detection */ } + } + for (const [_h, g] of contentHashGroups.entries()) { + if (g.length > 1) duplicateCandidates.push({ reason: "same-size+text-hash", size: Number(sizeKey), count: g.length, files: g.map((f) => f.path) }); + } + if (otherGroup.length > 1) { + duplicateCandidates.push({ reason: "same-size", size: Number(sizeKey), count: otherGroup.length, files: otherGroup.map((f) => f.path) }); + } + } + return duplicateCandidates; +} + +function estimateCompressibility(textFiles) { + let compSampleBytes = 0; + let compCompressedBytes = 0; + for (const tf of textFiles) { + try { + const sampleLen = Math.min(256 * 1024, tf.size || 0); + if (sampleLen <= 0) continue; + const sample = tf.content.slice(0, sampleLen); + const gz = zlib.gzipSync(Buffer.from(sample, "utf8")); + compSampleBytes += sampleLen; + compCompressedBytes += gz.length; + } catch (_) { /* ignore compression errors during sampling */ } + } + return compSampleBytes > 0 ? 
compCompressedBytes / compSampleBytes : null; +} + +function computeGitInfo(allFiles, rootDir, largeThreshold) { + const info = { + isRepo: false, + trackedCount: 0, + trackedBytes: 0, + untrackedCount: 0, + untrackedBytes: 0, + lfsCandidates: [], + }; + try { + if (!rootDir) return info; + const top = cp.execFileSync("git", ["rev-parse", "--show-toplevel"], { cwd: rootDir, stdio: ["ignore", "pipe", "ignore"] }).toString().trim(); + if (!top) return info; + info.isRepo = true; + const out = cp.execFileSync("git", ["ls-files", "-z"], { cwd: rootDir, stdio: ["ignore", "pipe", "ignore"] }); + const tracked = new Set(out.toString().split("\0").filter(Boolean)); + let trackedBytes = 0, trackedCount = 0, untrackedBytes = 0, untrackedCount = 0; + const lfsCandidates = []; + for (const f of allFiles) { + const isTracked = tracked.has(f.path); + if (isTracked) { + trackedCount++; trackedBytes += f.size; + if (f.size >= largeThreshold) lfsCandidates.push({ path: f.path, size: f.size }); + } else { + untrackedCount++; untrackedBytes += f.size; + } + } + info.trackedCount = trackedCount; + info.trackedBytes = trackedBytes; + info.untrackedCount = untrackedCount; + info.untrackedBytes = untrackedBytes; + info.lfsCandidates = lfsCandidates.sort((a, b) => b.size - a.size).slice(0, 50); + } catch (_) { /* git not available or not a repo, ignore */ } + return info; +} + +function computeLargestFiles(allFiles, totalBytes) { + const toPct = (num, den) => (den === 0 ? 0 : (num / den) * 100); + return [...allFiles] + .sort((a, b) => b.size - a.size) + .slice(0, 50) + .map((f) => ({ + path: f.path, + size: f.size, + sizeFormatted: formatSize(f.size), + percentOfTotal: toPct(f.size, totalBytes), + ext: f.ext || "", + isBinary: f.isBinary, + mtime: f.mtimeMs ? new Date(f.mtimeMs).toISOString() : null, + })); +} + +function mdTable(rows, headers) { + const header = `| ${headers.join(" | ")} |`; + const sep = `| ${headers.map(() => "---").join(" | ")} |`; + const body = rows.map((r) => `| ${r.join(" | ")} |`).join("\n"); + return `${header}\n${sep}\n${body}`; +} + +function buildMarkdownReport(largestFiles, byExtensionArr, byDirectoryArr, totalBytes) { + const toPct = (num, den) => (den === 0 ? 0 : (num / den) * 100); + const md = []; + md.push("\n### Top Largest Files (Top 50)\n"); + md.push(mdTable( + largestFiles.map((f) => [f.path, f.sizeFormatted, `${f.percentOfTotal.toFixed(2)}%`, f.ext || "", f.isBinary ? 
"binary" : "text"]), + ["Path", "Size", "% of total", "Ext", "Type"], + )); + md.push("\n\n### Top Extensions by Bytes (Top 20)\n"); + const topExtRows = byExtensionArr.slice(0, 20).map((e) => [e.ext, String(e.count), formatSize(e.bytes), `${toPct(e.bytes, totalBytes).toFixed(2)}%`]); + md.push(mdTable(topExtRows, ["Ext", "Count", "Bytes", "% of total"])); + md.push("\n\n### Top Directories by Bytes (Top 20)\n"); + const topDirRows = byDirectoryArr.slice(0, 20).map((d) => [d.dir, String(d.count), formatSize(d.bytes), `${toPct(d.bytes, totalBytes).toFixed(2)}%`]); + md.push(mdTable(topDirRows, ["Directory", "Files", "Bytes", "% of total"])); + return md.join("\n"); +} + +module.exports = { + KB, + MB, + formatSize, + percentile, + processWithLimit, + enrichAllFiles, + buildHistogram, + aggregateByExtension, + aggregateByDirectory, + computeDepthAndLongest, + computeTemporal, + computeQuality, + computeDuplicates, + estimateCompressibility, + computeGitInfo, + computeLargestFiles, + buildMarkdownReport, +}; diff --git a/tools/flattener/stats.js b/tools/flattener/stats.js index fd08de51..7bf9f9c9 100644 --- a/tools/flattener/stats.js +++ b/tools/flattener/stats.js @@ -1,29 +1,79 @@ -function calculateStatistics(aggregatedContent, xmlFileSize) { +const H = require("./stats.helpers.js"); + +async function calculateStatistics(aggregatedContent, xmlFileSize, rootDir) { const { textFiles, binaryFiles, errors } = aggregatedContent; - const totalTextSize = textFiles.reduce((sum, file) => sum + file.size, 0); - const totalBinarySize = binaryFiles.reduce((sum, file) => sum + file.size, 0); - const totalSize = totalTextSize + totalBinarySize; - - const totalLines = textFiles.reduce((sum, file) => sum + file.lines, 0); - + const totalLines = textFiles.reduce((sum, f) => sum + (f.lines || 0), 0); const estimatedTokens = Math.ceil(xmlFileSize / 4); - const formatSize = (bytes) => { - if (bytes < 1024) return `${bytes} B`; - if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`; - return `${(bytes / (1024 * 1024)).toFixed(1)} MB`; - }; + // Build enriched file list + const allFiles = await H.enrichAllFiles(textFiles, binaryFiles); + const totalBytes = allFiles.reduce((s, f) => s + f.size, 0); + const sizes = allFiles.map((f) => f.size).sort((a, b) => a - b); + const avgSize = sizes.length ? totalBytes / sizes.length : 0; + const medianSize = sizes.length ? 
H.percentile(sizes, 50) : 0; + const p90 = H.percentile(sizes, 90); + const p95 = H.percentile(sizes, 95); + const p99 = H.percentile(sizes, 99); + + const histogram = H.buildHistogram(allFiles); + const byExtensionArr = H.aggregateByExtension(allFiles); + const byDirectoryArr = H.aggregateByDirectory(allFiles); + const { depthDist, longestPaths } = H.computeDepthAndLongest(allFiles); + const temporal = H.computeTemporal(allFiles, Date.now()); + const quality = H.computeQuality(allFiles, textFiles); + const duplicateCandidates = H.computeDuplicates(allFiles, textFiles); + const compressibilityRatio = H.estimateCompressibility(textFiles); + const git = H.computeGitInfo(allFiles, rootDir, quality.largeThreshold); + const largestFiles = H.computeLargestFiles(allFiles, totalBytes); + const markdownReport = H.buildMarkdownReport( + largestFiles, + byExtensionArr, + byDirectoryArr, + totalBytes, + ); return { + // Back-compat summary totalFiles: textFiles.length + binaryFiles.length, textFiles: textFiles.length, binaryFiles: binaryFiles.length, errorFiles: errors.length, - totalSize: formatSize(totalSize), - xmlSize: formatSize(xmlFileSize), + totalSize: H.formatSize(totalBytes), + totalBytes, + xmlSize: H.formatSize(xmlFileSize), totalLines, estimatedTokens: estimatedTokens.toLocaleString(), + + // Distributions and percentiles + avgFileSize: avgSize, + medianFileSize: medianSize, + p90, + p95, + p99, + histogram, + + // Extensions and directories + byExtension: byExtensionArr, + byDirectory: byDirectoryArr, + depthDistribution: depthDist, + longestPaths, + + // Temporal + temporal, + + // Quality signals + quality, + + // Duplicates and compressibility + duplicateCandidates, + compressibilityRatio, + + // Git-aware + git, + + largestFiles, + markdownReport, }; } diff --git a/tools/flattener/test-matrix.js b/tools/flattener/test-matrix.js new file mode 100644 index 00000000..c33d07dc --- /dev/null +++ b/tools/flattener/test-matrix.js @@ -0,0 +1,405 @@ +#!/usr/bin/env node +/* deno-lint-ignore-file */ +/* + Automatic test matrix for project root detection. + Creates temporary fixtures for various ecosystems and validates findProjectRoot(). + No external options or flags required. Safe to run multiple times. 
+*/ + +const os = require("node:os"); +const path = require("node:path"); +const fs = require("fs-extra"); +const { promisify } = require("node:util"); +const { execFile } = require("node:child_process"); +const process = require("node:process"); +const execFileAsync = promisify(execFile); + +const { findProjectRoot } = require("./projectRoot.js"); + +async function cmdAvailable(cmd) { + try { + await execFileAsync(cmd, ["--version"], { timeout: 500, windowsHide: true }); + return true; + } catch { + return false; + } +} + +async function testSvnMarker() { + const root = await mkTmpDir("svn"); + const nested = path.join(root, "proj", "code"); + await fs.ensureDir(nested); + await fs.ensureDir(path.join(root, ".svn")); + const found = await findProjectRoot(nested); + assertEqual(found, root, ".svn marker should be detected"); + return { name: "svn-marker", ok: true }; +} + +async function testSymlinkStart() { + const root = await mkTmpDir("symlink-start"); + const nested = path.join(root, "a", "b"); + await fs.ensureDir(nested); + await fs.writeFile(path.join(root, ".project-root"), "\n"); + const tmp = await mkTmpDir("symlink-tmp"); + const link = path.join(tmp, "link-to-b"); + try { + await fs.symlink(nested, link); + } catch { + // symlink may not be permitted on some systems; skip + return { name: "symlink-start", ok: true, skipped: true }; + } + const found = await findProjectRoot(link); + assertEqual(found, root, "should resolve symlinked start to real root"); + return { name: "symlink-start", ok: true }; +} + +async function testSubmoduleLikeInnerGitFile() { + const root = await mkTmpDir("submodule-like"); + const mid = path.join(root, "mid"); + const leaf = path.join(mid, "leaf"); + await fs.ensureDir(leaf); + // outer repo + await fs.ensureDir(path.join(root, ".git")); + // inner submodule-like .git file + await fs.writeFile(path.join(mid, ".git"), "gitdir: ../.git/modules/mid\n"); + const found = await findProjectRoot(leaf); + assertEqual(found, root, "outermost .git should win on tie weight"); + return { name: "submodule-like-gitfile", ok: true }; +} + +async function mkTmpDir(name) { + const base = await fs.realpath(os.tmpdir()); + const dir = await fs.mkdtemp(path.join(base, `flattener-${name}-`)); + return dir; +} + +function assertEqual(actual, expected, msg) { + if (actual !== expected) { + throw new Error(`${msg}: expected=\"${expected}\" actual=\"${actual}\"`); + } +} + +async function testSentinel() { + const root = await mkTmpDir("sentinel"); + const nested = path.join(root, "a", "b", "c"); + await fs.ensureDir(nested); + await fs.writeFile(path.join(root, ".project-root"), "\n"); + const found = await findProjectRoot(nested); + await assertEqual(found, root, "sentinel .project-root should win"); + return { name: "sentinel", ok: true }; +} + +async function testOtherSentinels() { + const root = await mkTmpDir("other-sentinels"); + const nested = path.join(root, "x", "y"); + await fs.ensureDir(nested); + await fs.writeFile(path.join(root, ".workspace-root"), "\n"); + const found1 = await findProjectRoot(nested); + assertEqual(found1, root, "sentinel .workspace-root should win"); + + await fs.remove(path.join(root, ".workspace-root")); + await fs.writeFile(path.join(root, ".repo-root"), "\n"); + const found2 = await findProjectRoot(nested); + assertEqual(found2, root, "sentinel .repo-root should win"); + return { name: "other-sentinels", ok: true }; +} + +async function testGitCliAndMarker() { + const hasGit = await cmdAvailable("git"); + if (!hasGit) return { name: 
"git-cli", ok: true, skipped: true }; + + const root = await mkTmpDir("git"); + const nested = path.join(root, "pkg", "src"); + await fs.ensureDir(nested); + await execFileAsync("git", ["init"], { cwd: root, timeout: 2000 }); + const found = await findProjectRoot(nested); + await assertEqual(found, root, "git toplevel should be detected"); + return { name: "git-cli", ok: true }; +} + +async function testHgMarkerOrCli() { + // Prefer simple marker test to avoid requiring Mercurial install + const root = await mkTmpDir("hg"); + const nested = path.join(root, "lib"); + await fs.ensureDir(nested); + await fs.ensureDir(path.join(root, ".hg")); + const found = await findProjectRoot(nested); + await assertEqual(found, root, ".hg marker should be detected"); + return { name: "hg-marker", ok: true }; +} + +async function testWorkspacePnpm() { + const root = await mkTmpDir("pnpm-workspace"); + const pkgA = path.join(root, "packages", "a"); + await fs.ensureDir(pkgA); + await fs.writeFile(path.join(root, "pnpm-workspace.yaml"), "packages:\n - packages/*\n"); + const found = await findProjectRoot(pkgA); + await assertEqual(found, root, "pnpm-workspace.yaml should be detected"); + return { name: "pnpm-workspace", ok: true }; +} + +async function testPackageJsonWorkspaces() { + const root = await mkTmpDir("package-workspaces"); + const pkgA = path.join(root, "packages", "a"); + await fs.ensureDir(pkgA); + await fs.writeJson(path.join(root, "package.json"), { private: true, workspaces: ["packages/*"] }, { spaces: 2 }); + const found = await findProjectRoot(pkgA); + await assertEqual(found, root, "package.json workspaces should be detected"); + return { name: "package.json-workspaces", ok: true }; +} + +async function testLockfiles() { + const root = await mkTmpDir("lockfiles"); + const nested = path.join(root, "src"); + await fs.ensureDir(nested); + await fs.writeFile(path.join(root, "yarn.lock"), "\n"); + const found = await findProjectRoot(nested); + await assertEqual(found, root, "yarn.lock should be detected"); + return { name: "lockfiles", ok: true }; +} + +async function testLanguageConfigs() { + const root = await mkTmpDir("lang-configs"); + const nested = path.join(root, "x", "y"); + await fs.ensureDir(nested); + await fs.writeFile(path.join(root, "pyproject.toml"), "[tool.poetry]\nname='tmp'\n"); + const found = await findProjectRoot(nested); + await assertEqual(found, root, "pyproject.toml should be detected"); + return { name: "language-configs", ok: true }; +} + +async function testPreferOuterOnTie() { + const root = await mkTmpDir("tie"); + const mid = path.join(root, "mid"); + const leaf = path.join(mid, "leaf"); + await fs.ensureDir(leaf); + // same weight marker at two levels + await fs.writeFile(path.join(root, "requirements.txt"), "\n"); + await fs.writeFile(path.join(mid, "requirements.txt"), "\n"); + const found = await findProjectRoot(leaf); + await assertEqual(found, root, "outermost directory should win on equal weight"); + return { name: "prefer-outermost-tie", ok: true }; +} + +// Additional coverage: Bazel, Nx/Turbo/Rush, Go workspaces, Deno, Java/Scala, PHP, Rust, Nix, Changesets, env markers, +// and priority interaction between package.json and lockfiles. 
+ +async function testBazelWorkspace() { + const root = await mkTmpDir("bazel"); + const nested = path.join(root, "apps", "svc"); + await fs.ensureDir(nested); + await fs.writeFile(path.join(root, "WORKSPACE"), "workspace(name=\"tmp\")\n"); + const found = await findProjectRoot(nested); + await assertEqual(found, root, "Bazel WORKSPACE should be detected"); + return { name: "bazel-workspace", ok: true }; +} + +async function testNx() { + const root = await mkTmpDir("nx"); + const nested = path.join(root, "apps", "web"); + await fs.ensureDir(nested); + await fs.writeJson(path.join(root, "nx.json"), { npmScope: "tmp" }, { spaces: 2 }); + const found = await findProjectRoot(nested); + await assertEqual(found, root, "nx.json should be detected"); + return { name: "nx", ok: true }; +} + +async function testTurbo() { + const root = await mkTmpDir("turbo"); + const nested = path.join(root, "packages", "x"); + await fs.ensureDir(nested); + await fs.writeJson(path.join(root, "turbo.json"), { pipeline: {} }, { spaces: 2 }); + const found = await findProjectRoot(nested); + await assertEqual(found, root, "turbo.json should be detected"); + return { name: "turbo", ok: true }; +} + +async function testRush() { + const root = await mkTmpDir("rush"); + const nested = path.join(root, "apps", "a"); + await fs.ensureDir(nested); + await fs.writeJson(path.join(root, "rush.json"), { projectFolderMinDepth: 1 }, { spaces: 2 }); + const found = await findProjectRoot(nested); + await assertEqual(found, root, "rush.json should be detected"); + return { name: "rush", ok: true }; +} + +async function testGoWorkAndMod() { + const root = await mkTmpDir("gowork"); + const mod = path.join(root, "modA"); + const nested = path.join(mod, "pkg"); + await fs.ensureDir(nested); + await fs.writeFile(path.join(root, "go.work"), "go 1.22\nuse ./modA\n"); + await fs.writeFile(path.join(mod, "go.mod"), "module example.com/a\ngo 1.22\n"); + const found = await findProjectRoot(nested); + await assertEqual(found, root, "go.work should define the workspace root"); + return { name: "go-work", ok: true }; +} + +async function testDenoJson() { + const root = await mkTmpDir("deno"); + const nested = path.join(root, "src"); + await fs.ensureDir(nested); + await fs.writeJson(path.join(root, "deno.json"), { tasks: {} }, { spaces: 2 }); + const found = await findProjectRoot(nested); + await assertEqual(found, root, "deno.json should be detected"); + return { name: "deno-json", ok: true }; +} + +async function testGradleSettings() { + const root = await mkTmpDir("gradle"); + const nested = path.join(root, "app"); + await fs.ensureDir(nested); + await fs.writeFile(path.join(root, "settings.gradle"), "rootProject.name='tmp'\n"); + const found = await findProjectRoot(nested); + await assertEqual(found, root, "settings.gradle should be detected"); + return { name: "gradle-settings", ok: true }; +} + +async function testMavenPom() { + const root = await mkTmpDir("maven"); + const nested = path.join(root, "module"); + await fs.ensureDir(nested); + await fs.writeFile(path.join(root, "pom.xml"), "\n"); + const found = await findProjectRoot(nested); + await assertEqual(found, root, "pom.xml should be detected"); + return { name: "maven-pom", ok: true }; +} + +async function testSbtBuild() { + const root = await mkTmpDir("sbt"); + const nested = path.join(root, "sub"); + await fs.ensureDir(nested); + await fs.writeFile(path.join(root, "build.sbt"), "name := \"tmp\"\n"); + const found = await findProjectRoot(nested); + await assertEqual(found, root, 
"build.sbt should be detected"); + return { name: "sbt-build", ok: true }; +} + +async function testComposer() { + const root = await mkTmpDir("composer"); + const nested = path.join(root, "src"); + await fs.ensureDir(nested); + await fs.writeJson(path.join(root, "composer.json"), { name: "tmp/pkg" }, { spaces: 2 }); + await fs.writeFile(path.join(root, "composer.lock"), "{}\n"); + const found = await findProjectRoot(nested); + await assertEqual(found, root, "composer.{json,lock} should be detected"); + return { name: "composer", ok: true }; +} + +async function testCargo() { + const root = await mkTmpDir("cargo"); + const nested = path.join(root, "src"); + await fs.ensureDir(nested); + await fs.writeFile(path.join(root, "Cargo.toml"), "[package]\nname='tmp'\nversion='0.0.0'\n"); + const found = await findProjectRoot(nested); + await assertEqual(found, root, "Cargo.toml should be detected"); + return { name: "cargo", ok: true }; +} + +async function testNixFlake() { + const root = await mkTmpDir("nix"); + const nested = path.join(root, "work"); + await fs.ensureDir(nested); + await fs.writeFile(path.join(root, "flake.nix"), "{ }\n"); + const found = await findProjectRoot(nested); + await assertEqual(found, root, "flake.nix should be detected"); + return { name: "nix-flake", ok: true }; +} + +async function testChangesetConfig() { + const root = await mkTmpDir("changeset"); + const nested = path.join(root, "pkg"); + await fs.ensureDir(nested); + await fs.ensureDir(path.join(root, ".changeset")); + await fs.writeJson(path.join(root, ".changeset", "config.json"), { $schema: "https://unpkg.com/@changesets/config@2.3.1/schema.json" }, { spaces: 2 }); + const found = await findProjectRoot(nested); + await assertEqual(found, root, ".changeset/config.json should be detected"); + return { name: "changesets", ok: true }; +} + +async function testEnvCustomMarker() { + const root = await mkTmpDir("env-marker"); + const nested = path.join(root, "dir"); + await fs.ensureDir(nested); + await fs.writeFile(path.join(root, "MY_ROOT"), "\n"); + const prev = process.env.PROJECT_ROOT_MARKERS; + process.env.PROJECT_ROOT_MARKERS = "MY_ROOT"; + try { + const found = await findProjectRoot(nested); + await assertEqual(found, root, "custom env marker should be honored"); + } finally { + if (prev === undefined) delete process.env.PROJECT_ROOT_MARKERS; else process.env.PROJECT_ROOT_MARKERS = prev; + } + return { name: "env-custom-marker", ok: true }; +} + +async function testPackageLowPriorityVsLock() { + const root = await mkTmpDir("pkg-vs-lock"); + const nested = path.join(root, "nested"); + await fs.ensureDir(path.join(nested, "deep")); + await fs.writeJson(path.join(nested, "package.json"), { name: "nested" }, { spaces: 2 }); + await fs.writeFile(path.join(root, "yarn.lock"), "\n"); + const found = await findProjectRoot(path.join(nested, "deep")); + await assertEqual(found, root, "lockfile at root should outrank nested package.json"); + return { name: "package-vs-lock-priority", ok: true }; +} + +async function run() { + const tests = [ + testSentinel, + testOtherSentinels, + testGitCliAndMarker, + testHgMarkerOrCli, + testWorkspacePnpm, + testPackageJsonWorkspaces, + testLockfiles, + testLanguageConfigs, + testPreferOuterOnTie, + testBazelWorkspace, + testNx, + testTurbo, + testRush, + testGoWorkAndMod, + testDenoJson, + testGradleSettings, + testMavenPom, + testSbtBuild, + testComposer, + testCargo, + testNixFlake, + testChangesetConfig, + testEnvCustomMarker, + testPackageLowPriorityVsLock, + testSvnMarker, + 
testSymlinkStart, + testSubmoduleLikeInnerGitFile, + ]; + + const results = []; + for (const t of tests) { + try { + const r = await t(); + results.push({ ...r, ok: true }); + console.log(`✔ ${r.name}${r.skipped ? " (skipped)" : ""}`); + } catch (err) { + console.error(`✖ ${t.name}:`, err && err.message ? err.message : err); + results.push({ name: t.name, ok: false, error: String(err) }); + } + } + + const failed = results.filter((r) => !r.ok); + console.log("\nSummary:"); + for (const r of results) { + console.log(`- ${r.name}: ${r.ok ? "ok" : "FAIL"}${r.skipped ? " (skipped)" : ""}`); + } + + if (failed.length) { + process.exitCode = 1; + } +} + +run().catch((e) => { + console.error("Fatal error:", e); + process.exit(1); +});
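A minimal usage sketch of the new root-detection API (illustrative only, not part of the diff; assumes Node can resolve tools/flattener/projectRoot.js from the repo root as added above):

const { findProjectRoot } = require("./tools/flattener/projectRoot.js");

(async () => {
  // Walks upward from the start directory, scoring markers as in the diff
  // (sentinels 110 > VCS toplevel 101 / .git 100 > workspace files 90 >
  // lockfiles 85 > language configs 74-75) and preferring the outermost
  // directory on tied weights; resolves to null when nothing matches.
  const root = await findProjectRoot(process.cwd());
  console.log(root ?? "no project root detected");
})();

The same walk honors PROJECT_ROOT_MARKERS (comma-separated marker names, weight 72), which is the override the env-custom-marker case in test-matrix.js exercises.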