chore: add code formatting config and pre-commit hooks (#450)

manjaroblack committed 2025-08-16 19:08:39 -05:00 (committed by GitHub)
parent 51284d6ecf
commit ed539432fb
130 changed files with 11886 additions and 10939 deletions
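The hunks below are mechanical reformatting: double quotes become single quotes, long one-liners are wrapped at roughly 100 columns with trailing commas, `catch (_)` becomes an optional catch binding, and `Array.from(...)` becomes spread syntax (the latter two look like a linter autofix pass rather than a formatter). The formatting config and hook files named in the commit title are not part of this excerpt; as a rough sketch only, a setup that would produce this style could look like the following. Prettier with husky and lint-staged is an assumption here, not something the diff confirms:

.prettierrc:
{
  "singleQuote": true,
  "printWidth": 100,
  "trailingComma": "all"
}

package.json (excerpt):
{
  "scripts": { "prepare": "husky" },
  "lint-staged": { "*.{js,json,md}": "prettier --write" }
}

.husky/pre-commit:
npx lint-staged

With a setup like this, git commit invokes lint-staged, which runs the formatter only over the staged files before the commit is recorded.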


@@ -1,11 +1,11 @@
"use strict";
'use strict';
const fs = require("node:fs/promises");
const path = require("node:path");
const zlib = require("node:zlib");
const { Buffer } = require("node:buffer");
const crypto = require("node:crypto");
const cp = require("node:child_process");
const fs = require('node:fs/promises');
const path = require('node:path');
const zlib = require('node:zlib');
const { Buffer } = require('node:buffer');
const crypto = require('node:crypto');
const cp = require('node:child_process');
const KB = 1024;
const MB = 1024 * KB;
@@ -34,17 +34,19 @@ async function enrichAllFiles(textFiles, binaryFiles) {
 const allFiles = [];
 async function enrich(file, isBinary) {
-const ext = (path.extname(file.path) || "").toLowerCase();
-const dir = path.dirname(file.path) || ".";
+const ext = (path.extname(file.path) || '').toLowerCase();
+const dir = path.dirname(file.path) || '.';
 const depth = file.path.split(path.sep).filter(Boolean).length;
-const hidden = file.path.split(path.sep).some((seg) => seg.startsWith("."));
+const hidden = file.path.split(path.sep).some((seg) => seg.startsWith('.'));
 let mtimeMs = 0;
 let isSymlink = false;
 try {
 const lst = await fs.lstat(file.absolutePath);
 mtimeMs = lst.mtimeMs;
 isSymlink = lst.isSymbolicLink();
-} catch (_) { /* ignore lstat errors during enrichment */ }
+} catch {
+/* ignore lstat errors during enrichment */
+}
 allFiles.push({
 path: file.path,
 absolutePath: file.absolutePath,
@@ -67,18 +69,18 @@ async function enrichAllFiles(textFiles, binaryFiles) {
 function buildHistogram(allFiles) {
 const buckets = [
-[1 * KB, "0–1KB"],
-[10 * KB, "1–10KB"],
-[100 * KB, "10–100KB"],
-[1 * MB, "100KB–1MB"],
-[10 * MB, "1–10MB"],
-[100 * MB, "10–100MB"],
-[Infinity, ">=100MB"],
+[1 * KB, '0–1KB'],
+[10 * KB, '1–10KB'],
+[100 * KB, '10–100KB'],
+[1 * MB, '100KB–1MB'],
+[10 * MB, '1–10MB'],
+[100 * MB, '10–100MB'],
+[Infinity, '>=100MB'],
 ];
 const histogram = buckets.map(([_, label]) => ({ label, count: 0, bytes: 0 }));
 for (const f of allFiles) {
-for (let i = 0; i < buckets.length; i++) {
-if (f.size < buckets[i][0]) {
+for (const [i, bucket] of buckets.entries()) {
+if (f.size < bucket[0]) {
 histogram[i].count++;
 histogram[i].bytes += f.size;
 break;
@@ -91,13 +93,13 @@ function buildHistogram(allFiles) {
 function aggregateByExtension(allFiles) {
 const byExtension = new Map();
 for (const f of allFiles) {
-const key = f.ext || "<none>";
+const key = f.ext || '<none>';
 const v = byExtension.get(key) || { ext: key, count: 0, bytes: 0 };
 v.count++;
 v.bytes += f.size;
 byExtension.set(key, v);
 }
-return Array.from(byExtension.values()).sort((a, b) => b.bytes - a.bytes);
+return [...byExtension.values()].sort((a, b) => b.bytes - a.bytes);
 }
 function aggregateByDirectory(allFiles) {
@@ -109,15 +111,15 @@ function aggregateByDirectory(allFiles) {
 byDirectory.set(dir, v);
 }
 for (const f of allFiles) {
-const parts = f.dir === "." ? [] : f.dir.split(path.sep);
-let acc = "";
+const parts = f.dir === '.' ? [] : f.dir.split(path.sep);
+let acc = '';
 for (let i = 0; i < parts.length; i++) {
 acc = i === 0 ? parts[0] : acc + path.sep + parts[i];
 addDirBytes(acc, f.size);
 }
-if (parts.length === 0) addDirBytes(".", f.size);
+if (parts.length === 0) addDirBytes('.', f.size);
 }
-return Array.from(byDirectory.values()).sort((a, b) => b.bytes - a.bytes);
+return [...byDirectory.values()].sort((a, b) => b.bytes - a.bytes);
 }
 function computeDepthAndLongest(allFiles) {
@@ -129,21 +131,22 @@ function computeDepthAndLongest(allFiles) {
 .sort((a, b) => b.path.length - a.path.length)
 .slice(0, 25)
 .map((f) => ({ path: f.path, length: f.path.length, size: f.size }));
-const depthDist = Array.from(depthDistribution.entries())
+const depthDist = [...depthDistribution.entries()]
 .sort((a, b) => a[0] - b[0])
 .map(([depth, count]) => ({ depth, count }));
 return { depthDist, longestPaths };
 }
 function computeTemporal(allFiles, nowMs) {
-let oldest = null, newest = null;
+let oldest = null,
+newest = null;
 const ageBuckets = [
-{ label: "> 1 year", minDays: 365, maxDays: Infinity, count: 0, bytes: 0 },
-{ label: "6–12 months", minDays: 180, maxDays: 365, count: 0, bytes: 0 },
-{ label: "1–6 months", minDays: 30, maxDays: 180, count: 0, bytes: 0 },
-{ label: "7–30 days", minDays: 7, maxDays: 30, count: 0, bytes: 0 },
-{ label: "1–7 days", minDays: 1, maxDays: 7, count: 0, bytes: 0 },
-{ label: "< 1 day", minDays: 0, maxDays: 1, count: 0, bytes: 0 },
+{ label: '> 1 year', minDays: 365, maxDays: Infinity, count: 0, bytes: 0 },
+{ label: '6–12 months', minDays: 180, maxDays: 365, count: 0, bytes: 0 },
+{ label: '1–6 months', minDays: 30, maxDays: 180, count: 0, bytes: 0 },
+{ label: '7–30 days', minDays: 7, maxDays: 30, count: 0, bytes: 0 },
+{ label: '1–7 days', minDays: 1, maxDays: 7, count: 0, bytes: 0 },
+{ label: '< 1 day', minDays: 0, maxDays: 1, count: 0, bytes: 0 },
 ];
 for (const f of allFiles) {
 const ageDays = Math.max(0, (nowMs - (f.mtimeMs || nowMs)) / (24 * 60 * 60 * 1000));
@@ -158,15 +161,21 @@ function computeTemporal(allFiles, nowMs) {
 if (!newest || f.mtimeMs > newest.mtimeMs) newest = f;
 }
 return {
-oldest: oldest ? { path: oldest.path, mtime: oldest.mtimeMs ? new Date(oldest.mtimeMs).toISOString() : null } : null,
-newest: newest ? { path: newest.path, mtime: newest.mtimeMs ? new Date(newest.mtimeMs).toISOString() : null } : null,
+oldest: oldest
+? { path: oldest.path, mtime: oldest.mtimeMs ? new Date(oldest.mtimeMs).toISOString() : null }
+: null,
+newest: newest
+? { path: newest.path, mtime: newest.mtimeMs ? new Date(newest.mtimeMs).toISOString() : null }
+: null,
 ageBuckets,
 };
 }
 function computeQuality(allFiles, textFiles) {
 const zeroByteFiles = allFiles.filter((f) => f.size === 0).length;
-const emptyTextFiles = textFiles.filter((f) => (f.size || 0) === 0 || (f.lines || 0) === 0).length;
+const emptyTextFiles = textFiles.filter(
+(f) => (f.size || 0) === 0 || (f.lines || 0) === 0,
+).length;
 const hiddenFiles = allFiles.filter((f) => f.hidden).length;
 const symlinks = allFiles.filter((f) => f.isSymlink).length;
 const largeThreshold = 50 * MB;
@@ -201,18 +210,31 @@ function computeDuplicates(allFiles, textFiles) {
 for (const tf of textGroup) {
 try {
 const src = textFiles.find((x) => x.absolutePath === tf.absolutePath);
-const content = src ? src.content : "";
-const h = crypto.createHash("sha1").update(content).digest("hex");
+const content = src ? src.content : '';
+const h = crypto.createHash('sha1').update(content).digest('hex');
 const g = contentHashGroups.get(h) || [];
 g.push(tf);
 contentHashGroups.set(h, g);
-} catch (_) { /* ignore hashing errors for duplicate detection */ }
+} catch {
+/* ignore hashing errors for duplicate detection */
+}
 }
 for (const [_h, g] of contentHashGroups.entries()) {
-if (g.length > 1) duplicateCandidates.push({ reason: "same-size+text-hash", size: Number(sizeKey), count: g.length, files: g.map((f) => f.path) });
+if (g.length > 1)
+duplicateCandidates.push({
+reason: 'same-size+text-hash',
+size: Number(sizeKey),
+count: g.length,
+files: g.map((f) => f.path),
+});
 }
 if (otherGroup.length > 1) {
-duplicateCandidates.push({ reason: "same-size", size: Number(sizeKey), count: otherGroup.length, files: otherGroup.map((f) => f.path) });
+duplicateCandidates.push({
+reason: 'same-size',
+size: Number(sizeKey),
+count: otherGroup.length,
+files: otherGroup.map((f) => f.path),
+});
 }
 }
 return duplicateCandidates;
@@ -226,10 +248,12 @@ function estimateCompressibility(textFiles) {
 const sampleLen = Math.min(256 * 1024, tf.size || 0);
 if (sampleLen <= 0) continue;
 const sample = tf.content.slice(0, sampleLen);
-const gz = zlib.gzipSync(Buffer.from(sample, "utf8"));
+const gz = zlib.gzipSync(Buffer.from(sample, 'utf8'));
 compSampleBytes += sampleLen;
 compCompressedBytes += gz.length;
-} catch (_) { /* ignore compression errors during sampling */ }
+} catch {
+/* ignore compression errors during sampling */
+}
 }
 return compSampleBytes > 0 ? compCompressedBytes / compSampleBytes : null;
 }
@@ -245,20 +269,34 @@ function computeGitInfo(allFiles, rootDir, largeThreshold) {
 };
 try {
 if (!rootDir) return info;
-const top = cp.execFileSync("git", ["rev-parse", "--show-toplevel"], { cwd: rootDir, stdio: ["ignore", "pipe", "ignore"] }).toString().trim();
+const top = cp
+.execFileSync('git', ['rev-parse', '--show-toplevel'], {
+cwd: rootDir,
+stdio: ['ignore', 'pipe', 'ignore'],
+})
+.toString()
+.trim();
 if (!top) return info;
 info.isRepo = true;
-const out = cp.execFileSync("git", ["ls-files", "-z"], { cwd: rootDir, stdio: ["ignore", "pipe", "ignore"] });
-const tracked = new Set(out.toString().split("\0").filter(Boolean));
-let trackedBytes = 0, trackedCount = 0, untrackedBytes = 0, untrackedCount = 0;
+const out = cp.execFileSync('git', ['ls-files', '-z'], {
+cwd: rootDir,
+stdio: ['ignore', 'pipe', 'ignore'],
+});
+const tracked = new Set(out.toString().split('\0').filter(Boolean));
+let trackedBytes = 0,
+trackedCount = 0,
+untrackedBytes = 0,
+untrackedCount = 0;
 const lfsCandidates = [];
 for (const f of allFiles) {
 const isTracked = tracked.has(f.path);
 if (isTracked) {
-trackedCount++; trackedBytes += f.size;
+trackedCount++;
+trackedBytes += f.size;
 if (f.size >= largeThreshold) lfsCandidates.push({ path: f.path, size: f.size });
 } else {
-untrackedCount++; untrackedBytes += f.size;
+untrackedCount++;
+untrackedBytes += f.size;
 }
 }
 info.trackedCount = trackedCount;
@@ -266,7 +304,9 @@ function computeGitInfo(allFiles, rootDir, largeThreshold) {
 info.untrackedCount = untrackedCount;
 info.untrackedBytes = untrackedBytes;
 info.lfsCandidates = lfsCandidates.sort((a, b) => b.size - a.size).slice(0, 50);
-} catch (_) { /* git not available or not a repo, ignore */ }
+} catch {
+/* git not available or not a repo, ignore */
+}
 return info;
 }
@@ -280,34 +320,58 @@ function computeLargestFiles(allFiles, totalBytes) {
 size: f.size,
 sizeFormatted: formatSize(f.size),
 percentOfTotal: toPct(f.size, totalBytes),
-ext: f.ext || "",
+ext: f.ext || '',
 isBinary: f.isBinary,
 mtime: f.mtimeMs ? new Date(f.mtimeMs).toISOString() : null,
 }));
 }
 function mdTable(rows, headers) {
-const header = `| ${headers.join(" | ")} |`;
-const sep = `| ${headers.map(() => "---").join(" | ")} |`;
-const body = rows.map((r) => `| ${r.join(" | ")} |`).join("\n");
+const header = `| ${headers.join(' | ')} |`;
+const sep = `| ${headers.map(() => '---').join(' | ')} |`;
+const body = rows.map((r) => `| ${r.join(' | ')} |`).join('\n');
 return `${header}\n${sep}\n${body}`;
 }
 function buildMarkdownReport(largestFiles, byExtensionArr, byDirectoryArr, totalBytes) {
 const toPct = (num, den) => (den === 0 ? 0 : (num / den) * 100);
 const md = [];
-md.push("\n### Top Largest Files (Top 50)\n");
-md.push(mdTable(
-largestFiles.map((f) => [f.path, f.sizeFormatted, `${f.percentOfTotal.toFixed(2)}%`, f.ext || "", f.isBinary ? "binary" : "text"]),
-["Path", "Size", "% of total", "Ext", "Type"],
-));
-md.push("\n\n### Top Extensions by Bytes (Top 20)\n");
-const topExtRows = byExtensionArr.slice(0, 20).map((e) => [e.ext, String(e.count), formatSize(e.bytes), `${toPct(e.bytes, totalBytes).toFixed(2)}%`]);
-md.push(mdTable(topExtRows, ["Ext", "Count", "Bytes", "% of total"]));
-md.push("\n\n### Top Directories by Bytes (Top 20)\n");
-const topDirRows = byDirectoryArr.slice(0, 20).map((d) => [d.dir, String(d.count), formatSize(d.bytes), `${toPct(d.bytes, totalBytes).toFixed(2)}%`]);
-md.push(mdTable(topDirRows, ["Directory", "Files", "Bytes", "% of total"]));
-return md.join("\n");
+md.push(
+'\n### Top Largest Files (Top 50)\n',
+mdTable(
+largestFiles.map((f) => [
+f.path,
+f.sizeFormatted,
+`${f.percentOfTotal.toFixed(2)}%`,
+f.ext || '',
+f.isBinary ? 'binary' : 'text',
+]),
+['Path', 'Size', '% of total', 'Ext', 'Type'],
+),
+'\n\n### Top Extensions by Bytes (Top 20)\n',
+);
+const topExtRows = byExtensionArr
+.slice(0, 20)
+.map((e) => [
+e.ext,
+String(e.count),
+formatSize(e.bytes),
+`${toPct(e.bytes, totalBytes).toFixed(2)}%`,
+]);
+md.push(
+mdTable(topExtRows, ['Ext', 'Count', 'Bytes', '% of total']),
+'\n\n### Top Directories by Bytes (Top 20)\n',
+);
+const topDirRows = byDirectoryArr
+.slice(0, 20)
+.map((d) => [
+d.dir,
+String(d.count),
+formatSize(d.bytes),
+`${toPct(d.bytes, totalBytes).toFixed(2)}%`,
+]);
+md.push(mdTable(topDirRows, ['Directory', 'Files', 'Bytes', '% of total']));
+return md.join('\n');
 }
 module.exports = {