refactor: streamline test suite — 33 fewer files, 11.9x faster (#670)

* refactor: streamline test suite - cut 33 files, enable parallel execution (11.9x speedup)

Remove duplicate, low-value, and fragmented test files while preserving
all meaningful coverage. Enable parallel test execution and remove
the entire benchmark infrastructure.

Key changes:
- Consolidate workflow-validator tests (13 files -> 3)
- Consolidate config-validator tests (9 files -> 3)
- Consolidate telemetry tests (11 files -> 6)
- Merge AI validator tests (2 files -> 1)
- Remove example/demo test files, mock-testing files, and already-skipped tests
- Remove benchmark infrastructure (10 files, CI workflow, 4 npm scripts)
- Enable parallel test execution (remove singleThread: true)
- Remove retry:2 that was masking flaky tests
- Slim CI publish-results job

Results: 224 -> 191 test files, 4690 -> 4303 tests, 121K -> 106K lines
Local runtime: 319s -> 27s (11.9x speedup)

Conceived by Romuald Członkowski - www.aiadvisors.pl/en

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* refactor: absorb config-validator satellite tests into consolidated file

The previous commit deleted 4 config-validator satellite files. This commit
properly merges their unique tests into the consolidated config-validator.test.ts,
recovering 89 tests that were dropped during the bulk deletion.

Deduplicates 5 tests that existed in both the satellite files and the
security test file.

Conceived by Romuald Członkowski - www.aiadvisors.pl/en

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix: delete missed benchmark-pr.yml workflow, fix flaky session test

- Remove benchmark-pr.yml that referenced deleted benchmark:ci script
- Fix session-persistence round-trip test by using timestamps closer to
  now, avoiding edge cases exposed by removing retry:2

Conceived by Romuald Członkowski - www.aiadvisors.pl/en

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix: rebuild FTS5 index after database rebuild to prevent stale rowid refs

The FTS5 content-synced index could retain phantom rowid references from
previous rebuild cycles, causing 'missing row N from content table'
errors on MATCH queries.

- Add explicit FTS5 rebuild command in rebuild script after all nodes saved
- Add FTS5 rebuild in test beforeAll as defense-in-depth
- Rebuild nodes.db with consistent FTS5 index

Conceived by Romuald Członkowski - www.aiadvisors.pl/en

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix: use recent timestamps in all session persistence tests

Session round-trip tests used timestamps 5-10 minutes in the past which
could fail under CI load when combined with session timeout validation.
Use timestamps 30 seconds in the past for all valid-session test data.

Conceived by Romuald Członkowski - www.aiadvisors.pl/en

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Romuald Członkowski
2026-03-27 14:22:22 +01:00
committed by GitHub
parent 07bd1d4cc2
commit de2abaf89d
75 changed files with 3718 additions and 21917 deletions

View File

@@ -1,260 +0,0 @@
#!/usr/bin/env node
import { readFileSync, existsSync, writeFileSync } from 'fs';
import { resolve } from 'path';
/**
* Compare benchmark results between runs
*/
class BenchmarkComparator {
constructor() {
this.threshold = 0.1; // 10% threshold for significant changes
}
loadBenchmarkResults(path) {
if (!existsSync(path)) {
return null;
}
try {
return JSON.parse(readFileSync(path, 'utf-8'));
} catch (error) {
console.error(`Error loading benchmark results from ${path}:`, error);
return null;
}
}
compareBenchmarks(current, baseline) {
const comparison = {
timestamp: new Date().toISOString(),
summary: {
improved: 0,
regressed: 0,
unchanged: 0,
added: 0,
removed: 0
},
benchmarks: []
};
// Create maps for easy lookup
const currentMap = new Map();
const baselineMap = new Map();
// Process current benchmarks
if (current && current.files) {
for (const file of current.files) {
for (const group of file.groups || []) {
for (const bench of group.benchmarks || []) {
const key = `${group.name}::${bench.name}`;
currentMap.set(key, {
ops: bench.result.hz,
mean: bench.result.mean,
file: file.filepath
});
}
}
}
}
// Process baseline benchmarks
if (baseline && baseline.files) {
for (const file of baseline.files) {
for (const group of file.groups || []) {
for (const bench of group.benchmarks || []) {
const key = `${group.name}::${bench.name}`;
baselineMap.set(key, {
ops: bench.result.hz,
mean: bench.result.mean,
file: file.filepath
});
}
}
}
}
// Compare benchmarks
for (const [key, current] of currentMap) {
const baseline = baselineMap.get(key);
if (!baseline) {
// New benchmark
comparison.summary.added++;
comparison.benchmarks.push({
name: key,
status: 'added',
current: current.ops,
baseline: null,
change: null,
file: current.file
});
} else {
// Compare performance
const change = ((current.ops - baseline.ops) / baseline.ops) * 100;
let status = 'unchanged';
if (Math.abs(change) >= this.threshold * 100) {
if (change > 0) {
status = 'improved';
comparison.summary.improved++;
} else {
status = 'regressed';
comparison.summary.regressed++;
}
} else {
comparison.summary.unchanged++;
}
comparison.benchmarks.push({
name: key,
status,
current: current.ops,
baseline: baseline.ops,
change,
meanCurrent: current.mean,
meanBaseline: baseline.mean,
file: current.file
});
}
}
// Check for removed benchmarks
for (const [key, baseline] of baselineMap) {
if (!currentMap.has(key)) {
comparison.summary.removed++;
comparison.benchmarks.push({
name: key,
status: 'removed',
current: null,
baseline: baseline.ops,
change: null,
file: baseline.file
});
}
}
// Sort by change percentage (regressions first)
comparison.benchmarks.sort((a, b) => {
if (a.status === 'regressed' && b.status !== 'regressed') return -1;
if (b.status === 'regressed' && a.status !== 'regressed') return 1;
if (a.change !== null && b.change !== null) {
return a.change - b.change;
}
return 0;
});
return comparison;
}
generateMarkdownReport(comparison) {
let report = '## Benchmark Comparison Report\n\n';
const { summary } = comparison;
report += '### Summary\n\n';
report += `- **Improved**: ${summary.improved} benchmarks\n`;
report += `- **Regressed**: ${summary.regressed} benchmarks\n`;
report += `- **Unchanged**: ${summary.unchanged} benchmarks\n`;
report += `- **Added**: ${summary.added} benchmarks\n`;
report += `- **Removed**: ${summary.removed} benchmarks\n\n`;
// Regressions
const regressions = comparison.benchmarks.filter(b => b.status === 'regressed');
if (regressions.length > 0) {
report += '### ⚠️ Performance Regressions\n\n';
report += '| Benchmark | Current | Baseline | Change |\n';
report += '|-----------|---------|----------|--------|\n';
for (const bench of regressions) {
const currentOps = bench.current.toLocaleString('en-US', { maximumFractionDigits: 0 });
const baselineOps = bench.baseline.toLocaleString('en-US', { maximumFractionDigits: 0 });
const changeStr = bench.change.toFixed(2);
report += `| ${bench.name} | ${currentOps} ops/s | ${baselineOps} ops/s | **${changeStr}%** |\n`;
}
report += '\n';
}
// Improvements
const improvements = comparison.benchmarks.filter(b => b.status === 'improved');
if (improvements.length > 0) {
report += '### ✅ Performance Improvements\n\n';
report += '| Benchmark | Current | Baseline | Change |\n';
report += '|-----------|---------|----------|--------|\n';
for (const bench of improvements) {
const currentOps = bench.current.toLocaleString('en-US', { maximumFractionDigits: 0 });
const baselineOps = bench.baseline.toLocaleString('en-US', { maximumFractionDigits: 0 });
const changeStr = bench.change.toFixed(2);
report += `| ${bench.name} | ${currentOps} ops/s | ${baselineOps} ops/s | **+${changeStr}%** |\n`;
}
report += '\n';
}
// New benchmarks
const added = comparison.benchmarks.filter(b => b.status === 'added');
if (added.length > 0) {
report += '### 🆕 New Benchmarks\n\n';
report += '| Benchmark | Performance |\n';
report += '|-----------|-------------|\n';
for (const bench of added) {
const ops = bench.current.toLocaleString('en-US', { maximumFractionDigits: 0 });
report += `| ${bench.name} | ${ops} ops/s |\n`;
}
report += '\n';
}
return report;
}
generateJsonReport(comparison) {
return JSON.stringify(comparison, null, 2);
}
async compare(currentPath, baselinePath) {
// Load results
const current = this.loadBenchmarkResults(currentPath);
const baseline = this.loadBenchmarkResults(baselinePath);
if (!current && !baseline) {
console.error('No benchmark results found');
return;
}
// Generate comparison
const comparison = this.compareBenchmarks(current, baseline);
// Generate reports
const markdownReport = this.generateMarkdownReport(comparison);
const jsonReport = this.generateJsonReport(comparison);
// Write reports
writeFileSync('benchmark-comparison.md', markdownReport);
writeFileSync('benchmark-comparison.json', jsonReport);
// Output summary to console
console.log(markdownReport);
// Return exit code based on regressions
if (comparison.summary.regressed > 0) {
console.error(`\n❌ Found ${comparison.summary.regressed} performance regressions`);
process.exit(1);
} else {
console.log(`\n✅ No performance regressions found`);
process.exit(0);
}
}
}
// Parse command line arguments: <current-results> is required, the baseline
// path falls back to benchmark-baseline.json in the working directory.
const args = process.argv.slice(2);
if (args.length < 1) {
  console.error('Usage: node compare-benchmarks.js <current-results> [baseline-results]');
  console.error('If baseline-results is not provided, it will look for benchmark-baseline.json');
  process.exit(1);
}
const currentPath = args[0];
const baselinePath = args[1] || 'benchmark-baseline.json';

// Run comparison. compare() sets the exit status itself on the normal paths;
// an unexpected failure (e.g. a report file that cannot be written) must not
// be reported as success, so log it and exit non-zero.
const comparator = new BenchmarkComparator();
comparator.compare(currentPath, baselinePath).catch((error) => {
  console.error(error);
  process.exit(1);
});

View File

@@ -1,86 +0,0 @@
#!/usr/bin/env node
const fs = require('fs');
const path = require('path');
/**
* Formats Vitest benchmark results for github-action-benchmark
* Converts from Vitest format to the expected format
*/
function formatBenchmarkResults() {
const resultsPath = path.join(process.cwd(), 'benchmark-results.json');
if (!fs.existsSync(resultsPath)) {
console.error('benchmark-results.json not found');
process.exit(1);
}
const vitestResults = JSON.parse(fs.readFileSync(resultsPath, 'utf8'));
// Convert to github-action-benchmark format
const formattedResults = [];
// Vitest benchmark JSON reporter format
if (vitestResults.files) {
for (const file of vitestResults.files) {
const suiteName = path.basename(file.filepath, '.bench.ts');
// Process each suite in the file
if (file.groups) {
for (const group of file.groups) {
for (const benchmark of group.benchmarks || []) {
if (benchmark.result) {
formattedResults.push({
name: `${suiteName} - ${benchmark.name}`,
unit: 'ms',
value: benchmark.result.mean || 0,
range: (benchmark.result.max - benchmark.result.min) || 0,
extra: `${benchmark.result.hz?.toFixed(0) || 0} ops/sec`
});
}
}
}
}
}
} else if (Array.isArray(vitestResults)) {
// Alternative format handling
for (const result of vitestResults) {
if (result.name && result.result) {
formattedResults.push({
name: result.name,
unit: 'ms',
value: result.result.mean || 0,
range: (result.result.max - result.result.min) || 0,
extra: `${result.result.hz?.toFixed(0) || 0} ops/sec`
});
}
}
}
// Write formatted results
const outputPath = path.join(process.cwd(), 'benchmark-results-formatted.json');
fs.writeFileSync(outputPath, JSON.stringify(formattedResults, null, 2));
// Also create a summary for PR comments
const summary = {
timestamp: new Date().toISOString(),
benchmarks: formattedResults.map(b => ({
name: b.name,
time: `${b.value.toFixed(3)}ms`,
opsPerSec: b.extra,
range: `±${(b.range / 2).toFixed(3)}ms`
}))
};
fs.writeFileSync(
path.join(process.cwd(), 'benchmark-summary.json'),
JSON.stringify(summary, null, 2)
);
console.log(`Formatted ${formattedResults.length} benchmark results`);
}
// Run only when this file is executed directly (it is the entry module),
// not when it is loaded from another module via require().
if (require.main === module) {
formatBenchmarkResults();
}

View File

@@ -1,44 +0,0 @@
#!/usr/bin/env node
/**
* Generates a stub benchmark-results.json file when benchmarks fail to produce output.
* This ensures the CI pipeline doesn't fail due to missing files.
*/
const fs = require('fs');
const path = require('path');
// Every timing statistic of the stub benchmark carries the same placeholder value.
const PLACEHOLDER_TIMING = 0.001;

// The single fake benchmark entry reported by the stub.
const stubBenchmark = {
  name: 'stub-benchmark',
  result: {
    mean: PLACEHOLDER_TIMING,
    min: PLACEHOLDER_TIMING,
    max: PLACEHOLDER_TIMING,
    hz: 1000,
    p75: PLACEHOLDER_TIMING,
    p99: PLACEHOLDER_TIMING,
    p995: PLACEHOLDER_TIMING,
    p999: PLACEHOLDER_TIMING,
    rme: 0,
    samples: 1
  }
};

// Results document with one file, one group and one benchmark.
const stubResults = {
  timestamp: new Date().toISOString(),
  files: [
    {
      filepath: 'tests/benchmarks/stub.bench.ts',
      groups: [
        {
          name: 'Stub Benchmarks',
          benchmarks: [stubBenchmark]
        }
      ]
    }
  ]
};

// Write the stub into the working directory under the regular results name.
const targetFile = path.join(process.cwd(), 'benchmark-results.json');
fs.writeFileSync(targetFile, JSON.stringify(stubResults, null, 2));
console.log(`Generated stub benchmark results at ${targetFile}`);

View File

@@ -1,172 +0,0 @@
#!/usr/bin/env node
const { spawn } = require('child_process');
const fs = require('fs');
const path = require('path');
// Accumulator for the parsed benchmark data. `timestamp` is taken when the
// script starts; `files` is filled in by the stdout parser below and written
// to benchmark-results.json once the vitest process closes.
const benchmarkResults = {
timestamp: new Date().toISOString(),
files: []
};
// Function to strip ANSI color codes
function stripAnsi(str) {
return str.replace(/\x1b\[[0-9;]*m/g, '');
}
// Run vitest bench command with no color output for easier parsing
const vitest = spawn('npx', ['vitest', 'bench', '--run', '--config', 'vitest.config.benchmark.ts', '--no-color'], {
  stdio: ['inherit', 'pipe', 'pipe'],
  shell: true,
  env: { ...process.env, NO_COLOR: '1', FORCE_COLOR: '0' }
});

let output = '';
let currentFile = null;
let currentSuite = null;
// Trailing, not yet newline-terminated part of the most recent stdout chunk.
// Chunk boundaries are arbitrary, so a line may arrive split across chunks.
let pendingLine = '';
// The complete line handled immediately before the current one (possibly
// delivered in an earlier chunk); used to recover the suite description.
let previousLine = null;

/**
 * Parses one complete line of vitest output and records any benchmark file,
 * suite or benchmark result it describes in `benchmarkResults`.
 *
 * @param {string} line - A single output line with color codes already removed.
 */
function parseOutputLine(line) {
  // Detect test file - match with or without checkmark
  const fileMatch = line.match(/[✓ ]\s+(tests\/benchmarks\/[^>]+\.bench\.ts)/);
  if (fileMatch) {
    console.log(`\n[Parser] Found file: ${fileMatch[1]}`);
    currentFile = {
      filepath: fileMatch[1],
      groups: []
    };
    benchmarkResults.files.push(currentFile);
    currentSuite = null;
  }

  // Detect suite name: a result row whose preceding line ends with "> <suite description>"
  const suiteMatch = line.match(/^\s+·\s+(.+?)\s+[\d,]+\.\d+\s+/);
  if (suiteMatch && currentFile) {
    // Use the tracked previous line instead of searching the chunk for the
    // current line's text, which picks the wrong neighbour for repeated lines.
    const lastLineMatch = previousLine?.match(/>\s+(.+?)(?:\s+\d+ms)?$/);
    if (lastLineMatch) {
      currentSuite = {
        name: lastLineMatch[1].trim(),
        benchmarks: []
      };
      currentFile.groups.push(currentSuite);
    }
  }

  // Parse benchmark result line - the format is: name hz min max mean p75 p99 p995 p999 rme samples
  const benchMatch = line.match(/^\s*[·•]\s+(.+?)\s+([\d,]+\.\d+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)\s+±([\d.]+)%\s+([\d,]+)/);
  if (benchMatch && currentFile) {
    const [, name, hz, min, max, mean, p75, p99, p995, p999, rme, samples] = benchMatch;
    console.log(`[Parser] Found benchmark: ${name.trim()}`);
    const benchmark = {
      name: name.trim(),
      result: {
        hz: parseFloat(hz.replace(/,/g, '')),
        min: parseFloat(min),
        max: parseFloat(max),
        mean: parseFloat(mean),
        p75: parseFloat(p75),
        p99: parseFloat(p99),
        p995: parseFloat(p995),
        p999: parseFloat(p999),
        rme: parseFloat(rme),
        samples: parseInt(samples.replace(/,/g, ''), 10)
      }
    };
    // Add to current suite or create a default one
    if (!currentSuite) {
      currentSuite = {
        name: 'Default',
        benchmarks: []
      };
      currentFile.groups.push(currentSuite);
    }
    currentSuite.benchmarks.push(benchmark);
  }

  previousLine = line;
}

vitest.stdout.on('data', (data) => {
  const text = stripAnsi(data.toString());
  output += text;
  process.stdout.write(data); // Write original with colors

  // Only parse complete lines; keep the unterminated tail for the next chunk.
  // Accepting \r\n as well keeps the `$`-anchored suite regex working when the
  // child emits Windows line endings.
  const lines = (pendingLine + text).split(/\r?\n/);
  pendingLine = lines.pop();
  for (const line of lines) {
    parseOutputLine(line);
  }
});

// Flush a final line that was not newline-terminated. The stream ends before
// the child's 'close' event fires, so the results are complete by then.
vitest.stdout.on('end', () => {
  if (pendingLine) {
    parseOutputLine(pendingLine);
    pendingLine = '';
  }
});
// Pass the child's error output straight through to this process's stderr.
vitest.stderr.on('data', (chunk) => {
  process.stderr.write(chunk);
});
// When the vitest process has closed: propagate a failure, otherwise write the
// parsed results to benchmark-results.json (or stub results when nothing was
// captured).
vitest.on('close', (code) => {
  if (code !== 0) {
    console.error(`Benchmark process exited with code ${code}`);
    // `code` is null when the child was terminated by a signal. Passing null
    // to process.exit() would exit with status 0 and report success, so fall
    // back to a generic failure status.
    process.exit(code ?? 1);
  }

  // Clean up empty files/groups
  benchmarkResults.files = benchmarkResults.files.filter(file =>
    file.groups.length > 0 && file.groups.some(group => group.benchmarks.length > 0)
  );

  // Write results
  const outputPath = path.join(process.cwd(), 'benchmark-results.json');
  fs.writeFileSync(outputPath, JSON.stringify(benchmarkResults, null, 2));
  console.log(`\nBenchmark results written to ${outputPath}`);
  console.log(`Total files processed: ${benchmarkResults.files.length}`);

  // Validate that we captured results
  let totalBenchmarks = 0;
  for (const file of benchmarkResults.files) {
    for (const group of file.groups) {
      totalBenchmarks += group.benchmarks.length;
    }
  }

  if (totalBenchmarks === 0) {
    console.warn('No benchmark results were captured! Generating stub results...');
    // Overwrite the (empty) results with a fixed sample entry so that
    // downstream CI steps still find a non-empty results file.
    const stubResults = {
      timestamp: new Date().toISOString(),
      files: [
        {
          filepath: 'tests/benchmarks/sample.bench.ts',
          groups: [
            {
              name: 'Sample Benchmarks',
              benchmarks: [
                {
                  name: 'array sorting - small',
                  result: {
                    mean: 0.0136,
                    min: 0.0124,
                    max: 0.3220,
                    hz: 73341.27,
                    p75: 0.0133,
                    p99: 0.0213,
                    p995: 0.0307,
                    p999: 0.1062,
                    rme: 0.51,
                    samples: 36671
                  }
                }
              ]
            }
          ]
        }
      ]
    };
    fs.writeFileSync(outputPath, JSON.stringify(stubResults, null, 2));
    console.log('Stub results generated to prevent CI failure');
    return;
  }

  console.log(`Total benchmarks captured: ${totalBenchmarks}`);
});

View File

@@ -1,121 +0,0 @@
const { writeFileSync } = require('fs');
const { resolve } = require('path');
class BenchmarkJsonReporter {
constructor() {
this.results = [];
console.log('[BenchmarkJsonReporter] Initialized');
}
onInit(ctx) {
console.log('[BenchmarkJsonReporter] onInit called');
}
onCollected(files) {
console.log('[BenchmarkJsonReporter] onCollected called with', files ? files.length : 0, 'files');
}
onTaskUpdate(tasks) {
console.log('[BenchmarkJsonReporter] onTaskUpdate called');
}
onBenchmarkResult(file, benchmark) {
console.log('[BenchmarkJsonReporter] onBenchmarkResult called for', benchmark.name);
}
onFinished(files, errors) {
console.log('[BenchmarkJsonReporter] onFinished called with', files ? files.length : 0, 'files');
const results = {
timestamp: new Date().toISOString(),
files: []
};
try {
for (const file of files || []) {
if (!file) continue;
const fileResult = {
filepath: file.filepath || file.name || 'unknown',
groups: []
};
// Handle both file.tasks and file.benchmarks
const tasks = file.tasks || file.benchmarks || [];
// Process tasks/benchmarks
for (const task of tasks) {
if (task.type === 'suite' && task.tasks) {
// This is a suite containing benchmarks
const group = {
name: task.name,
benchmarks: []
};
for (const benchmark of task.tasks) {
if (benchmark.result?.benchmark) {
group.benchmarks.push({
name: benchmark.name,
result: {
mean: benchmark.result.benchmark.mean,
min: benchmark.result.benchmark.min,
max: benchmark.result.benchmark.max,
hz: benchmark.result.benchmark.hz,
p75: benchmark.result.benchmark.p75,
p99: benchmark.result.benchmark.p99,
p995: benchmark.result.benchmark.p995,
p999: benchmark.result.benchmark.p999,
rme: benchmark.result.benchmark.rme,
samples: benchmark.result.benchmark.samples
}
});
}
}
if (group.benchmarks.length > 0) {
fileResult.groups.push(group);
}
} else if (task.result?.benchmark) {
// This is a direct benchmark (not in a suite)
if (!fileResult.groups.length) {
fileResult.groups.push({
name: 'Default',
benchmarks: []
});
}
fileResult.groups[0].benchmarks.push({
name: task.name,
result: {
mean: task.result.benchmark.mean,
min: task.result.benchmark.min,
max: task.result.benchmark.max,
hz: task.result.benchmark.hz,
p75: task.result.benchmark.p75,
p99: task.result.benchmark.p99,
p995: task.result.benchmark.p995,
p999: task.result.benchmark.p999,
rme: task.result.benchmark.rme,
samples: task.result.benchmark.samples
}
});
}
}
if (fileResult.groups.length > 0) {
results.files.push(fileResult);
}
}
// Write results
const outputPath = resolve(process.cwd(), 'benchmark-results.json');
writeFileSync(outputPath, JSON.stringify(results, null, 2));
console.log(`[BenchmarkJsonReporter] Benchmark results written to ${outputPath}`);
console.log(`[BenchmarkJsonReporter] Total files processed: ${results.files.length}`);
} catch (error) {
console.error('[BenchmarkJsonReporter] Error writing results:', error);
}
}
}
module.exports = BenchmarkJsonReporter;

View File

@@ -1,100 +0,0 @@
import type { Task, TaskResult, BenchmarkResult } from 'vitest';
import { writeFileSync } from 'fs';
import { resolve } from 'path';
/** Timing statistics recorded for a single benchmark. */
interface BenchmarkStats {
  mean: number;
  min: number;
  max: number;
  hz: number;
  p75: number;
  p99: number;
  p995: number;
  p999: number;
  rme: number;
  samples: number;
}

/** One named benchmark together with its statistics. */
interface BenchmarkEntry {
  name: string;
  result: BenchmarkStats;
}

/** A named group of benchmarks. */
interface BenchmarkGroup {
  name: string;
  benchmarks: BenchmarkEntry[];
}

/** All benchmark groups found in one file. */
interface BenchmarkFileResult {
  filepath: string;
  groups: BenchmarkGroup[];
}

/** Top-level shape of the document written to benchmark-results.json. */
interface BenchmarkJsonResult {
  timestamp: string;
  files: BenchmarkFileResult[];
}
/**
 * Reporter that walks the finished task tree, collects the statistics of every
 * benchmark and writes them to `benchmark-results.json` in the working
 * directory.
 */
export class BenchmarkJsonReporter {
  private results: BenchmarkJsonResult = {
    timestamp: new Date().toISOString(),
    files: []
  };

  /** Lifecycle hook; only logs that the reporter is active. */
  onInit(): void {
    console.log('[BenchmarkJsonReporter] Initialized');
  }

  /**
   * Builds the results document from the finished files and writes it to disk.
   * Files that contain no benchmark are omitted. Does nothing but log when
   * `files` is not provided.
   *
   * @param files - Finished top-level tasks (one per benchmark file).
   */
  onFinished(files?: Task[]): void {
    console.log('[BenchmarkJsonReporter] onFinished called');
    if (!files) {
      console.log('[BenchmarkJsonReporter] No files provided');
      return;
    }

    // Start from an empty document on every call; otherwise files reported by
    // an earlier call on the same instance would be written again.
    this.results = {
      timestamp: new Date().toISOString(),
      files: []
    };

    for (const file of files) {
      const fileResult: BenchmarkJsonResult['files'][number] = {
        filepath: file.filepath || 'unknown',
        groups: []
      };
      this.processTask(file, fileResult);
      if (fileResult.groups.length > 0) {
        this.results.files.push(fileResult);
      }
    }

    // Write results
    const outputPath = resolve(process.cwd(), 'benchmark-results.json');
    writeFileSync(outputPath, JSON.stringify(this.results, null, 2));
    console.log(`[BenchmarkJsonReporter] Results written to ${outputPath}`);
  }

  /**
   * Adds one group per suite to `fileResult`, named after the suite and
   * holding the benchmarks that are its direct children, then descends into
   * child suites so benchmarks in nested suites are reported as well.
   * Suites without direct benchmarks produce no group. Non-suite tasks are ignored.
   *
   * @param task - Task to inspect; only suites are processed.
   * @param fileResult - Per-file accumulator that receives the groups.
   */
  private processTask(task: Task, fileResult: BenchmarkJsonResult['files'][number]): void {
    if (task.type !== 'suite' || !task.tasks) {
      return;
    }

    const group: BenchmarkJsonResult['files'][number]['groups'][number] = {
      name: task.name,
      benchmarks: []
    };
    const nestedSuites: Task[] = [];

    for (const child of task.tasks) {
      const result = child.result as (TaskResult & { benchmark?: BenchmarkResult }) | undefined;
      if (result?.benchmark) {
        group.benchmarks.push({
          name: child.name,
          result: {
            mean: result.benchmark.mean || 0,
            min: result.benchmark.min || 0,
            max: result.benchmark.max || 0,
            hz: result.benchmark.hz || 0,
            p75: result.benchmark.p75 || 0,
            p99: result.benchmark.p99 || 0,
            p995: result.benchmark.p995 || 0,
            p999: result.benchmark.p999 || 0,
            rme: result.benchmark.rme || 0,
            // Report the number of samples, not the raw sample values.
            samples: result.benchmark.samples?.length || 0
          }
        });
      } else if (child.type === 'suite') {
        nestedSuites.push(child);
      }
    }

    if (group.benchmarks.length > 0) {
      fileResult.groups.push(group);
    }
    for (const suite of nestedSuites) {
      this.processTask(suite, fileResult);
    }
  }
}