mirror of
https://github.com/czlonkowski/n8n-mcp.git
synced 2026-02-10 15:23:07 +00:00
feat: implement template compression and view count filtering
- Add gzip compression for workflow JSONs (89% size reduction) - Filter templates with ≤10 views to remove low-quality content - Reduce template count from 4,505 to 2,596 high-quality templates - Compress template data from ~75MB to 12.10MB - Total database reduced from 117MB to 48MB - Add on-the-fly decompression for template retrieval - Update schema to support compressed workflow storage 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -39,58 +39,75 @@ export interface TemplateDetail {
|
||||
|
||||
export class TemplateFetcher {
|
||||
private readonly baseUrl = 'https://api.n8n.io/api/templates';
|
||||
private readonly pageSize = 100;
|
||||
private readonly pageSize = 250; // Maximum allowed by API
|
||||
|
||||
/**
|
||||
* Fetch all templates and filter to last 12 months
|
||||
* This fetches ALL pages first, then applies date filter locally
|
||||
*/
|
||||
async fetchTemplates(progressCallback?: (current: number, total: number) => void): Promise<TemplateWorkflow[]> {
|
||||
const allTemplates = await this.fetchAllTemplates(progressCallback);
|
||||
|
||||
// Apply date filter locally after fetching all
|
||||
const oneYearAgo = new Date();
|
||||
oneYearAgo.setMonth(oneYearAgo.getMonth() - 12);
|
||||
|
||||
const recentTemplates = allTemplates.filter((w: TemplateWorkflow) => {
|
||||
const createdDate = new Date(w.createdAt);
|
||||
return createdDate >= oneYearAgo;
|
||||
});
|
||||
|
||||
logger.info(`Filtered to ${recentTemplates.length} templates from last 12 months (out of ${allTemplates.length} total)`);
|
||||
return recentTemplates;
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetch ALL templates from the API without date filtering
|
||||
* Used internally and can be used for other filtering strategies
|
||||
*/
|
||||
async fetchAllTemplates(progressCallback?: (current: number, total: number) => void): Promise<TemplateWorkflow[]> {
|
||||
const allTemplates: TemplateWorkflow[] = [];
|
||||
let page = 1;
|
||||
let hasMore = true;
|
||||
let totalWorkflows = 0;
|
||||
|
||||
logger.info('Starting template fetch from n8n.io API');
|
||||
logger.info('Starting complete template fetch from n8n.io API');
|
||||
|
||||
while (hasMore) {
|
||||
try {
|
||||
const response = await axios.get(`${this.baseUrl}/search`, {
|
||||
params: {
|
||||
page,
|
||||
rows: this.pageSize,
|
||||
sort_by: 'last-updated'
|
||||
rows: this.pageSize
|
||||
// Note: sort_by parameter doesn't work, templates come in popularity order
|
||||
}
|
||||
});
|
||||
|
||||
const { workflows, totalWorkflows } = response.data;
|
||||
const { workflows } = response.data;
|
||||
totalWorkflows = response.data.totalWorkflows || totalWorkflows;
|
||||
|
||||
// Filter templates by date
|
||||
const recentTemplates = workflows.filter((w: TemplateWorkflow) => {
|
||||
const createdDate = new Date(w.createdAt);
|
||||
return createdDate >= oneYearAgo;
|
||||
});
|
||||
allTemplates.push(...workflows);
|
||||
|
||||
// If we hit templates older than 1 year, stop fetching
|
||||
if (recentTemplates.length < workflows.length) {
|
||||
hasMore = false;
|
||||
logger.info(`Reached templates older than 1 year at page ${page}`);
|
||||
}
|
||||
|
||||
allTemplates.push(...recentTemplates);
|
||||
// Calculate total pages for better progress reporting
|
||||
const totalPages = Math.ceil(totalWorkflows / this.pageSize);
|
||||
|
||||
if (progressCallback) {
|
||||
progressCallback(allTemplates.length, Math.min(totalWorkflows, allTemplates.length + 500));
|
||||
// Enhanced progress with page information
|
||||
progressCallback(allTemplates.length, totalWorkflows);
|
||||
}
|
||||
|
||||
logger.debug(`Fetched page ${page}/${totalPages}: ${workflows.length} templates (total so far: ${allTemplates.length}/${totalWorkflows})`);
|
||||
|
||||
// Check if there are more pages
|
||||
if (workflows.length < this.pageSize || allTemplates.length >= totalWorkflows) {
|
||||
if (workflows.length < this.pageSize) {
|
||||
hasMore = false;
|
||||
}
|
||||
|
||||
page++;
|
||||
|
||||
// Rate limiting - be nice to the API
|
||||
// Rate limiting - be nice to the API (slightly faster with 250 rows/page)
|
||||
if (hasMore) {
|
||||
await this.sleep(500); // 500ms between requests
|
||||
await this.sleep(300); // 300ms between requests (was 500ms with 100 rows)
|
||||
}
|
||||
} catch (error) {
|
||||
logger.error(`Error fetching templates page ${page}:`, error);
|
||||
@@ -98,7 +115,7 @@ export class TemplateFetcher {
|
||||
}
|
||||
}
|
||||
|
||||
logger.info(`Fetched ${allTemplates.length} templates from last year`);
|
||||
logger.info(`Fetched all ${allTemplates.length} templates from n8n.io`);
|
||||
return allTemplates;
|
||||
}
|
||||
|
||||
@@ -131,8 +148,8 @@ export class TemplateFetcher {
|
||||
progressCallback(i + 1, workflows.length);
|
||||
}
|
||||
|
||||
// Rate limiting
|
||||
await this.sleep(200); // 200ms between requests
|
||||
// Rate limiting (conservative to avoid API throttling)
|
||||
await this.sleep(150); // 150ms between requests
|
||||
} catch (error) {
|
||||
logger.error(`Failed to fetch details for workflow ${workflow.id}:`, error);
|
||||
// Continue with other templates
|
||||
|
||||
@@ -2,6 +2,7 @@ import { DatabaseAdapter } from '../database/database-adapter';
|
||||
import { TemplateWorkflow, TemplateDetail } from './template-fetcher';
|
||||
import { logger } from '../utils/logger';
|
||||
import { TemplateSanitizer } from '../utils/template-sanitizer';
|
||||
import * as zlib from 'zlib';
|
||||
|
||||
export interface StoredTemplate {
|
||||
id: number;
|
||||
@@ -12,7 +13,8 @@ export interface StoredTemplate {
|
||||
author_username: string;
|
||||
author_verified: number;
|
||||
nodes_used: string; // JSON string
|
||||
workflow_json: string; // JSON string
|
||||
workflow_json?: string; // JSON string (deprecated)
|
||||
workflow_json_compressed?: string; // Base64 encoded gzip
|
||||
categories: string; // JSON string
|
||||
views: number;
|
||||
created_at: string;
|
||||
@@ -105,10 +107,16 @@ export class TemplateRepository {
|
||||
* Save a template to the database
|
||||
*/
|
||||
saveTemplate(workflow: TemplateWorkflow, detail: TemplateDetail, categories: string[] = []): void {
|
||||
// Filter out templates with 10 or fewer views
|
||||
if ((workflow.totalViews || 0) <= 10) {
|
||||
logger.debug(`Skipping template ${workflow.id}: ${workflow.name} (only ${workflow.totalViews} views)`);
|
||||
return;
|
||||
}
|
||||
|
||||
const stmt = this.db.prepare(`
|
||||
INSERT OR REPLACE INTO templates (
|
||||
id, workflow_id, name, description, author_name, author_username,
|
||||
author_verified, nodes_used, workflow_json, categories, views,
|
||||
author_verified, nodes_used, workflow_json_compressed, categories, views,
|
||||
created_at, updated_at, url
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
`);
|
||||
@@ -133,6 +141,17 @@ export class TemplateRepository {
|
||||
});
|
||||
}
|
||||
|
||||
// Compress the workflow JSON
|
||||
const workflowJsonStr = JSON.stringify(sanitizedWorkflow);
|
||||
const compressed = zlib.gzipSync(workflowJsonStr);
|
||||
const compressedBase64 = compressed.toString('base64');
|
||||
|
||||
// Log compression ratio
|
||||
const originalSize = Buffer.byteLength(workflowJsonStr);
|
||||
const compressedSize = compressed.length;
|
||||
const ratio = Math.round((1 - compressedSize / originalSize) * 100);
|
||||
logger.debug(`Template ${workflow.id} compression: ${originalSize} → ${compressedSize} bytes (${ratio}% reduction)`);
|
||||
|
||||
stmt.run(
|
||||
workflow.id,
|
||||
workflow.id,
|
||||
@@ -142,7 +161,7 @@ export class TemplateRepository {
|
||||
workflow.user.username,
|
||||
workflow.user.verified ? 1 : 0,
|
||||
JSON.stringify(nodeTypes),
|
||||
JSON.stringify(sanitizedWorkflow),
|
||||
compressedBase64,
|
||||
JSON.stringify(categories),
|
||||
workflow.totalViews || 0,
|
||||
workflow.createdAt,
|
||||
@@ -165,7 +184,8 @@ export class TemplateRepository {
|
||||
`;
|
||||
|
||||
const params = [...nodeTypes.map(n => `%"${n}"%`), limit];
|
||||
return this.db.prepare(query).all(...params) as StoredTemplate[];
|
||||
const results = this.db.prepare(query).all(...params) as StoredTemplate[];
|
||||
return results.map(t => this.decompressWorkflow(t));
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -176,7 +196,37 @@ export class TemplateRepository {
|
||||
SELECT * FROM templates WHERE id = ?
|
||||
`).get(templateId) as StoredTemplate | undefined;
|
||||
|
||||
return row || null;
|
||||
if (!row) return null;
|
||||
|
||||
// Decompress workflow JSON if compressed
|
||||
if (row.workflow_json_compressed && !row.workflow_json) {
|
||||
try {
|
||||
const compressed = Buffer.from(row.workflow_json_compressed, 'base64');
|
||||
const decompressed = zlib.gunzipSync(compressed);
|
||||
row.workflow_json = decompressed.toString();
|
||||
} catch (error) {
|
||||
logger.error(`Failed to decompress workflow for template ${templateId}:`, error);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
return row;
|
||||
}
|
||||
|
||||
/**
|
||||
* Decompress workflow JSON for a template
|
||||
*/
|
||||
private decompressWorkflow(template: StoredTemplate): StoredTemplate {
|
||||
if (template.workflow_json_compressed && !template.workflow_json) {
|
||||
try {
|
||||
const compressed = Buffer.from(template.workflow_json_compressed, 'base64');
|
||||
const decompressed = zlib.gunzipSync(compressed);
|
||||
template.workflow_json = decompressed.toString();
|
||||
} catch (error) {
|
||||
logger.error(`Failed to decompress workflow for template ${template.id}:`, error);
|
||||
}
|
||||
}
|
||||
return template;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -209,7 +259,7 @@ export class TemplateRepository {
|
||||
`).all(ftsQuery, limit) as StoredTemplate[];
|
||||
|
||||
logger.debug(`FTS5 search returned ${results.length} results`);
|
||||
return results;
|
||||
return results.map(t => this.decompressWorkflow(t));
|
||||
} catch (error: any) {
|
||||
// If FTS5 query fails, fallback to LIKE search
|
||||
logger.warn('FTS5 template search failed, using LIKE fallback:', {
|
||||
@@ -236,7 +286,7 @@ export class TemplateRepository {
|
||||
`).all(likeQuery, likeQuery, limit) as StoredTemplate[];
|
||||
|
||||
logger.debug(`LIKE search returned ${results.length} results`);
|
||||
return results;
|
||||
return results.map(t => this.decompressWorkflow(t));
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -269,11 +319,12 @@ export class TemplateRepository {
|
||||
* Get all templates with limit
|
||||
*/
|
||||
getAllTemplates(limit: number = 10): StoredTemplate[] {
|
||||
return this.db.prepare(`
|
||||
const results = this.db.prepare(`
|
||||
SELECT * FROM templates
|
||||
ORDER BY views DESC, created_at DESC
|
||||
LIMIT ?
|
||||
`).all(limit) as StoredTemplate[];
|
||||
return results.map(t => this.decompressWorkflow(t));
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -284,6 +335,41 @@ export class TemplateRepository {
|
||||
return result.count;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get all existing template IDs for comparison
|
||||
* Used in update mode to skip already fetched templates
|
||||
*/
|
||||
getExistingTemplateIds(): Set<number> {
|
||||
const rows = this.db.prepare('SELECT id FROM templates').all() as { id: number }[];
|
||||
return new Set(rows.map(r => r.id));
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if a template exists in the database
|
||||
*/
|
||||
hasTemplate(templateId: number): boolean {
|
||||
const result = this.db.prepare('SELECT 1 FROM templates WHERE id = ?').get(templateId) as { 1: number } | undefined;
|
||||
return result !== undefined;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get template metadata (id, name, updated_at) for all templates
|
||||
* Used for comparison in update scenarios
|
||||
*/
|
||||
getTemplateMetadata(): Map<number, { name: string; updated_at: string }> {
|
||||
const rows = this.db.prepare('SELECT id, name, updated_at FROM templates').all() as {
|
||||
id: number;
|
||||
name: string;
|
||||
updated_at: string;
|
||||
}[];
|
||||
|
||||
const metadata = new Map<number, { name: string; updated_at: string }>();
|
||||
for (const row of rows) {
|
||||
metadata.set(row.id, { name: row.name, updated_at: row.updated_at });
|
||||
}
|
||||
return metadata;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get template statistics
|
||||
*/
|
||||
|
||||
@@ -47,7 +47,7 @@ export class TemplateService {
|
||||
|
||||
return {
|
||||
...this.formatTemplateInfo(template),
|
||||
workflow: JSON.parse(template.workflow_json)
|
||||
workflow: JSON.parse(template.workflow_json || '{}')
|
||||
};
|
||||
}
|
||||
|
||||
@@ -94,36 +94,59 @@ export class TemplateService {
|
||||
|
||||
/**
|
||||
* Fetch and update templates from n8n.io
|
||||
* @param mode - 'rebuild' to clear and rebuild, 'update' to add only new templates
|
||||
*/
|
||||
async fetchAndUpdateTemplates(
|
||||
progressCallback?: (message: string, current: number, total: number) => void
|
||||
progressCallback?: (message: string, current: number, total: number) => void,
|
||||
mode: 'rebuild' | 'update' = 'rebuild'
|
||||
): Promise<void> {
|
||||
try {
|
||||
// Dynamically import fetcher only when needed (requires axios)
|
||||
const { TemplateFetcher } = await import('./template-fetcher');
|
||||
const fetcher = new TemplateFetcher();
|
||||
|
||||
// Clear existing templates
|
||||
this.repository.clearTemplates();
|
||||
// Get existing template IDs if in update mode
|
||||
let existingIds: Set<number> = new Set();
|
||||
if (mode === 'update') {
|
||||
existingIds = this.repository.getExistingTemplateIds();
|
||||
logger.info(`Update mode: Found ${existingIds.size} existing templates in database`);
|
||||
} else {
|
||||
// Clear existing templates in rebuild mode
|
||||
this.repository.clearTemplates();
|
||||
logger.info('Rebuild mode: Cleared existing templates');
|
||||
}
|
||||
|
||||
// Fetch template list
|
||||
logger.info('Fetching template list from n8n.io');
|
||||
logger.info(`Fetching template list from n8n.io (mode: ${mode})`);
|
||||
const templates = await fetcher.fetchTemplates((current, total) => {
|
||||
progressCallback?.('Fetching template list', current, total);
|
||||
});
|
||||
|
||||
logger.info(`Found ${templates.length} templates from last year`);
|
||||
logger.info(`Found ${templates.length} templates from last 12 months`);
|
||||
|
||||
// Filter to only new templates if in update mode
|
||||
let templatesToFetch = templates;
|
||||
if (mode === 'update') {
|
||||
templatesToFetch = templates.filter(t => !existingIds.has(t.id));
|
||||
logger.info(`Update mode: ${templatesToFetch.length} new templates to fetch (skipping ${templates.length - templatesToFetch.length} existing)`);
|
||||
|
||||
if (templatesToFetch.length === 0) {
|
||||
logger.info('No new templates to fetch');
|
||||
progressCallback?.('No new templates', 0, 0);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Fetch details for each template
|
||||
logger.info('Fetching template details');
|
||||
const details = await fetcher.fetchAllTemplateDetails(templates, (current, total) => {
|
||||
logger.info(`Fetching details for ${templatesToFetch.length} templates`);
|
||||
const details = await fetcher.fetchAllTemplateDetails(templatesToFetch, (current, total) => {
|
||||
progressCallback?.('Fetching template details', current, total);
|
||||
});
|
||||
|
||||
// Save to database
|
||||
logger.info('Saving templates to database');
|
||||
let saved = 0;
|
||||
for (const template of templates) {
|
||||
for (const template of templatesToFetch) {
|
||||
const detail = details.get(template.id);
|
||||
if (detail) {
|
||||
this.repository.saveTemplate(template, detail);
|
||||
@@ -134,8 +157,10 @@ export class TemplateService {
|
||||
logger.info(`Successfully saved ${saved} templates to database`);
|
||||
|
||||
// Rebuild FTS5 index after bulk import
|
||||
logger.info('Rebuilding FTS5 index for templates');
|
||||
this.repository.rebuildTemplateFTS();
|
||||
if (saved > 0) {
|
||||
logger.info('Rebuilding FTS5 index for templates');
|
||||
this.repository.rebuildTemplateFTS();
|
||||
}
|
||||
|
||||
progressCallback?.('Complete', saved, saved);
|
||||
} catch (error) {
|
||||
|
||||
Reference in New Issue
Block a user