feat: add includeOperations to search_nodes and patterns mode to search_templates

- Add `includeOperations` flag to search_nodes that returns a resource/operation
  tree per result (e.g., Slack: 7 resources, 44 operations), saving a get_node
  round-trip when building workflows. Example calls for both new options follow
  this list.
- Add `searchMode: "patterns"` to search_templates: lightweight workflow pattern
  summaries mined from 2,700+ templates (node frequency, co-occurrence, and
  connection chains across 10 task categories).
- Fix operations extraction: use filter() instead of find() to capture ALL
  operation properties, each mapped to its resource via displayOptions.
- Fix FTS-to-LIKE fallback silently dropping search options.
- Add mine-workflow-patterns.ts script (npm run mine:patterns).
- Restore 584 community nodes in the database after rebuild.
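
Illustrative call shapes for the two new options (query values and the exact
invocation syntax are examples only; response fields depend on the MCP client):

  search_nodes({ query: "slack", includeOperations: true })
    // each result now carries its resource/operation tree
    // (e.g., Slack: 7 resources, 44 operations)

  search_templates({ query: "notify team", searchMode: "patterns" })
    // returns lightweight pattern summaries (node frequency, co-occurrence,
    // common connection chains) instead of full template records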

Conceived by Romuald Członkowski - https://www.aiadvisors.pl/en

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Author: czlonkowski
Date: 2026-03-30 15:45:03 +02:00
parent 20ebfbb0fc
commit bda2009b77
10 changed files with 3414 additions and 28 deletions

mine-workflow-patterns.ts (new file)

@@ -0,0 +1,532 @@
#!/usr/bin/env node
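/**
 * mine-workflow-patterns.ts (npm run mine:patterns)
 *
 * Mines workflow patterns from the stored templates table: node frequency,
 * pairwise co-occurrence, and trigger-to-action connection chains, grouped
 * into task categories. Writes the result to data/workflow-patterns.json,
 * which backs the `searchMode: "patterns"` option of search_templates.
 */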
import { createDatabaseAdapter } from '../database/database-adapter';
import * as zlib from 'zlib';
import * as fs from 'fs';
import * as path from 'path';
// Node types to exclude from analysis
const EXCLUDED_TYPES = new Set([
'n8n-nodes-base.stickyNote',
'n8n-nodes-base.noOp',
'n8n-nodes-base.manualTrigger',
]);
// Category-to-node mapping for classification
const TASK_NODE_MAPPING: Record<string, string[]> = {
ai_automation: [
'nodes-langchain.agent', 'nodes-langchain.openAi', 'nodes-langchain.chainLlm',
'nodes-langchain.lmChatOpenAi', 'nodes-langchain.lmChatAnthropic',
'nodes-langchain.chainSummarization', 'nodes-langchain.toolWorkflow',
'nodes-langchain.memoryBufferWindow', 'nodes-langchain.outputParserStructured',
],
webhook_processing: [
'nodes-base.webhook', 'nodes-base.respondToWebhook',
],
email_automation: [
'nodes-base.gmail', 'nodes-base.emailSend', 'nodes-base.microsoftOutlook',
'nodes-base.emailReadImap',
],
slack_integration: [
'nodes-base.slack', 'nodes-base.slackTrigger',
],
data_sync: [
'nodes-base.googleSheets', 'nodes-base.airtable', 'nodes-base.postgres',
'nodes-base.mysql', 'nodes-base.mongoDb',
],
data_transformation: [
'nodes-base.set', 'nodes-base.code', 'nodes-base.splitInBatches',
'nodes-base.merge', 'nodes-base.itemLists', 'nodes-base.filter',
'nodes-base.if', 'nodes-base.switch',
],
scheduling: [
'nodes-base.scheduleTrigger', 'nodes-base.cron',
],
api_integration: [
'nodes-base.httpRequest', 'nodes-base.webhook', 'nodes-base.graphql',
],
database_operations: [
'nodes-base.postgres', 'nodes-base.mongoDb', 'nodes-base.redis',
'nodes-base.mysql', 'nodes-base.mySql',
],
file_processing: [
'nodes-base.readBinaryFiles', 'nodes-base.writeBinaryFile',
'nodes-base.spreadsheetFile', 'nodes-base.googleDrive',
],
};
// Display name mapping for common node types (used for pattern strings)
const DISPLAY_NAMES: Record<string, string> = {
'n8n-nodes-base.webhook': 'Webhook',
'n8n-nodes-base.httpRequest': 'HTTP Request',
'n8n-nodes-base.code': 'Code',
'n8n-nodes-base.set': 'Set',
'n8n-nodes-base.if': 'If',
'n8n-nodes-base.switch': 'Switch',
'n8n-nodes-base.merge': 'Merge',
'n8n-nodes-base.filter': 'Filter',
'n8n-nodes-base.splitInBatches': 'Split In Batches',
'n8n-nodes-base.itemLists': 'Item Lists',
'n8n-nodes-base.respondToWebhook': 'Respond to Webhook',
'n8n-nodes-base.gmail': 'Gmail',
'n8n-nodes-base.emailSend': 'Send Email',
'n8n-nodes-base.slack': 'Slack',
'n8n-nodes-base.slackTrigger': 'Slack Trigger',
'n8n-nodes-base.googleSheets': 'Google Sheets',
'n8n-nodes-base.airtable': 'Airtable',
'n8n-nodes-base.postgres': 'Postgres',
'n8n-nodes-base.mysql': 'MySQL',
'n8n-nodes-base.mongoDb': 'MongoDB',
'n8n-nodes-base.redis': 'Redis',
'n8n-nodes-base.scheduleTrigger': 'Schedule Trigger',
'n8n-nodes-base.cron': 'Cron',
'n8n-nodes-base.googleDrive': 'Google Drive',
'n8n-nodes-base.spreadsheetFile': 'Spreadsheet File',
'n8n-nodes-base.readBinaryFiles': 'Read Binary Files',
'n8n-nodes-base.writeBinaryFile': 'Write Binary File',
'n8n-nodes-base.graphql': 'GraphQL',
'n8n-nodes-base.microsoftOutlook': 'Microsoft Outlook',
'n8n-nodes-base.emailReadImap': 'Email (IMAP)',
'n8n-nodes-base.noOp': 'No Op',
'@n8n/n8n-nodes-langchain.agent': 'AI Agent',
'@n8n/n8n-nodes-langchain.openAi': 'OpenAI',
'@n8n/n8n-nodes-langchain.chainLlm': 'LLM Chain',
'@n8n/n8n-nodes-langchain.lmChatOpenAi': 'OpenAI Chat Model',
'@n8n/n8n-nodes-langchain.lmChatAnthropic': 'Anthropic Chat Model',
'@n8n/n8n-nodes-langchain.chainSummarization': 'Summarization Chain',
'@n8n/n8n-nodes-langchain.toolWorkflow': 'Workflow Tool',
'@n8n/n8n-nodes-langchain.memoryBufferWindow': 'Window Buffer Memory',
'@n8n/n8n-nodes-langchain.outputParserStructured': 'Structured Output Parser',
'n8n-nodes-base.manualTrigger': 'Manual Trigger',
};
/**
* Check if a node type matches a category mapping entry.
* Category mappings use short forms like 'nodes-base.webhook'
* while actual types may be 'n8n-nodes-base.webhook' or '@n8n/n8n-nodes-langchain.agent'.
*/
function matchesCategory(nodeType: string, categoryPattern: string): boolean {
// includes() already covers the endsWith() case; patterns are suffix-style short forms
return nodeType.includes(categoryPattern);
}
/**
* Get display name for a node type.
*/
function getDisplayName(nodeType: string): string {
if (DISPLAY_NAMES[nodeType]) {
return DISPLAY_NAMES[nodeType];
}
// Extract the last part after the last dot
const parts = nodeType.split('.');
const name = parts[parts.length - 1];
// Convert camelCase to Title Case
return name
.replace(/([A-Z])/g, ' $1')
.replace(/^./, s => s.toUpperCase())
.trim();
}
/**
* Determine if a node type is a trigger node.
*/
function isTriggerType(nodeType: string): boolean {
const lower = nodeType.toLowerCase();
return lower.includes('trigger') || lower.includes('webhook');
}
/**
* Classify a set of node types into categories.
*/
function classifyTemplate(nodeTypes: string[], metadataCategories?: string[]): string[] {
const categories = new Set<string>();
for (const nodeType of nodeTypes) {
for (const [category, patterns] of Object.entries(TASK_NODE_MAPPING)) {
for (const pattern of patterns) {
if (matchesCategory(nodeType, pattern)) {
categories.add(category);
}
}
}
}
// Also include categories from metadata_json if available
if (metadataCategories && Array.isArray(metadataCategories)) {
for (const cat of metadataCategories) {
const normalized = cat.toLowerCase().replace(/[\s-]+/g, '_');
// Map metadata categories to our category keys
for (const key of Object.keys(TASK_NODE_MAPPING)) {
if (normalized.includes(key) || key.includes(normalized)) {
categories.add(key);
}
}
}
}
return Array.from(categories);
}
interface NodeFrequency {
type: string;
count: number;
frequency: number;
displayName: string;
}
interface EdgeFrequency {
from: string;
to: string;
count: number;
}
interface ChainFrequency {
chain: string[];
count: number;
frequency: number;
}
interface CategoryData {
templateCount: number;
pattern: string;
nodes: Array<{ type: string; frequency: number; role: string; displayName: string }>;
commonChains: ChainFrequency[];
}
interface PatternOutput {
generatedAt: string;
templateCount: number;
categories: Record<string, CategoryData>;
global: {
topNodes: NodeFrequency[];
topEdges: EdgeFrequency[];
};
}
async function main(): Promise<void> {
const dbPath = path.resolve(__dirname, '../../data/nodes.db');
console.log(`Opening database: ${dbPath}`);
const db = await createDatabaseAdapter(dbPath);
// ---- Pass 1: Frequency & co-occurrence ----
console.log('\n=== Pass 1: Node frequency & co-occurrence ===');
const pass1Start = Date.now();
const lightRows = db.prepare(
'SELECT id, nodes_used, metadata_json, views FROM templates ORDER BY views DESC'
).all() as Array<{ id: number; nodes_used: string | null; metadata_json: string | null; views: number }>;
const templateCount = lightRows.length;
console.log(`Found ${templateCount} templates`);
// Global counters
const nodeFrequency = new Map<string, number>();
const pairCooccurrence = new Map<string, number>();
// Per-category tracking
const categoryTemplates = new Map<string, Set<number>>();
const categoryNodes = new Map<string, Map<string, number>>();
for (let i = 0; i < lightRows.length; i++) {
const row = lightRows[i];
if (i > 0 && i % 500 === 0) {
console.log(` Processing template ${i}/${templateCount}...`);
}
if (!row.nodes_used) continue;
let nodeTypes: string[];
try {
nodeTypes = JSON.parse(row.nodes_used);
if (!Array.isArray(nodeTypes)) continue;
} catch {
continue;
}
// Deduplicate and filter
const uniqueTypes = [...new Set(nodeTypes)].filter(t => !EXCLUDED_TYPES.has(t));
if (uniqueTypes.length === 0) continue;
// Count global frequency
for (const nodeType of uniqueTypes) {
nodeFrequency.set(nodeType, (nodeFrequency.get(nodeType) || 0) + 1);
}
// Count pairwise co-occurrence
for (let a = 0; a < uniqueTypes.length; a++) {
for (let b = a + 1; b < uniqueTypes.length; b++) {
const pair = [uniqueTypes[a], uniqueTypes[b]].sort().join('|||');
pairCooccurrence.set(pair, (pairCooccurrence.get(pair) || 0) + 1);
}
}
// Classify into categories
let metadataCategories: string[] | undefined;
if (row.metadata_json) {
try {
const meta = JSON.parse(row.metadata_json);
metadataCategories = meta.categories;
} catch {
// skip
}
}
const categories = classifyTemplate(uniqueTypes, metadataCategories);
for (const cat of categories) {
if (!categoryTemplates.has(cat)) {
categoryTemplates.set(cat, new Set());
categoryNodes.set(cat, new Map());
}
categoryTemplates.get(cat)!.add(row.id);
const catNodeMap = categoryNodes.get(cat)!;
for (const nodeType of uniqueTypes) {
catNodeMap.set(nodeType, (catNodeMap.get(nodeType) || 0) + 1);
}
}
}
const pass1Time = ((Date.now() - pass1Start) / 1000).toFixed(1);
console.log(`Pass 1 complete: ${pass1Time}s`);
console.log(` Unique node types: ${nodeFrequency.size}`);
console.log(` Categories found: ${categoryTemplates.size}`);
// ---- Pass 2: Connection topology ----
console.log('\n=== Pass 2: Connection topology ===');
const pass2Start = Date.now();
const compressedRows = db.prepare(
'SELECT id, nodes_used, workflow_json_compressed, views FROM templates ORDER BY views DESC'
).all() as Array<{ id: number; nodes_used: string | null; workflow_json_compressed: string | null; views: number }>;
// Edge frequency: sourceType -> targetType
const edgeFrequency = new Map<string, number>();
// Chain frequency (by category)
const categoryChains = new Map<string, Map<string, number>>();
// Global chains
const globalChains = new Map<string, number>();
let decompressedCount = 0;
let decompressFailCount = 0;
for (let i = 0; i < compressedRows.length; i++) {
const row = compressedRows[i];
if (i > 0 && i % 500 === 0) {
console.log(` Processing template ${i}/${templateCount}...`);
}
if (!row.workflow_json_compressed) continue;
let workflow: any;
try {
const decompressed = zlib.gunzipSync(Buffer.from(row.workflow_json_compressed, 'base64'));
workflow = JSON.parse(decompressed.toString());
decompressedCount++;
} catch {
decompressFailCount++;
continue;
}
const nodes: any[] = workflow.nodes || [];
const connections: Record<string, any> = workflow.connections || {};
// Build name -> type map
const nameToType = new Map<string, string>();
for (const node of nodes) {
if (node.name && node.type && !EXCLUDED_TYPES.has(node.type)) {
nameToType.set(node.name, node.type);
}
}
// Build adjacency list for BFS
const adjacency = new Map<string, string[]>();
// Parse connections and record edges
for (const sourceName of Object.keys(connections)) {
const sourceType = nameToType.get(sourceName);
if (!sourceType) continue;
const mainOutputs = connections[sourceName]?.main;
if (!Array.isArray(mainOutputs)) continue;
for (const outputGroup of mainOutputs) {
if (!Array.isArray(outputGroup)) continue;
for (const conn of outputGroup) {
if (!conn || !conn.node) continue;
const targetName = conn.node;
const targetType = nameToType.get(targetName);
if (!targetType) continue;
// Record edge
const edgeKey = `${sourceType}|||${targetType}`;
edgeFrequency.set(edgeKey, (edgeFrequency.get(edgeKey) || 0) + 1);
// Build adjacency for chain extraction
if (!adjacency.has(sourceName)) {
adjacency.set(sourceName, []);
}
adjacency.get(sourceName)!.push(targetName);
}
}
}
// Find trigger nodes (nodes with no incoming connections, or type contains 'Trigger')
const hasIncoming = new Set<string>();
for (const targets of adjacency.values()) {
for (const target of targets) {
hasIncoming.add(target);
}
}
const triggerNodes = nodes.filter(n => {
if (EXCLUDED_TYPES.has(n.type)) return false;
return !hasIncoming.has(n.name) || isTriggerType(n.type);
});
// Pre-compute categories for this template (avoids re-parsing nodes_used per chain)
let templateCategories: string[] = [];
try {
if (row.nodes_used) {
const parsed = JSON.parse(row.nodes_used);
if (Array.isArray(parsed)) {
templateCategories = classifyTemplate(parsed.filter((t: string) => !EXCLUDED_TYPES.has(t)));
}
}
} catch {
// skip
}
// BFS from each trigger, extract chains of length 2-4
for (const trigger of triggerNodes) {
// Skip nodes that never made it into nameToType (e.g. missing name or type)
const triggerType = nameToType.get(trigger.name);
if (!triggerType) continue;
const queue: Array<{ nodeName: string; path: string[] }> = [
{ nodeName: trigger.name, path: [triggerType] },
];
const visited = new Set<string>([trigger.name]);
while (queue.length > 0) {
const { nodeName, path: currentPath } = queue.shift()!;
// Record chains of length 2, 3, 4
if (currentPath.length >= 2 && currentPath.length <= 4) {
const chainKey = currentPath.join('|||');
globalChains.set(chainKey, (globalChains.get(chainKey) || 0) + 1);
for (const cat of templateCategories) {
if (!categoryChains.has(cat)) {
categoryChains.set(cat, new Map());
}
const catChainMap = categoryChains.get(cat)!;
catChainMap.set(chainKey, (catChainMap.get(chainKey) || 0) + 1);
}
}
// Stop extending at depth 4
if (currentPath.length >= 4) continue;
const neighbors = adjacency.get(nodeName) || [];
for (const neighbor of neighbors) {
if (visited.has(neighbor)) continue;
const neighborType = nameToType.get(neighbor);
if (!neighborType) continue;
visited.add(neighbor);
queue.push({ nodeName: neighbor, path: [...currentPath, neighborType] });
}
}
}
}
const pass2Time = ((Date.now() - pass2Start) / 1000).toFixed(1);
console.log(`Pass 2 complete: ${pass2Time}s`);
console.log(` Decompressed: ${decompressedCount}, Failed: ${decompressFailCount}`);
console.log(` Unique edges: ${edgeFrequency.size}`);
console.log(` Unique chains: ${globalChains.size}`);
// ---- Build output ----
console.log('\n=== Building output ===');
// Global top nodes
const topNodes: NodeFrequency[] = [...nodeFrequency.entries()]
.sort(([, a], [, b]) => b - a)
.slice(0, 50)
.map(([type, count]) => ({
type,
count,
frequency: Math.round((count / templateCount) * 100) / 100,
displayName: getDisplayName(type),
}));
// Global top edges
const topEdges: EdgeFrequency[] = [...edgeFrequency.entries()]
.sort(([, a], [, b]) => b - a)
.slice(0, 50)
.map(([key, count]) => {
const [from, to] = key.split('|||');
return { from, to, count };
});
// Build category data
const categories: Record<string, CategoryData> = {};
for (const [cat, templateIds] of categoryTemplates.entries()) {
const catNodeMap = categoryNodes.get(cat)!;
const catTemplateCount = templateIds.size;
// Top nodes for category, sorted by frequency
const catTopNodes = [...catNodeMap.entries()]
.sort(([, a], [, b]) => b - a)
.slice(0, 20)
.map(([type, count]) => ({
type,
frequency: Math.round((count / catTemplateCount) * 100) / 100,
role: isTriggerType(type) ? 'trigger' : 'action',
displayName: getDisplayName(type),
}));
// Top chains for category
const catChainMap = categoryChains.get(cat) || new Map<string, number>();
const catTopChains: ChainFrequency[] = [...catChainMap.entries()]
.sort(([, a], [, b]) => b - a)
.slice(0, 10)
.map(([chainKey, count]) => ({
chain: chainKey.split('|||'),
count,
frequency: Math.round((count / catTemplateCount) * 100) / 100,
}));
// Generate pattern string from top nodes
// Order: triggers first, then transforms/logic, then actions
const triggerNodes = catTopNodes.filter(n => n.role === 'trigger').slice(0, 1);
const actionNodes = catTopNodes.filter(n => n.role !== 'trigger').slice(0, 3);
const patternParts = [...triggerNodes, ...actionNodes].map(n => n.displayName);
const pattern = patternParts.join(' \u2192 ') || 'Mixed workflow';
categories[cat] = {
templateCount: catTemplateCount,
pattern,
nodes: catTopNodes,
commonChains: catTopChains,
};
}
const output: PatternOutput = {
generatedAt: new Date().toISOString(),
templateCount,
categories,
global: {
topNodes,
topEdges,
},
};
// Write output
const outputPath = path.resolve(__dirname, '../../data/workflow-patterns.json');
fs.writeFileSync(outputPath, JSON.stringify(output, null, 2));
console.log(`\nWrote ${Object.keys(categories).length} categories, ${templateCount} templates analyzed`);
console.log(`Output: ${outputPath}`);
console.log(`Pass 1: ${pass1Time}s, Pass 2: ${pass2Time}s`);
console.log(`Total: ${((Date.now() - pass1Start) / 1000).toFixed(1)}s`);
db.close();
}
main().catch((err) => {
console.error('Error:', err);
process.exit(1);
});