perf: implement validator instance caching to avoid redundant initialization

- Add module-level cached WorkflowValidator instance
- Create getValidator() helper to reuse validator across mutations
- Update pre/post mutation validation to use cached instance
- Avoid redundant NodeSimilarityService initialization on every mutation

Conceived by Romuald Członkowski - https://www.aiadvisors.pl/en

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
fix: critical telemetry improvements for data quality and security
2026-03-17 16:03:08 +00:00 · 2025-11-13 20:05:58 +01:00 · 2025-11-13 19:40:52 +01:00 · 2025-11-13 14:06:00 +01:00 · 2025-11-13 13:48:43 +01:00 · 2025-11-13 13:42:15 +01:00
2 changed files with 39 additions and 90 deletions
--- a/src/telemetry/workflow-sanitizer.ts
+++ b/src/telemetry/workflow-sanitizer.ts
@@ -27,32 +27,29 @@ interface SanitizedWorkflow {
  workflowHash: string;
 }

-interface PatternDefinition {
-  pattern: RegExp;
-  placeholder: string;
-  preservePrefix?: boolean; // For patterns like "Bearer [REDACTED]"
-}
-
 export class WorkflowSanitizer {
-  private static readonly SENSITIVE_PATTERNS: PatternDefinition[] = [
+  private static readonly SENSITIVE_PATTERNS = [
    // Webhook URLs (replace with placeholder but keep structure) - MUST BE FIRST
-    { pattern: /https?:\/\/[^\s/]+\/webhook\/[^\s]+/g, placeholder: '[REDACTED_WEBHOOK]' },
-    { pattern: /https?:\/\/[^\s/]+\/hook\/[^\s]+/g, placeholder: '[REDACTED_WEBHOOK]' },
+    /https?:\/\/[^\s/]+\/webhook\/[^\s]+/g,
+    /https?:\/\/[^\s/]+\/hook\/[^\s]+/g,

-    // URLs with authentication - MUST BE BEFORE BEARER TOKENS
-    { pattern: /https?:\/\/[^:]+:[^@]+@[^\s/]+/g, placeholder: '[REDACTED_URL_WITH_AUTH]' },
-    { pattern: /wss?:\/\/[^:]+:[^@]+@[^\s/]+/g, placeholder: '[REDACTED_URL_WITH_AUTH]' },
-    { pattern: /(?:postgres|mysql|mongodb|redis):\/\/[^:]+:[^@]+@[^\s]+/g, placeholder: '[REDACTED_URL_WITH_AUTH]' }, // Database protocols - includes port and path
+    // API keys and tokens
+    /sk-[a-zA-Z0-9]{16,}/g, // OpenAI keys
+    /Bearer\s+[^\s]+/gi,    // Bearer tokens
+    /[a-zA-Z0-9_-]{20,}/g,  // Long alphanumeric strings (API keys) - reduced threshold
+    /token['":\s]+[^,}]+/gi, // Token fields
+    /apikey['":\s]+[^,}]+/gi, // API key fields
+    /api_key['":\s]+[^,}]+/gi,
+    /secret['":\s]+[^,}]+/gi,
+    /password['":\s]+[^,}]+/gi,
+    /credential['":\s]+[^,}]+/gi,

-    // API keys and tokens - ORDER MATTERS!
-    // More specific patterns first, then general patterns
-    { pattern: /sk-[a-zA-Z0-9]{16,}/g, placeholder: '[REDACTED_APIKEY]' }, // OpenAI keys
-    { pattern: /Bearer\s+[^\s]+/gi, placeholder: 'Bearer [REDACTED]', preservePrefix: true }, // Bearer tokens
-    { pattern: /\b[a-zA-Z0-9_-]{32,}\b/g, placeholder: '[REDACTED_TOKEN]' }, // Long tokens (32+ chars)
-    { pattern: /\b[a-zA-Z0-9_-]{20,31}\b/g, placeholder: '[REDACTED]' }, // Short tokens (20-31 chars)
+    // URLs with authentication
+    /https?:\/\/[^:]+:[^@]+@[^\s/]+/g, // URLs with auth
+    /wss?:\/\/[^:]+:[^@]+@[^\s/]+/g,

    // Email addresses (optional - uncomment if needed)
-    // { pattern: /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/g, placeholder: '[REDACTED_EMAIL]' },
+    // /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/g,
  ];

  private static readonly SENSITIVE_FIELDS = [
@@ -181,34 +178,19 @@ export class WorkflowSanitizer {
    const sanitized: any = {};

    for (const [key, value] of Object.entries(obj)) {
-      // Check if field name is sensitive
-      const isSensitive = this.isSensitiveField(key);
-      const isUrlField = key.toLowerCase().includes('url') ||
-                         key.toLowerCase().includes('endpoint') ||
-                         key.toLowerCase().includes('webhook');
+      // Check if key is sensitive
+      if (this.isSensitiveField(key)) {
+        sanitized[key] = '[REDACTED]';
+        continue;
+      }

-      // Recursively sanitize nested objects (unless it's a sensitive non-URL field)
+      // Recursively sanitize nested objects
      if (typeof value === 'object' && value !== null) {
-        if (isSensitive && !isUrlField) {
-          // For sensitive object fields (like 'authentication'), redact completely
-          sanitized[key] = '[REDACTED]';
-        } else {
-          sanitized[key] = this.sanitizeObject(value);
-        }
+        sanitized[key] = this.sanitizeObject(value);
      }
      // Sanitize string values
      else if (typeof value === 'string') {
-        // For sensitive fields (except URL fields), use generic redaction
-        if (isSensitive && !isUrlField) {
-          sanitized[key] = '[REDACTED]';
-        } else {
-          // For URL fields or non-sensitive fields, use pattern-specific sanitization
-          sanitized[key] = this.sanitizeString(value, key);
-        }
-      }
-      // For non-string sensitive fields, redact completely
-      else if (isSensitive) {
-        sanitized[key] = '[REDACTED]';
+        sanitized[key] = this.sanitizeString(value, key);
      }
      // Keep other types as-is
      else {
@@ -230,42 +212,13 @@ export class WorkflowSanitizer {

    let sanitized = value;

-    // Apply all sensitive patterns with their specific placeholders
-    for (const patternDef of this.SENSITIVE_PATTERNS) {
+    // Apply all sensitive patterns
+    for (const pattern of this.SENSITIVE_PATTERNS) {
      // Skip webhook patterns - already handled above
-      if (patternDef.placeholder.includes('WEBHOOK')) {
+      if (pattern.toString().includes('webhook')) {
        continue;
      }
-
-      // Skip if already sanitized with a placeholder to prevent double-redaction
-      if (sanitized.includes('[REDACTED')) {
-        break;
-      }
-
-      // Special handling for URL with auth - preserve path after credentials
-      if (patternDef.placeholder === '[REDACTED_URL_WITH_AUTH]') {
-        const matches = value.match(patternDef.pattern);
-        if (matches) {
-          for (const match of matches) {
-            // Extract path after the authenticated URL
-            const fullUrlMatch = value.indexOf(match);
-            if (fullUrlMatch !== -1) {
-              const afterUrl = value.substring(fullUrlMatch + match.length);
-              // If there's a path after the URL, preserve it
-              if (afterUrl && afterUrl.startsWith('/')) {
-                const pathPart = afterUrl.split(/[\s?&#]/)[0]; // Get path until query/fragment
-                sanitized = sanitized.replace(match + pathPart, patternDef.placeholder + pathPart);
-              } else {
-                sanitized = sanitized.replace(match, patternDef.placeholder);
-              }
-            }
-          }
-        }
-        continue;
-      }
-
-      // Apply pattern with its specific placeholder
-      sanitized = sanitized.replace(patternDef.pattern, patternDef.placeholder);
+      sanitized = sanitized.replace(pattern, '[REDACTED]');
    }

    // Additional sanitization for specific field types
@@ -273,13 +226,9 @@ export class WorkflowSanitizer {
        fieldName.toLowerCase().includes('endpoint')) {
      // Keep URL structure but remove domain details
      if (sanitized.startsWith('http://') || sanitized.startsWith('https://')) {
-        // If value has been redacted with URL_WITH_AUTH, preserve it
-        if (sanitized.includes('[REDACTED_URL_WITH_AUTH]')) {
-          return sanitized; // Already properly sanitized with path preserved
-        }
-        // If value has other redactions, leave it as is
+        // If any part of the value was redacted, replace the whole URL with the placeholder
        if (sanitized.includes('[REDACTED]')) {
-          return sanitized;
+          return '[REDACTED]';
        }
        const urlParts = sanitized.split('/');
        if (urlParts.length > 2) {
--- a/tests/unit/telemetry/workflow-sanitizer.test.ts
+++ b/tests/unit/telemetry/workflow-sanitizer.test.ts
@@ -49,7 +49,7 @@ describe('WorkflowSanitizer', () => {

      const sanitized = WorkflowSanitizer.sanitizeWorkflow(workflow);

-      expect(sanitized.nodes[0].parameters.webhookUrl).toBe('https://[webhook-url]');
+      expect(sanitized.nodes[0].parameters.webhookUrl).toBe('[REDACTED]');
      expect(sanitized.nodes[0].parameters.method).toBe('POST'); // Method should remain
      expect(sanitized.nodes[0].parameters.path).toBe('my-webhook'); // Path should remain
    });
@@ -104,9 +104,9 @@ describe('WorkflowSanitizer', () => {

      const sanitized = WorkflowSanitizer.sanitizeWorkflow(workflow);

-      expect(sanitized.nodes[0].parameters.url).toBe('https://[domain]/endpoint');
-      expect(sanitized.nodes[0].parameters.endpoint).toBe('https://[domain]/api');
-      expect(sanitized.nodes[0].parameters.baseUrl).toBe('https://[domain]');
+      expect(sanitized.nodes[0].parameters.url).toBe('[REDACTED]');
+      expect(sanitized.nodes[0].parameters.endpoint).toBe('[REDACTED]');
+      expect(sanitized.nodes[0].parameters.baseUrl).toBe('[REDACTED]');
    });

    it('should calculate workflow metrics correctly', () => {
@@ -480,8 +480,8 @@ describe('WorkflowSanitizer', () => {
      expect(params.secret_token).toBe('[REDACTED]');
      expect(params.authKey).toBe('[REDACTED]');
      expect(params.clientSecret).toBe('[REDACTED]');
-      expect(params.webhookUrl).toBe('https://hooks.example.com/services/T00000000/B00000000/[REDACTED]');
-      expect(params.databaseUrl).toBe('[REDACTED_URL_WITH_AUTH]');
+      expect(params.webhookUrl).toBe('[REDACTED]');
+      expect(params.databaseUrl).toBe('[REDACTED]');
      expect(params.connectionString).toBe('[REDACTED]');

      // Safe values should remain
@@ -515,9 +515,9 @@ describe('WorkflowSanitizer', () => {
      const sanitized = WorkflowSanitizer.sanitizeWorkflow(workflow);

      const headers = sanitized.nodes[0].parameters.headers;
-      expect(headers[0].value).toBe('Bearer [REDACTED]'); // Authorization (Bearer prefix preserved)
+      expect(headers[0].value).toBe('[REDACTED]'); // Authorization
      expect(headers[1].value).toBe('application/json'); // Content-Type (safe)
-      expect(headers[2].value).toBe('[REDACTED_TOKEN]'); // X-API-Key (32+ chars)
+      expect(headers[2].value).toBe('[REDACTED]'); // X-API-Key
      expect(sanitized.nodes[0].parameters.methods).toEqual(['GET', 'POST']); // Array should remain
    });
Author	SHA1	Message	Date
czlonkowski	5db7924711	perf: implement validator instance caching to avoid redundant initialization - Add module-level cached WorkflowValidator instance - Create getValidator() helper to reuse validator across mutations - Update pre/post mutation validation to use cached instance - Avoids redundant NodeSimilarityService initialization on every mutation Conceived by Romuald Członkowski - https://www.aiadvisors.pl/en 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>	2025-11-13 20:05:58 +01:00
czlonkowski	e6bb22eea1	fix: critical telemetry improvements for data quality and security Fixed three critical issues in workflow mutation telemetry: 1. Fixed Inconsistent Sanitization (Security Critical) - Problem: 30% of workflows unsanitized, exposing credentials/tokens - Solution: Use robust WorkflowSanitizer.sanitizeWorkflowRaw() - Impact: 100% sanitization with 17 sensitive patterns redacted - Files: workflow-sanitizer.ts, mutation-tracker.ts 2. Enabled Validation Data Capture (Data Quality) - Problem: Zero validation metrics captured (all NULL) - Solution: Add pre/post mutation validation with WorkflowValidator - Impact: Measure mutation quality, track error resolution - Non-blocking validation that captures errors/warnings - Files: handlers-workflow-diff.ts 3. Improved Intent Capture (Data Quality) - Problem: 92.62% generic "Partial workflow update" intents - Solution: Enhanced docs + automatic intent inference - Impact: Meaningful intents auto-generated from operations - Files: n8n-update-partial-workflow.ts, handlers-workflow-diff.ts Expected Results: - 100% sanitization coverage (up from 70%) - 100% validation capture (up from 0%) - 50%+ meaningful intents (up from 7.33%) Version bumped to 2.22.17 🤖 Generated with [Claude Code](https://claude.com/claude-code) Conceived by Romuald Członkowski - https://www.aiadvisors.pl/en Co-Authored-By: Claude <noreply@anthropic.com>	2025-11-13 19:40:52 +01:00
czlonkowski	a1291c59f3	fix: resolve TypeScript lint errors in telemetry tests Fixed type issues in mutation-tracker and mutation-validator tests: - Import and use MutationToolName enum instead of string literals - Fix ValidationResult.errors to use proper object structure - Add UpdateNodeOperation type assertion for operation with nodeName All TypeScript errors resolved, lint now passes. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Conceived by Romuald Członkowski - https://www.aiadvisors.pl/en Co-Authored-By: Claude <noreply@anthropic.com>	2025-11-13 14:06:00 +01:00
czlonkowski	5cb1263468	chore: bump version to 2.22.16 with telemetry changelog Updated package.json and package.runtime.json to version 2.22.16. Added comprehensive CHANGELOG entry documenting workflow mutation telemetry enhancements for better AI-powered workflow assistance. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Conceived by Romuald Członkowski - https://www.aiadvisors.pl/en Co-Authored-By: Claude <noreply@anthropic.com>	2025-11-13 13:48:43 +01:00
czlonkowski	7ac748e73f	feat: enhance workflow mutation telemetry for better AI responses Improve workflow mutation tracking to capture comprehensive data that helps provide better responses when users update workflows. This enhancement collects workflow state, user intent, and operation details to enable more context-aware assistance. Key improvements: - Reduce auto-flush threshold from 5 to 2 for more reliable mutation tracking - Add comprehensive workflow and credential sanitization to mutation tracker - Document intent parameter in workflow update tools for better UX - Fix mutation queue handling in telemetry manager (flush now handles 3 queues) - Add extensive unit tests for mutation tracking and validation (35 new tests) Technical changes: - mutation-tracker.ts: Multi-layer sanitization (workflow, node, parameter levels) - batch-processor.ts: Support mutation data flushing to Supabase - telemetry-manager.ts: Auto-flush mutations at threshold 2, track mutations queue - handlers-workflow-diff.ts: Track workflow mutations with sanitized data - Tests: 13 tests for mutation-tracker, 22 tests for mutation-validator The intent parameter messaging emphasizes user benefit ("helps to return better response") rather than technical implementation details. Conceived by Romuald Członkowski - https://www.aiadvisors.pl/en 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>	2025-11-13 13:42:15 +01:00
czlonkowski	6719628350	fix: improve mutation telemetry error logging and diagnostics Changes: - Upgrade error logging from debug to warn level for better visibility - Add diagnostic logging to track mutation processing - Log telemetry disabled state explicitly - Add context info (sessionId, intent, operationCount) to error logs - Remove 'await' from telemetry calls to make them truly non-blocking This will help identify why mutations aren't being persisted to the workflow_mutations table despite successful workflow operations. Conceived by Romuald Członkowski - https://www.aiadvisors.pl/en	2025-11-12 17:52:52 +01:00
czlonkowski	00a2e77643	fix: reduce mutation auto-flush threshold from 5 to 2 Lower the auto-flush threshold for workflow mutations from 5 to 2 to ensure more timely data persistence. Since mutations are less frequent than regular telemetry events, a lower threshold provides: - Faster data persistence (don't wait for 5 mutations) - Better testing experience (easier to verify with fewer operations) - Reduced risk of data loss if process exits before threshold - More responsive telemetry for low-volume mutation scenarios This complements the existing 5-second periodic flush and process exit handlers, ensuring mutations are persisted promptly. Conceived by Romuald Członkowski - https://www.aiadvisors.pl/en	2025-11-12 17:24:30 +01:00
czlonkowski	0ae8734148	fix: enable RLS policies for workflow_mutations table Enable Row-Level Security and add policies: - Allow anonymous (anon) inserts for telemetry data collection - Allow authenticated reads for data analysis and querying These policies are required for the telemetry system to function correctly with Supabase, as the MCP server uses the anon key to insert mutation data. Conceived by Romuald Członkowski - https://www.aiadvisors.pl/en	2025-11-12 16:57:45 +01:00
czlonkowski	efe9437f20	fix: correct SQL syntax for expression index in workflow_mutations schema The expression index for significant changes needs double parentheses around the arithmetic expression to be valid PostgreSQL syntax. Conceived by Romuald Członkowski - https://www.aiadvisors.pl/en	2025-11-12 16:39:06 +01:00
czlonkowski	61fdd6433a	feat: add comprehensive telemetry for partial workflow updates Implement telemetry infrastructure to track workflow mutations from partial update operations. This enables data-driven improvements to partial update tooling by capturing: - Workflow state before and after mutations - User intent and operation patterns - Validation results and improvements - Change metrics (nodes/connections modified) - Success/failure rates and error patterns New Components: - Intent classifier: Categorizes mutation patterns - Intent sanitizer: Removes PII from user instructions - Mutation validator: Ensures data quality before tracking - Mutation tracker: Coordinates validation and metric calculation Extended Components: - TelemetryManager: New trackWorkflowMutation() method - EventTracker: Mutation queue management - BatchProcessor: Mutation data flushing to Supabase MCP Tool Enhancements: - n8n_update_partial_workflow: Added optional 'intent' parameter - n8n_update_full_workflow: Added optional 'intent' parameter - Both tools now track mutations asynchronously Database Schema: - New workflow_mutations table with 20+ fields - Comprehensive indexes for efficient querying - Supports deduplication and data analysis This telemetry system is: - Privacy-focused (PII sanitization, anonymized users) - Non-blocking (async tracking, silent failures) - Production-ready (batching, retries, circuit breaker) - Backward compatible (all parameters optional) Conceived by Romuald Członkowski - https://www.aiadvisors.pl/en	2025-11-12 16:36:16 +01:00