mirror of
https://github.com/czlonkowski/n8n-mcp.git
synced 2026-01-30 06:22:04 +00:00
- Added OperationSimilarityService for validating operations with "Did you mean...?" suggestions
- Added ResourceSimilarityService for validating resources with plural/singular detection
- Implements Levenshtein distance algorithm for typo detection
- Pattern matching for common operation/resource mistakes
- 5-minute cache with automatic cleanup to prevent memory leaks
- Confidence scoring (30% minimum threshold) for suggestion quality
- Resource-aware operation filtering for contextual suggestions
- Safe JSON parsing with ValidationServiceError for proper error handling
- Type guards for safe property access
- Performance optimizations with early termination
- Comprehensive test coverage (37 new tests)
- Integration tested with n8n-mcp-tester agent

Example use cases:
- "listFiles" → suggests "search" for Google Drive
- "files" → suggests singular "file"
- "flie" → suggests "file" (typo correction)
- "downlod" → suggests "download"

🤖 Generated with Claude Code

Co-Authored-By: Claude <noreply@anthropic.com>
522 lines
19 KiB
TypeScript
522 lines
19 KiB
TypeScript
import { NodeRepository } from '../database/node-repository';
|
|
import { logger } from '../utils/logger';
|
|
import { ValidationServiceError } from '../errors/validation-service-error';
|
|
|
|
/**
 * A single "did you mean …?" candidate for an invalid resource name.
 */
export interface ResourceSuggestion {
  /** The valid resource value being suggested. */
  value: string;
  /** Score between 0 and 1; higher means a more likely intended value. */
  confidence: number;
  /** Human-readable explanation of why this value is suggested. */
  reason: string;
  /** Operation values available for the suggested resource, when known. */
  availableOperations?: string[];
}
|
|
|
|
/**
 * A known, hand-curated mistake and the resource value it should map to.
 */
interface ResourcePattern {
  /** The mistaken input, compared case-insensitively against user input. */
  pattern: string;
  /** The resource value to suggest instead. */
  suggestion: string;
  /** Confidence assigned to the suggestion when the pattern matches. */
  confidence: number;
  /** Human-readable explanation shown alongside the suggestion. */
  reason: string;
}
|
|
|
|
/**
 * A resource offered by a node, as extracted from its property definitions.
 */
interface NodeResourceEntry {
  /** Value expected in the node's `resource` parameter. */
  value: string;
  /** Display name of the option; falls back to `value` when absent. */
  name: string;
  /** Operation values whose display condition references this resource. */
  operations: string[];
}

/**
 * Loosely typed view of a raw node property definition. Only the fields this
 * service reads are declared; everything is narrowed at runtime before use.
 */
interface RawNodeProperty {
  name?: unknown;
  options?: unknown;
  displayOptions?: { show?: { resource?: unknown } };
}

/**
 * Produces "did you mean …?" suggestions for an invalid `resource` value of a
 * node, combining three signals:
 *
 * 1. hand-curated mistake patterns per node family,
 * 2. automatic plural/singular conversion,
 * 3. string similarity (substring containment and Levenshtein distance).
 *
 * Both the resources extracted per node type and the ranked suggestions are
 * cached for `CACHE_DURATION_MS`.
 */
export class ResourceSimilarityService {
  private static readonly CACHE_DURATION_MS = 5 * 60 * 1000; // 5 minutes
  private static readonly MIN_CONFIDENCE = 0.3; // 30% minimum confidence to suggest
  private static readonly MAX_SUGGESTIONS = 5;

  /** Suggestion cache is trimmed once it grows past this many entries… */
  private static readonly MAX_SUGGESTION_CACHE_SIZE = 100;
  /** …down to this many of the most recently inserted entries. */
  private static readonly SUGGESTION_CACHE_TRIM_SIZE = 50;
  /** Confidence assigned to an automatic plural/singular match. */
  private static readonly PLURAL_MATCH_CONFIDENCE = 0.9;

  // Confidence thresholds for better code clarity
  private static readonly CONFIDENCE_THRESHOLDS = {
    EXACT: 1.0,
    VERY_HIGH: 0.95,
    HIGH: 0.8,
    MEDIUM: 0.6,
    MIN_SUBSTRING: 0.7
  } as const;

  /**
   * Maps lower-cased node-type fragments to a key of `commonPatterns`.
   * Order matters: the first family with a matching marker wins.
   */
  private static readonly PATTERN_FAMILIES: ReadonlyArray<{ key: string; markers: readonly string[] }> = [
    { key: 'googleDrive', markers: ['googledrive'] },
    { key: 'slack', markers: ['slack'] },
    { key: 'database', markers: ['postgres', 'mysql', 'mongodb'] },
    { key: 'googleSheets', markers: ['googlesheets'] },
    { key: 'email', markers: ['gmail', 'email'] }
  ];

  /**
   * Keywords in operation names that hint at an implicit resource.
   * Order matters: the first entry with any matching operation wins.
   */
  private static readonly OPERATION_KEYWORDS: ReadonlyArray<{ keywords: readonly string[]; resource: string }> = [
    { keywords: ['file', 'upload', 'download'], resource: 'file' },
    { keywords: ['folder', 'directory'], resource: 'folder' },
    { keywords: ['message', 'send', 'reply'], resource: 'message' },
    { keywords: ['channel', 'broadcast'], resource: 'channel' },
    { keywords: ['user', 'member'], resource: 'user' },
    { keywords: ['table', 'row', 'column'], resource: 'table' },
    { keywords: ['document', 'doc'], resource: 'document' }
  ];

  private repository: NodeRepository;
  private resourceCache: Map<string, { resources: NodeResourceEntry[]; timestamp: number }> = new Map();
  private suggestionCache: Map<string, { suggestions: ResourceSuggestion[]; timestamp: number }> = new Map();
  private commonPatterns: Map<string, ResourcePattern[]>;
  private lastCleanupAt = Date.now();

  constructor(repository: NodeRepository) {
    this.repository = repository;
    this.commonPatterns = this.initializeCommonPatterns();
  }

  /**
   * Run cache cleanup when one cache lifetime has passed since the last run
   * or the suggestion cache has outgrown its size limit. Deterministic, so
   * the size bound does not depend on chance.
   */
  private cleanupIfDue(): void {
    const overdue = Date.now() - this.lastCleanupAt >= ResourceSimilarityService.CACHE_DURATION_MS;
    const oversized = this.suggestionCache.size > ResourceSimilarityService.MAX_SUGGESTION_CACHE_SIZE;
    if (overdue || oversized) {
      this.cleanupExpiredEntries();
    }
  }

  /**
   * Clean up expired cache entries to prevent memory leaks.
   *
   * Removes every resource/suggestion entry older than `CACHE_DURATION_MS`,
   * then trims the suggestion cache to its most recently inserted entries if
   * it is still over the size limit.
   */
  private cleanupExpiredEntries(): void {
    const now = Date.now();
    const ttl = ResourceSimilarityService.CACHE_DURATION_MS;

    for (const [key, entry] of this.resourceCache) {
      if (now - entry.timestamp >= ttl) {
        this.resourceCache.delete(key);
      }
    }

    for (const [key, entry] of this.suggestionCache) {
      if (now - entry.timestamp >= ttl) {
        this.suggestionCache.delete(key);
      }
    }

    if (this.suggestionCache.size > ResourceSimilarityService.MAX_SUGGESTION_CACHE_SIZE) {
      // Map iterates in insertion order, so the first keys are the oldest.
      let excess = this.suggestionCache.size - ResourceSimilarityService.SUGGESTION_CACHE_TRIM_SIZE;
      for (const key of this.suggestionCache.keys()) {
        if (excess <= 0) break;
        this.suggestionCache.delete(key);
        excess--;
      }
    }

    this.lastCleanupAt = now;
  }

  /**
   * Initialize common resource mistake patterns, keyed by node family
   * (plus a `generic` family that applies to every node).
   */
  private initializeCommonPatterns(): Map<string, ResourcePattern[]> {
    return new Map<string, ResourcePattern[]>([
      // Google Drive patterns
      ['googleDrive', [
        { pattern: 'files', suggestion: 'file', confidence: 0.95, reason: 'Use singular "file" not plural' },
        { pattern: 'folders', suggestion: 'folder', confidence: 0.95, reason: 'Use singular "folder" not plural' },
        { pattern: 'permissions', suggestion: 'permission', confidence: 0.9, reason: 'Use singular form' },
        { pattern: 'fileAndFolder', suggestion: 'fileFolder', confidence: 0.9, reason: 'Use "fileFolder" for combined operations' },
        { pattern: 'driveFiles', suggestion: 'file', confidence: 0.8, reason: 'Use "file" for file operations' },
        { pattern: 'sharedDrives', suggestion: 'drive', confidence: 0.85, reason: 'Use "drive" for shared drive operations' }
      ]],

      // Slack patterns
      ['slack', [
        { pattern: 'messages', suggestion: 'message', confidence: 0.95, reason: 'Use singular "message" not plural' },
        { pattern: 'channels', suggestion: 'channel', confidence: 0.95, reason: 'Use singular "channel" not plural' },
        { pattern: 'users', suggestion: 'user', confidence: 0.95, reason: 'Use singular "user" not plural' },
        { pattern: 'msg', suggestion: 'message', confidence: 0.85, reason: 'Use full "message" not abbreviation' },
        { pattern: 'dm', suggestion: 'message', confidence: 0.7, reason: 'Use "message" for direct messages' },
        { pattern: 'conversation', suggestion: 'channel', confidence: 0.7, reason: 'Use "channel" for conversations' }
      ]],

      // Database patterns (postgres, mysql, mongodb)
      ['database', [
        { pattern: 'tables', suggestion: 'table', confidence: 0.95, reason: 'Use singular "table" not plural' },
        { pattern: 'queries', suggestion: 'query', confidence: 0.95, reason: 'Use singular "query" not plural' },
        { pattern: 'collections', suggestion: 'collection', confidence: 0.95, reason: 'Use singular "collection" not plural' },
        { pattern: 'documents', suggestion: 'document', confidence: 0.95, reason: 'Use singular "document" not plural' },
        { pattern: 'records', suggestion: 'record', confidence: 0.85, reason: 'Use "record" or "document"' },
        { pattern: 'rows', suggestion: 'row', confidence: 0.9, reason: 'Use singular "row"' }
      ]],

      // Google Sheets patterns
      ['googleSheets', [
        { pattern: 'sheets', suggestion: 'sheet', confidence: 0.95, reason: 'Use singular "sheet" not plural' },
        { pattern: 'spreadsheets', suggestion: 'spreadsheet', confidence: 0.95, reason: 'Use singular "spreadsheet"' },
        { pattern: 'cells', suggestion: 'cell', confidence: 0.9, reason: 'Use singular "cell"' },
        { pattern: 'ranges', suggestion: 'range', confidence: 0.9, reason: 'Use singular "range"' },
        { pattern: 'worksheets', suggestion: 'sheet', confidence: 0.8, reason: 'Use "sheet" for worksheet operations' }
      ]],

      // Email patterns
      ['email', [
        { pattern: 'emails', suggestion: 'email', confidence: 0.95, reason: 'Use singular "email" not plural' },
        { pattern: 'messages', suggestion: 'message', confidence: 0.9, reason: 'Use "message" for email operations' },
        { pattern: 'mails', suggestion: 'email', confidence: 0.9, reason: 'Use "email" not "mail"' },
        { pattern: 'attachments', suggestion: 'attachment', confidence: 0.95, reason: 'Use singular "attachment"' }
      ]],

      // Generic plural/singular patterns
      ['generic', [
        { pattern: 'items', suggestion: 'item', confidence: 0.9, reason: 'Use singular form' },
        { pattern: 'objects', suggestion: 'object', confidence: 0.9, reason: 'Use singular form' },
        { pattern: 'entities', suggestion: 'entity', confidence: 0.9, reason: 'Use singular form' },
        { pattern: 'resources', suggestion: 'resource', confidence: 0.9, reason: 'Use singular form' },
        { pattern: 'elements', suggestion: 'element', confidence: 0.9, reason: 'Use singular form' }
      ]]
    ]);
  }

  /**
   * Find similar resources for an invalid resource using pattern matching
   * and Levenshtein distance algorithms.
   *
   * The full ranked list is cached per `nodeType`/`invalidResource` pair and
   * sliced per call, so calls with different `maxSuggestions` values do not
   * affect each other. The returned array is a fresh copy on every call.
   *
   * @param nodeType - The n8n node type (e.g., 'nodes-base.googleDrive')
   * @param invalidResource - The invalid resource provided by the user
   * @param maxSuggestions - Maximum number of suggestions to return (default: 5);
   *   negative values are treated as 0
   * @returns Array of resource suggestions sorted by descending confidence;
   *   empty when the input is empty, already valid, or nothing is similar enough
   *
   * @example
   * findSimilarResources('nodes-base.googleDrive', 'files', 3)
   * // Returns: [{ value: 'file', confidence: 0.95, reason: 'Use singular "file" not plural' }]
   */
  findSimilarResources(
    nodeType: string,
    invalidResource: string,
    maxSuggestions: number = ResourceSimilarityService.MAX_SUGGESTIONS
  ): ResourceSuggestion[] {
    // An empty input carries no signal; without this guard it would count as
    // a substring of every resource name.
    if (typeof invalidResource !== 'string' || invalidResource.length === 0) {
      return [];
    }

    this.cleanupIfDue();

    const limit = Math.max(0, maxSuggestions);
    const cacheKey = `${nodeType}:${invalidResource}`;

    const cached = this.suggestionCache.get(cacheKey);
    if (cached && Date.now() - cached.timestamp < ResourceSimilarityService.CACHE_DURATION_MS) {
      return cached.suggestions.slice(0, limit);
    }

    const validResources = this.getNodeResources(nodeType);
    const ranked = this.rankSuggestions(nodeType, invalidResource, validResources);

    // Delete first so a refreshed key moves to the "most recent" end of the Map.
    this.suggestionCache.delete(cacheKey);
    this.suggestionCache.set(cacheKey, { suggestions: ranked, timestamp: Date.now() });

    return ranked.slice(0, limit);
  }

  /**
   * Build the complete list of suggestions, sorted by descending confidence.
   * When several signals propose the same value, the first one recorded wins
   * (patterns, then plural/singular, then similarity).
   */
  private rankSuggestions(
    nodeType: string,
    invalidResource: string,
    validResources: readonly NodeResourceEntry[]
  ): ResourceSuggestion[] {
    const needle = invalidResource.toLowerCase();

    // Early termination for exact match - no suggestions needed
    if (validResources.some(resource => this.getResourceValue(resource).toLowerCase() === needle)) {
      return [];
    }

    const suggestions: ResourceSuggestion[] = [];
    const seen = new Set<string>();
    const addSuggestion = (suggestion: ResourceSuggestion): void => {
      if (seen.has(suggestion.value)) return;
      seen.add(suggestion.value);
      suggestions.push(suggestion);
    };

    // 1. Curated patterns, but only when the suggested resource really exists.
    for (const pattern of this.getNodePatterns(nodeType)) {
      if (pattern.pattern.toLowerCase() !== needle) continue;
      const exists = validResources.some(resource => this.getResourceValue(resource) === pattern.suggestion);
      if (exists) {
        addSuggestion({
          value: pattern.suggestion,
          confidence: pattern.confidence,
          reason: pattern.reason
        });
      }
    }

    // 2. Automatic plural/singular conversion and 3. string similarity.
    const singularForms = this.singularCandidates(needle);
    const pluralForm = this.toPlural(needle);
    const pluralReason = needle.endsWith('s')
      ? 'Use singular form for resources'
      : 'Incorrect plural/singular form';

    for (const resource of validResources) {
      const resourceValue = this.getResourceValue(resource);
      if (resourceValue === '') continue;

      const normalizedValue = resourceValue.toLowerCase();
      if (singularForms.includes(normalizedValue) || normalizedValue === pluralForm) {
        addSuggestion({
          value: resourceValue,
          confidence: ResourceSimilarityService.PLURAL_MATCH_CONFIDENCE,
          reason: pluralReason,
          availableOperations: [...resource.operations]
        });
      }

      const similarity = this.calculateSimilarity(invalidResource, resourceValue);
      if (similarity >= ResourceSimilarityService.MIN_CONFIDENCE) {
        addSuggestion({
          value: resourceValue,
          confidence: similarity,
          reason: this.getSimilarityReason(similarity, invalidResource, resourceValue),
          availableOperations: [...resource.operations]
        });
      }
    }

    suggestions.sort((a, b) => b.confidence - a.confidence);
    return suggestions;
  }

  /**
   * Type-safe extraction of resource value from various formats.
   *
   * @param resource - Resource object (with a `value` field) or plain string
   * @returns The resource value, or '' when no string value is available
   */
  private getResourceValue(resource: unknown): string {
    if (typeof resource === 'string') {
      return resource;
    }
    if (typeof resource === 'object' && resource !== null) {
      const value = (resource as { value?: unknown }).value;
      return typeof value === 'string' ? value : '';
    }
    return '';
  }

  /**
   * Display name of an option, or `fallback` when the option has no string `name`.
   */
  private getOptionName(option: unknown, fallback: string): string {
    if (typeof option === 'object' && option !== null) {
      const name = (option as { name?: unknown }).name;
      if (typeof name === 'string') return name;
    }
    return fallback;
  }

  /**
   * Collect the non-empty string values of an options list; anything that is
   * not an array yields an empty list.
   */
  private optionValues(options: unknown): string[] {
    if (!Array.isArray(options)) return [];
    const values: string[] = [];
    for (const option of options) {
      const value = this.getResourceValue(option);
      if (value !== '') values.push(value);
    }
    return values;
  }

  /**
   * Get resources for a node with caching.
   *
   * Resources come from `resource` properties; each one is annotated with the
   * operations of every `operation` property whose
   * `displayOptions.show.resource` lists it. When a node declares no explicit
   * resources, a resource is inferred from its operation names instead.
   *
   * @returns The node's resources, or an empty array when the node is unknown.
   *   Unknown nodes are not cached; extraction failures are logged and whatever
   *   was collected before the failure is cached and returned.
   */
  private getNodeResources(nodeType: string): NodeResourceEntry[] {
    this.cleanupIfDue();

    const cached = this.resourceCache.get(nodeType);
    if (cached && Date.now() - cached.timestamp < ResourceSimilarityService.CACHE_DURATION_MS) {
      return cached.resources;
    }

    const nodeInfo = this.repository.getNode(nodeType);
    if (!nodeInfo) return [];

    const resources: NodeResourceEntry[] = [];
    const byValue = new Map<string, NodeResourceEntry>();

    try {
      const properties: RawNodeProperty[] = Array.isArray(nodeInfo.properties) ? nodeInfo.properties : [];

      // Pass 1: collect resources. A node may declare several `resource`
      // properties; a repeated value must not create a duplicate entry or
      // reset operations that were already collected.
      for (const prop of properties) {
        if (!prop || typeof prop !== 'object') continue;
        if (prop.name !== 'resource' || !Array.isArray(prop.options)) continue;

        for (const option of prop.options) {
          const value = this.getResourceValue(option);
          if (value === '' || byValue.has(value)) continue;
          const entry: NodeResourceEntry = {
            value,
            name: this.getOptionName(option, value),
            operations: []
          };
          byValue.set(value, entry);
          resources.push(entry);
        }
      }

      // Pass 2: attach operations. Running this after all resources are known
      // makes the result independent of property order.
      for (const prop of properties) {
        if (!prop || typeof prop !== 'object') continue;
        if (prop.name !== 'operation') continue;

        const shownFor = prop.displayOptions?.show?.resource;
        if (!shownFor) continue;

        const targets: unknown[] = Array.isArray(shownFor) ? shownFor : [shownFor];
        const operations = this.optionValues(prop.options);

        for (const target of targets) {
          const entry = typeof target === 'string' ? byValue.get(target) : undefined;
          if (!entry) continue;
          for (const operation of operations) {
            if (!entry.operations.includes(operation)) {
              entry.operations.push(operation);
            }
          }
        }
      }

      // Some nodes don't have explicit resource fields
      if (resources.length === 0) {
        resources.push(...this.extractImplicitResources(properties));
      }
    } catch (error) {
      logger.warn(`Failed to extract resources for ${nodeType}:`, error);
    }

    this.resourceCache.set(nodeType, { resources, timestamp: Date.now() });
    return resources;
  }

  /**
   * Extract implicit resources from node properties.
   *
   * For every `operation` property with options, a resource is inferred from
   * the operation names; operations of properties that infer the same
   * resource are merged into one entry.
   */
  private extractImplicitResources(properties: readonly RawNodeProperty[]): NodeResourceEntry[] {
    const byValue = new Map<string, NodeResourceEntry>();

    for (const prop of properties) {
      if (!prop || typeof prop !== 'object') continue;
      if (prop.name !== 'operation' || !Array.isArray(prop.options)) continue;

      const inferred = this.inferResourceFromOperations(prop.options);
      if (!inferred) continue;

      let entry = byValue.get(inferred);
      if (!entry) {
        entry = {
          value: inferred,
          name: inferred.charAt(0).toUpperCase() + inferred.slice(1),
          operations: []
        };
        byValue.set(inferred, entry);
      }

      for (const operation of this.optionValues(prop.options)) {
        if (!entry.operations.includes(operation)) {
          entry.operations.push(operation);
        }
      }
    }

    return Array.from(byValue.values());
  }

  /**
   * Infer resource type from operations.
   *
   * @param operations - Operation options (objects with a `value`) or plain strings
   * @returns The resource of the first keyword group matched by any operation
   *   name, or null when nothing matches
   */
  private inferResourceFromOperations(operations: readonly unknown[]): string | null {
    const operationNames = this.optionValues(operations).map(name => name.toLowerCase());

    for (const { keywords, resource } of ResourceSimilarityService.OPERATION_KEYWORDS) {
      const matches = operationNames.some(name => keywords.some(keyword => name.includes(keyword)));
      if (matches) return resource;
    }

    return null;
  }

  /**
   * Get patterns for a specific node type: the patterns of the first matching
   * node family (if any) followed by the generic patterns.
   *
   * Matching is case-insensitive so camel-cased node types such as
   * `nodes-base.mySql` are recognised as well.
   */
  private getNodePatterns(nodeType: string): ResourcePattern[] {
    const normalizedType = nodeType.toLowerCase();
    const family = ResourceSimilarityService.PATTERN_FAMILIES.find(({ markers }) =>
      markers.some(marker => normalizedType.includes(marker))
    );

    const familyPatterns = family ? this.commonPatterns.get(family.key) ?? [] : [];
    const genericPatterns = this.commonPatterns.get('generic') ?? [];

    return [...familyPatterns, ...genericPatterns];
  }

  /**
   * Plausible singular forms of a word, most likely first (simple heuristic).
   *
   * English "-es" plurals are ambiguous ("boxes" → "box" but "files" →
   * "file"), so both readings are returned and callers may accept either.
   */
  private singularCandidates(word: string): string[] {
    if (word.endsWith('ies')) {
      return [word.slice(0, -3) + 'y', word.slice(0, -1)];
    }
    if (word.endsWith('es')) {
      const withoutEs = word.slice(0, -2);
      const withoutS = word.slice(0, -1);
      // "-es" is the plural suffix only after a sibilant; otherwise the "e"
      // belongs to the stem.
      const afterSibilant = /(?:s|x|z|ch|sh)$/.test(withoutEs);
      return afterSibilant ? [withoutEs, withoutS] : [withoutS, withoutEs];
    }
    if (word.endsWith('s') && !word.endsWith('ss')) {
      return [word.slice(0, -1)];
    }
    return [word];
  }

  /**
   * Convert to singular form (simple heuristic); returns the most likely
   * candidate, e.g. "files" → "file", "boxes" → "box", "queries" → "query".
   */
  private toSingular(word: string): string {
    return this.singularCandidates(word)[0] ?? word;
  }

  /**
   * Convert to plural form (simple heuristic).
   */
  private toPlural(word: string): string {
    const vowelPlusY = ['ay', 'ey', 'iy', 'oy', 'uy'].includes(word.slice(-2));
    if (word.endsWith('y') && !vowelPlusY) {
      return word.slice(0, -1) + 'ies';
    }
    if (/(?:s|x|z|ch|sh)$/.test(word)) {
      return word + 'es';
    }
    return word + 's';
  }

  /**
   * Calculate similarity between two strings using Levenshtein distance.
   *
   * Comparison is case-insensitive. Identical strings score 1; an empty
   * string scores 0 against any non-empty string; when one string contains
   * the other the score is the length ratio with a floor of `MIN_SUBSTRING`;
   * otherwise the score is `1 - distance / maxLength`, boosted for one- or
   * two-edit typos in words of at most five characters.
   */
  private calculateSimilarity(str1: string, str2: string): number {
    const s1 = str1.toLowerCase();
    const s2 = str2.toLowerCase();

    // Exact match
    if (s1 === s2) return 1.0;

    // The empty string is contained in every string; treat it as no match
    // instead of letting it fall into the substring branch below.
    if (s1.length === 0 || s2.length === 0) return 0;

    const maxLength = Math.max(s1.length, s2.length);

    // One is substring of the other
    if (s1.includes(s2) || s2.includes(s1)) {
      const ratio = Math.min(s1.length, s2.length) / maxLength;
      return Math.max(ResourceSimilarityService.CONFIDENCE_THRESHOLDS.MIN_SUBSTRING, ratio);
    }

    const distance = this.levenshteinDistance(s1, s2);
    let similarity = 1 - distance / maxLength;

    // Boost confidence for single character typos and transpositions in short words
    if (maxLength <= 5) {
      if (distance === 1) {
        similarity = Math.max(similarity, 0.75);
      } else if (distance === 2) {
        // A transposition (e.g., "flie" -> "file") counts as two edits
        similarity = Math.max(similarity, 0.72);
      }
    }

    return similarity;
  }

  /**
   * Calculate Levenshtein distance between two strings, keeping only two rows
   * of the dynamic-programming table in memory.
   */
  private levenshteinDistance(str1: string, str2: string): number {
    const m = str1.length;
    const n = str2.length;
    if (m === 0) return n;
    if (n === 0) return m;

    // previous[j] = distance between the first i-1 chars of str1 and the first j chars of str2
    let previous: number[] = Array.from({ length: n + 1 }, (_, j) => j);
    let current: number[] = new Array<number>(n + 1).fill(0);

    for (let i = 1; i <= m; i++) {
      current[0] = i;
      for (let j = 1; j <= n; j++) {
        const substitutionCost = str1[i - 1] === str2[j - 1] ? 0 : 1;
        current[j] = Math.min(
          previous[j] + 1, // deletion
          current[j - 1] + 1, // insertion
          previous[j - 1] + substitutionCost // match or substitution
        );
      }
      [previous, current] = [current, previous];
    }

    return previous[n];
  }

  /**
   * Generate a human-readable reason for the similarity.
   *
   * @param confidence - Similarity confidence score
   * @param invalid - The invalid resource string
   * @param valid - The valid resource string
   * @returns Human-readable explanation of the similarity
   */
  private getSimilarityReason(confidence: number, invalid: string, valid: string): string {
    const { VERY_HIGH, HIGH, MEDIUM } = ResourceSimilarityService.CONFIDENCE_THRESHOLDS;

    if (confidence >= VERY_HIGH) {
      return 'Almost exact match - likely a typo';
    }
    if (confidence >= HIGH) {
      return 'Very similar - common variation';
    }
    if (confidence >= MEDIUM) {
      return 'Similar resource name';
    }

    // NOTE: scores produced by calculateSimilarity for substring matches are
    // at least MIN_SUBSTRING (0.7), which is above MEDIUM, so this branch is
    // only reached if those thresholds change or a caller passes its own score.
    const invalidLower = invalid.toLowerCase();
    const validLower = valid.toLowerCase();
    if (invalidLower.includes(validLower) || validLower.includes(invalidLower)) {
      return 'Partial match';
    }

    return 'Possibly related resource';
  }

  /**
   * Clear caches
   */
  clearCache(): void {
    this.resourceCache.clear();
    this.suggestionCache.clear();
  }
}