mirror of
https://github.com/czlonkowski/n8n-mcp.git
synced 2026-03-19 17:03:08 +00:00
feat: add operation and resource validation with intelligent suggestions
- Added OperationSimilarityService for validating operations with "Did you mean...?" suggestions
- Added ResourceSimilarityService for validating resources with plural/singular detection
- Implements Levenshtein distance algorithm for typo detection
- Pattern matching for common operation/resource mistakes
- 5-minute cache with automatic cleanup to prevent memory leaks
- Confidence scoring (30% minimum threshold) for suggestion quality
- Resource-aware operation filtering for contextual suggestions
- Safe JSON parsing with ValidationServiceError for proper error handling
- Type guards for safe property access
- Performance optimizations with early termination
- Comprehensive test coverage (37 new tests)
- Integration tested with n8n-mcp-tester agent

Example use cases:
- "listFiles" → suggests "search" for Google Drive
- "files" → suggests singular "file"
- "flie" → suggests "file" (typo correction)
- "downlod" → suggests "download"

🤖 Generated with Claude Code

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -8,6 +8,10 @@
|
||||
import { ConfigValidator, ValidationResult, ValidationError, ValidationWarning } from './config-validator';
|
||||
import { NodeSpecificValidators, NodeValidationContext } from './node-specific-validators';
|
||||
import { FixedCollectionValidator } from '../utils/fixed-collection-validator';
|
||||
import { OperationSimilarityService } from './operation-similarity-service';
|
||||
import { ResourceSimilarityService } from './resource-similarity-service';
|
||||
import { NodeRepository } from '../database/node-repository';
|
||||
import { DatabaseAdapter } from '../database/database-adapter';
|
||||
|
||||
export type ValidationMode = 'full' | 'operation' | 'minimal';
|
||||
export type ValidationProfile = 'strict' | 'runtime' | 'ai-friendly' | 'minimal';
|
||||
@@ -35,6 +39,18 @@ export interface OperationContext {
|
||||
}
|
||||
|
||||
export class EnhancedConfigValidator extends ConfigValidator {
|
||||
private static operationSimilarityService: OperationSimilarityService | null = null;
|
||||
private static resourceSimilarityService: ResourceSimilarityService | null = null;
|
||||
private static nodeRepository: NodeRepository | null = null;
|
||||
|
||||
/**
 * Wire up the similarity services used for resource/operation suggestions.
 *
 * Stores the repository on the class and builds one
 * OperationSimilarityService and one ResourceSimilarityService around it.
 * Intended to be called once at startup; until it has been called,
 * validateResourceAndOperation returns without checking anything.
 *
 * @param repository - Node repository handed to both similarity services
 */
static initializeSimilarityServices(repository: NodeRepository): void {
  this.nodeRepository = repository;
  this.operationSimilarityService = new OperationSimilarityService(repository);
  this.resourceSimilarityService = new ResourceSimilarityService(repository);
}
|
||||
/**
|
||||
* Validate with operation awareness
|
||||
*/
|
||||
@@ -213,7 +229,10 @@ export class EnhancedConfigValidator extends ConfigValidator {
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
// Validate resource and operation using similarity services
|
||||
this.validateResourceAndOperation(nodeType, config, result);
|
||||
|
||||
// First, validate fixedCollection properties for known problematic nodes
|
||||
this.validateFixedCollectionStructures(nodeType, config, result);
|
||||
|
||||
@@ -642,4 +661,127 @@ export class EnhancedConfigValidator extends ConfigValidator {
|
||||
|
||||
// Add any Filter-node-specific validation here in the future
|
||||
}
|
||||
|
||||
/**
 * Validate the `resource` and `operation` values of a node configuration
 * and attach "Did you mean ...?" style hints to the errors it reports.
 *
 * Does nothing until initializeSimilarityServices has been called.
 *
 * For each of the two fields that is present (not `undefined`):
 *  - every existing error whose `property` is that field is dropped from
 *    `result.errors`, so the error produced here replaces it;
 *  - the value is compared (strict equality) against the values the
 *    repository reports for the node;
 *  - if it is not among them and is not the empty string, an
 *    `invalid_value` error is pushed. Its `fix` either names the top
 *    suggestion (confidence >= 0.8) or lists up to five valid values.
 *
 * @param nodeType - Node type whose resources/operations are looked up
 * @param config - Node configuration being validated
 * @param result - Validation result; its `errors` array is replaced/extended in place
 */
private static validateResourceAndOperation(
  nodeType: string,
  config: Record<string, any>,
  result: EnhancedValidationResult
): void {
  const operationService = this.operationSimilarityService;
  const resourceService = this.resourceSimilarityService;
  const repository = this.nodeRepository;

  // Skip if similarity services not initialized
  if (!operationService || !resourceService || !repository) {
    return;
  }

  // Renders `"a", "b", ...` for at most five entries, with a trailing
  // ellipsis when the list was cut short.
  const listValues = (values: any[], pick: (entry: any) => unknown): string => {
    const shown = values.slice(0, 5).map(entry => `"${pick(entry)}"`).join(', ');
    return `${shown}${values.length > 5 ? '...' : ''}`;
  };
  // Repository entries may be plain strings or option objects.
  const resourceValueOf = (entry: any): unknown =>
    typeof entry === 'string' ? entry : entry?.value;
  const operationValueOf = (entry: any): unknown =>
    entry?.operation || entry?.value || entry;

  // Validate resource field if present
  if (config.resource !== undefined) {
    // Remove any existing resource error from base validator to replace with our enhanced version
    result.errors = result.errors.filter(e => e.property !== 'resource');

    const validResources = repository.getNodeResources(nodeType);
    const resourceIsValid = validResources.some(
      r => resourceValueOf(r) === config.resource
    );

    if (!resourceIsValid && config.resource !== '') {
      // Only strings can be fuzzy-matched. Anything else (number, object,
      // null) is still reported as invalid, but without suggestions, so a
      // malformed config cannot make validation itself throw.
      const suggestions =
        typeof config.resource === 'string'
          ? resourceService.findSimilarResources(nodeType, config.resource, 3)
          : [];

      let errorMessage = `Invalid resource "${config.resource}" for node ${nodeType}.`;
      // Default fix: list the valid resources. Replaced below only when the
      // top suggestion is confident enough to recommend outright.
      let fix = `Valid resources: ${listValues(validResources, resourceValueOf)}`;

      if (suggestions.length > 0) {
        const topSuggestion = suggestions[0];
        // Always use "Did you mean" for the top suggestion
        errorMessage += ` Did you mean "${topSuggestion.value}"?`;
        if (topSuggestion.confidence >= 0.8) {
          fix = `Change resource to "${topSuggestion.value}". ${topSuggestion.reason}`;
        }
      }

      result.errors.push({
        type: 'invalid_value',
        property: 'resource',
        message: errorMessage,
        fix
      });
    }
  }

  // Validate operation field if present
  if (config.operation !== undefined) {
    // Remove any existing operation error from base validator to replace with our enhanced version
    result.errors = result.errors.filter(e => e.property !== 'operation');

    const validOperations = repository.getNodeOperations(nodeType, config.resource);
    const operationIsValid = validOperations.some(
      op => operationValueOf(op) === config.operation
    );

    if (!operationIsValid && config.operation !== '') {
      // Same guard as for resources: never hand a non-string to the
      // similarity service, which lower-cases its input.
      const suggestions =
        typeof config.operation === 'string'
          ? operationService.findSimilarOperations(
              nodeType,
              config.operation,
              config.resource,
              3
            )
          : [];

      let errorMessage = `Invalid operation "${config.operation}" for node ${nodeType}`;
      if (config.resource) {
        errorMessage += ` with resource "${config.resource}"`;
      }
      errorMessage += '.';

      const scope = config.resource ? ` for resource "${config.resource}"` : '';
      let fix = `Valid operations${scope}: ${listValues(validOperations, operationValueOf)}`;

      if (suggestions.length > 0) {
        const topSuggestion = suggestions[0];
        if (topSuggestion.confidence >= 0.8) {
          errorMessage += ` Did you mean "${topSuggestion.value}"?`;
          fix = `Change operation to "${topSuggestion.value}". ${topSuggestion.reason}`;
        } else {
          // Low confidence: mention the candidates but keep the full list as the fix.
          errorMessage += ` Similar operations: ${suggestions.map(s => `"${s.value}"`).join(', ')}`;
        }
      }

      result.errors.push({
        type: 'invalid_value',
        property: 'operation',
        message: errorMessage,
        fix
      });
    }
  }
}
|
||||
}
|
||||
|
||||
485
src/services/operation-similarity-service.ts
Normal file
485
src/services/operation-similarity-service.ts
Normal file
@@ -0,0 +1,485 @@
|
||||
import { NodeRepository } from '../database/node-repository';
|
||||
import { logger } from '../utils/logger';
|
||||
import { ValidationServiceError } from '../errors/validation-service-error';
|
||||
|
||||
/**
 * A candidate replacement for an operation value that failed validation.
 */
export interface OperationSuggestion {
  /** The operation value being suggested. */
  value: string;
  /** Match score between 0 and 1; higher means a closer match. */
  confidence: number;
  /** Human-readable explanation of why this value is suggested. */
  reason: string;
  /** Resource the suggestion applies to, when known. */
  resource?: string;
  /** Description (or display name) of the suggested operation, when available. */
  description?: string;
}
|
||||
|
||||
/**
 * A known operation-naming mistake and the value that should be used instead.
 */
interface OperationPattern {
  /** The mistaken operation name; matched case-insensitively against user input. */
  pattern: string;
  /** The operation value to suggest when the pattern matches. */
  suggestion: string;
  /** Confidence assigned to the suggestion (0 to 1). */
  confidence: number;
  /** Explanation shown to the user alongside the suggestion. */
  reason: string;
}
|
||||
|
||||
/**
 * Suggests valid operation names for an operation value that failed validation.
 *
 * Suggestions come from two sources: a table of known naming mistakes per
 * node family (see initializeCommonPatterns) and a string-similarity score
 * built on Levenshtein distance. Operation lists read from the repository are
 * cached for CACHE_DURATION_MS; computed suggestion lists are cached until the
 * suggestion cache is trimmed by cleanupExpiredEntries or clearCache() is called.
 */
export class OperationSimilarityService {
  private static readonly CACHE_DURATION_MS = 5 * 60 * 1000; // 5 minutes
  private static readonly MIN_CONFIDENCE = 0.3; // 30% minimum confidence to suggest
  private static readonly MAX_SUGGESTIONS = 5;

  // Confidence thresholds for better code clarity
  private static readonly CONFIDENCE_THRESHOLDS = {
    EXACT: 1.0,
    VERY_HIGH: 0.95,
    HIGH: 0.8,
    MEDIUM: 0.6,
    MIN_SUBSTRING: 0.7
  } as const;

  private repository: NodeRepository;
  private operationCache: Map<string, { operations: any[], timestamp: number }> = new Map();
  // Holds the full ranked list per key; callers receive a slice of it.
  private suggestionCache: Map<string, OperationSuggestion[]> = new Map();
  private commonPatterns: Map<string, OperationPattern[]>;

  /**
   * @param repository - Source of node definitions (`getNode(nodeType)`)
   */
  constructor(repository: NodeRepository) {
    this.repository = repository;
    this.commonPatterns = this.initializeCommonPatterns();
  }

  /**
   * Clean up expired cache entries to prevent memory leaks.
   * Invoked probabilistically from the lookup methods.
   */
  private cleanupExpiredEntries(): void {
    const now = Date.now();

    // Clean operation cache
    for (const [key, value] of this.operationCache.entries()) {
      if (now - value.timestamp >= OperationSimilarityService.CACHE_DURATION_MS) {
        this.operationCache.delete(key);
      }
    }

    // Clean suggestion cache - these don't have timestamps, so trim if the cache is too large
    if (this.suggestionCache.size > 100) {
      // Keep only the 50 most recently inserted entries (Map preserves insertion order)
      const entries = Array.from(this.suggestionCache.entries());
      this.suggestionCache.clear();
      entries.slice(-50).forEach(([key, value]) => {
        this.suggestionCache.set(key, value);
      });
    }
  }

  /**
   * Initialize common operation mistake patterns, keyed by node family.
   */
  private initializeCommonPatterns(): Map<string, OperationPattern[]> {
    const patterns = new Map<string, OperationPattern[]>();

    // Google Drive patterns
    patterns.set('googleDrive', [
      { pattern: 'listFiles', suggestion: 'search', confidence: 0.85, reason: 'Use "search" with resource: "fileFolder" to list files' },
      { pattern: 'uploadFile', suggestion: 'upload', confidence: 0.95, reason: 'Use "upload" instead of "uploadFile"' },
      { pattern: 'deleteFile', suggestion: 'deleteFile', confidence: 1.0, reason: 'Exact match' },
      { pattern: 'downloadFile', suggestion: 'download', confidence: 0.95, reason: 'Use "download" instead of "downloadFile"' },
      { pattern: 'getFile', suggestion: 'download', confidence: 0.8, reason: 'Use "download" to retrieve file content' },
      { pattern: 'listFolders', suggestion: 'search', confidence: 0.85, reason: 'Use "search" with resource: "fileFolder"' },
    ]);

    // Slack patterns
    patterns.set('slack', [
      { pattern: 'sendMessage', suggestion: 'send', confidence: 0.95, reason: 'Use "send" instead of "sendMessage"' },
      { pattern: 'getMessage', suggestion: 'get', confidence: 0.9, reason: 'Use "get" to retrieve messages' },
      { pattern: 'postMessage', suggestion: 'send', confidence: 0.9, reason: 'Use "send" to post messages' },
      { pattern: 'deleteMessage', suggestion: 'delete', confidence: 0.95, reason: 'Use "delete" instead of "deleteMessage"' },
      { pattern: 'createChannel', suggestion: 'create', confidence: 0.9, reason: 'Use "create" with resource: "channel"' },
    ]);

    // Database patterns (postgres, mysql, mongodb)
    patterns.set('database', [
      { pattern: 'selectData', suggestion: 'select', confidence: 0.95, reason: 'Use "select" instead of "selectData"' },
      { pattern: 'insertData', suggestion: 'insert', confidence: 0.95, reason: 'Use "insert" instead of "insertData"' },
      { pattern: 'updateData', suggestion: 'update', confidence: 0.95, reason: 'Use "update" instead of "updateData"' },
      { pattern: 'deleteData', suggestion: 'delete', confidence: 0.95, reason: 'Use "delete" instead of "deleteData"' },
      { pattern: 'query', suggestion: 'select', confidence: 0.7, reason: 'Use "select" for queries' },
      { pattern: 'fetch', suggestion: 'select', confidence: 0.7, reason: 'Use "select" to fetch data' },
    ]);

    // HTTP patterns
    patterns.set('httpRequest', [
      { pattern: 'fetch', suggestion: 'GET', confidence: 0.8, reason: 'Use "GET" method for fetching data' },
      { pattern: 'send', suggestion: 'POST', confidence: 0.7, reason: 'Use "POST" method for sending data' },
      { pattern: 'create', suggestion: 'POST', confidence: 0.8, reason: 'Use "POST" method for creating resources' },
      { pattern: 'update', suggestion: 'PUT', confidence: 0.8, reason: 'Use "PUT" method for updating resources' },
      { pattern: 'delete', suggestion: 'DELETE', confidence: 0.9, reason: 'Use "DELETE" method' },
    ]);

    // Generic patterns
    patterns.set('generic', [
      { pattern: 'list', suggestion: 'get', confidence: 0.6, reason: 'Consider using "get" or "search"' },
      { pattern: 'retrieve', suggestion: 'get', confidence: 0.8, reason: 'Use "get" to retrieve data' },
      { pattern: 'fetch', suggestion: 'get', confidence: 0.8, reason: 'Use "get" to fetch data' },
      { pattern: 'remove', suggestion: 'delete', confidence: 0.85, reason: 'Use "delete" to remove items' },
      { pattern: 'add', suggestion: 'create', confidence: 0.7, reason: 'Use "create" to add new items' },
    ]);

    return patterns;
  }

  /**
   * Find similar operations for an invalid operation using Levenshtein distance
   * and pattern matching algorithms.
   *
   * @param nodeType - The n8n node type (e.g., 'nodes-base.slack')
   * @param invalidOperation - The invalid operation provided by the user
   * @param resource - Optional resource to filter operations
   * @param maxSuggestions - Maximum number of suggestions to return (default: 5)
   * @returns Suggestions sorted by descending confidence. Empty when the node
   *   is unknown, when `invalidOperation` is not a non-empty string, or when
   *   it exactly (case-sensitively) equals a valid operation.
   * @throws ValidationServiceError when the node's stored operations are a
   *   string that is not valid JSON
   *
   * @example
   * findSimilarOperations('nodes-base.googleDrive', 'listFiles', 'fileFolder')
   * // Returns: [{ value: 'search', confidence: 0.85, reason: 'Use "search" with resource: "fileFolder" to list files' }]
   */
  findSimilarOperations(
    nodeType: string,
    invalidOperation: string,
    resource?: string,
    maxSuggestions: number = OperationSimilarityService.MAX_SUGGESTIONS
  ): OperationSuggestion[] {
    // Callers pass raw config values; only a non-empty string can be matched.
    if (typeof invalidOperation !== 'string' || invalidOperation === '') {
      return [];
    }

    // Clean up expired cache entries periodically
    if (Math.random() < 0.1) { // 10% chance to cleanup on each call
      this.cleanupExpiredEntries();
    }

    const limit = Math.max(0, maxSuggestions);

    // The cache stores the complete ranked list, so the key does not need to
    // include the limit and a small limit cannot truncate later, larger requests.
    const cacheKey = `${nodeType}:${invalidOperation}:${resource || ''}`;
    const cached = this.suggestionCache.get(cacheKey);
    if (cached) {
      return cached.slice(0, limit);
    }

    // Get valid operations for the node
    const nodeInfo = this.repository.getNode(nodeType);
    if (!nodeInfo) {
      return [];
    }

    const validOperations = this.getNodeOperations(nodeType, resource);

    // Early termination for an exact match - no suggestions needed.
    // The comparison is case-sensitive on purpose: a value that differs only
    // in letter case is still invalid and the correctly-cased operation is
    // the most useful suggestion (it scores 1.0 below).
    if (validOperations.some(op => this.getOperationValue(op) === invalidOperation)) {
      return [];
    }

    const suggestions: OperationSuggestion[] = [];
    const wanted = invalidOperation.toLowerCase();

    // Check for exact pattern matches first
    for (const pattern of this.getNodePatterns(nodeType)) {
      if (pattern.pattern.toLowerCase() !== wanted) {
        continue;
      }
      // Only suggest values that really exist on this node/resource
      const exists = validOperations.some(
        op => this.getOperationValue(op) === pattern.suggestion
      );
      if (exists && !suggestions.some(s => s.value === pattern.suggestion)) {
        suggestions.push({
          value: pattern.suggestion,
          confidence: pattern.confidence,
          reason: pattern.reason,
          resource
        });
      }
    }

    // Calculate similarity for all valid operations
    for (const op of validOperations) {
      const opValue = this.getOperationValue(op);
      if (opValue === '') {
        continue; // entry without a usable value - nothing to suggest
      }

      const similarity = this.calculateSimilarity(invalidOperation, opValue);
      if (similarity < OperationSimilarityService.MIN_CONFIDENCE) {
        continue;
      }

      // Don't add if already suggested by pattern (or by an earlier duplicate)
      if (suggestions.some(s => s.value === opValue)) {
        continue;
      }

      suggestions.push({
        value: opValue,
        confidence: similarity,
        reason: this.getSimilarityReason(similarity, invalidOperation, opValue),
        resource: typeof op === 'object' ? op.resource : undefined,
        description: typeof op === 'object' ? (op.description || op.name) : undefined
      });
    }

    // Sort by confidence, cache the full ranking, return the requested slice
    suggestions.sort((a, b) => b.confidence - a.confidence);
    this.suggestionCache.set(cacheKey, suggestions);

    return suggestions.slice(0, limit);
  }

  /**
   * Type-safe extraction of operation value from various formats
   * @param op - Operation object or string
   * @returns The operation value, or '' when the entry has no string value
   */
  private getOperationValue(op: any): string {
    if (typeof op === 'string') {
      return op;
    }
    if (typeof op === 'object' && op !== null) {
      const raw = op.operation || op.value;
      // Guard the declared return type: callers lower-case the result.
      return typeof raw === 'string' ? raw : '';
    }
    return '';
  }

  /**
   * Type-safe extraction of resource value
   * @param resource - Resource object or string
   * @returns The resource value as a string
   */
  private getResourceValue(resource: any): string {
    if (typeof resource === 'string') {
      return resource;
    }
    if (typeof resource === 'object' && resource !== null) {
      return resource.value || '';
    }
    return '';
  }

  /**
   * Get operations for a node, handling resource filtering.
   *
   * Combines the node's stored `operations` (array, object map, or a JSON
   * string of either) with the options of every `operation` property that is
   * shown for the requested resource. Results are cached per node/resource.
   *
   * @throws ValidationServiceError when `operations` is a string that is not valid JSON
   */
  private getNodeOperations(nodeType: string, resource?: string): any[] {
    // Cleanup cache periodically
    if (Math.random() < 0.05) { // 5% chance
      this.cleanupExpiredEntries();
    }

    const cacheKey = `${nodeType}:${resource || 'all'}`;
    const cached = this.operationCache.get(cacheKey);

    if (cached && Date.now() - cached.timestamp < OperationSimilarityService.CACHE_DURATION_MS) {
      return cached.operations;
    }

    const nodeInfo = this.repository.getNode(nodeType);
    if (!nodeInfo) return [];

    let operations: any[] = [];

    // Parse operations from the node with safe JSON parsing
    try {
      let opsData: unknown = nodeInfo.operations;
      if (typeof opsData === 'string') {
        try {
          opsData = JSON.parse(opsData);
        } catch (parseError) {
          logger.error(`JSON parse error for operations in ${nodeType}:`, parseError);
          throw ValidationServiceError.jsonParseError(nodeType, parseError as Error);
        }
      }

      // Normalize after parsing so a JSON string gets the same treatment as
      // in-memory data.
      if (Array.isArray(opsData)) {
        // Copy: entries are appended below and the repository's own array
        // must not grow as a side effect.
        operations = [...opsData];
      } else if (opsData && typeof opsData === 'object') {
        operations = Object.values(opsData as Record<string, unknown>).flat();
      }
    } catch (error) {
      // Re-throw ValidationServiceError, log and continue for others
      if (error instanceof ValidationServiceError) {
        throw error;
      }
      logger.warn(`Failed to process operations for ${nodeType}:`, error);
    }

    // Also check properties for operation fields
    try {
      const properties = nodeInfo.properties || [];
      for (const prop of properties) {
        if (prop.name !== 'operation' || !prop.options) {
          continue;
        }

        // Filter by resource if specified
        const shownForResource = prop.displayOptions?.show?.resource;
        if (shownForResource) {
          const allowedResources = Array.isArray(shownForResource)
            ? shownForResource
            : [shownForResource];
          // Only filter if a specific resource is requested;
          // with no resource specified, include all operations
          if (resource && !allowedResources.includes(resource)) {
            continue;
          }
        }

        operations.push(...prop.options.map((opt: any) => ({
          operation: opt.value,
          name: opt.name,
          description: opt.description,
          resource
        })));
      }
    } catch (error) {
      logger.warn(`Failed to extract operations from properties for ${nodeType}:`, error);
    }

    // Cache and return
    this.operationCache.set(cacheKey, { operations, timestamp: Date.now() });
    return operations;
  }

  /**
   * Get patterns for a specific node type: the matching node-family table
   * (chosen by substring of the node type) followed by the generic table.
   */
  private getNodePatterns(nodeType: string): OperationPattern[] {
    const patterns: OperationPattern[] = [];

    // Add node-specific patterns
    if (nodeType.includes('googleDrive')) {
      patterns.push(...(this.commonPatterns.get('googleDrive') || []));
    } else if (nodeType.includes('slack')) {
      patterns.push(...(this.commonPatterns.get('slack') || []));
    } else if (nodeType.includes('postgres') || nodeType.includes('mysql') || nodeType.includes('mongodb')) {
      patterns.push(...(this.commonPatterns.get('database') || []));
    } else if (nodeType.includes('httpRequest')) {
      patterns.push(...(this.commonPatterns.get('httpRequest') || []));
    }

    // Always add generic patterns
    patterns.push(...(this.commonPatterns.get('generic') || []));

    return patterns;
  }

  /**
   * Calculate a case-insensitive similarity score (0 to 1) between two
   * strings, based on Levenshtein distance with a few boosts.
   */
  private calculateSimilarity(str1: string, str2: string): number {
    const s1 = str1.toLowerCase();
    const s2 = str2.toLowerCase();

    // Exact match
    if (s1 === s2) return 1.0;

    // An empty string is a substring of everything; without this guard it
    // would earn the substring score below.
    if (s1 === '' || s2 === '') return 0;

    // One is substring of the other
    if (s1.includes(s2) || s2.includes(s1)) {
      const ratio = Math.min(s1.length, s2.length) / Math.max(s1.length, s2.length);
      return Math.max(OperationSimilarityService.CONFIDENCE_THRESHOLDS.MIN_SUBSTRING, ratio);
    }

    // Calculate Levenshtein distance
    const distance = this.levenshteinDistance(s1, s2);
    const maxLength = Math.max(s1.length, s2.length);

    // Convert distance to similarity (0 to 1)
    let similarity = 1 - (distance / maxLength);

    // Boost confidence for single character typos and transpositions in short words
    if (distance === 1 && maxLength <= 5) {
      similarity = Math.max(similarity, 0.75);
    } else if (distance === 2 && maxLength <= 5) {
      // Boost for transpositions (a swap of two letters costs 2 edits)
      similarity = Math.max(similarity, 0.72);
    }

    // Boost similarity for common patterns
    if (this.areCommonVariations(s1, s2)) {
      return Math.min(1.0, similarity + 0.2);
    }

    return similarity;
  }

  /**
   * Calculate Levenshtein distance between two strings.
   * Uses two rolling rows instead of the full (m+1) x (n+1) matrix.
   */
  private levenshteinDistance(str1: string, str2: string): number {
    const m = str1.length;
    const n = str2.length;
    if (m === 0) return n;
    if (n === 0) return m;

    // previous[j] = distance between str1[0..i-1) and str2[0..j)
    let previous: number[] = Array.from({ length: n + 1 }, (_, j) => j);
    let current: number[] = new Array<number>(n + 1).fill(0);

    for (let i = 1; i <= m; i++) {
      current[0] = i;
      for (let j = 1; j <= n; j++) {
        current[j] = str1[i - 1] === str2[j - 1]
          ? previous[j - 1]
          : 1 + Math.min(
              previous[j],     // deletion
              current[j - 1],  // insertion
              previous[j - 1]  // substitution
            );
      }
      [previous, current] = [current, previous];
    }

    return previous[n];
  }

  /**
   * Check if two strings are common variations: one carries a common
   * prefix/suffix the other lacks, and they are within two edits of each
   * other once that affix is stripped.
   */
  private areCommonVariations(str1: string, str2: string): boolean {
    // Check for common prefixes/suffixes
    const commonPrefixes = ['get', 'set', 'create', 'delete', 'update', 'send', 'fetch'];
    const commonSuffixes = ['data', 'item', 'record', 'message', 'file', 'folder'];

    const closeEnough = (a: string, b: string): boolean =>
      a === b || this.levenshteinDistance(a, b) <= 2;

    for (const prefix of commonPrefixes) {
      const has1 = str1.startsWith(prefix);
      const has2 = str2.startsWith(prefix);
      if (has1 !== has2) {
        // Strip the affix only where it really is a prefix; a plain
        // replace() would also remove an occurrence inside the other string.
        const s1Clean = has1 ? str1.slice(prefix.length) : str1;
        const s2Clean = has2 ? str2.slice(prefix.length) : str2;
        if (closeEnough(s1Clean, s2Clean)) {
          return true;
        }
      }
    }

    for (const suffix of commonSuffixes) {
      const has1 = str1.endsWith(suffix);
      const has2 = str2.endsWith(suffix);
      if (has1 !== has2) {
        const s1Clean = has1 ? str1.slice(0, -suffix.length) : str1;
        const s2Clean = has2 ? str2.slice(0, -suffix.length) : str2;
        if (closeEnough(s1Clean, s2Clean)) {
          return true;
        }
      }
    }

    return false;
  }

  /**
   * Generate a human-readable reason for the similarity
   * @param confidence - Similarity confidence score
   * @param invalid - The invalid operation string
   * @param valid - The valid operation string
   * @returns Human-readable explanation of the similarity
   */
  private getSimilarityReason(confidence: number, invalid: string, valid: string): string {
    const { VERY_HIGH, HIGH, MEDIUM } = OperationSimilarityService.CONFIDENCE_THRESHOLDS;

    if (confidence >= VERY_HIGH) {
      return 'Almost exact match - likely a typo';
    } else if (confidence >= HIGH) {
      return 'Very similar - common variation';
    } else if (confidence >= MEDIUM) {
      return 'Similar operation';
    } else if (invalid.includes(valid) || valid.includes(invalid)) {
      return 'Partial match';
    } else {
      return 'Possibly related operation';
    }
  }

  /**
   * Clear caches
   */
  clearCache(): void {
    this.operationCache.clear();
    this.suggestionCache.clear();
  }
}
|
||||
522
src/services/resource-similarity-service.ts
Normal file
522
src/services/resource-similarity-service.ts
Normal file
@@ -0,0 +1,522 @@
|
||||
import { NodeRepository } from '../database/node-repository';
|
||||
import { logger } from '../utils/logger';
|
||||
import { ValidationServiceError } from '../errors/validation-service-error';
|
||||
|
||||
/**
 * A candidate replacement for a resource value that failed validation.
 */
export interface ResourceSuggestion {
  /** The resource value being suggested. */
  value: string;
  /** Match score for the suggestion; higher means a closer match. */
  confidence: number;
  /** Human-readable explanation of why this value is suggested. */
  reason: string;
  /** Operations available on the suggested resource, when provided. */
  availableOperations?: string[];
}
|
||||
|
||||
/**
 * A known resource-naming mistake and the value that should be used instead.
 */
interface ResourcePattern {
  /** The mistaken resource name. */
  pattern: string;
  /** The resource value to suggest when the pattern matches. */
  suggestion: string;
  /** Confidence assigned to the suggestion. */
  confidence: number;
  /** Explanation shown to the user alongside the suggestion. */
  reason: string;
}
|
||||
|
||||
export class ResourceSimilarityService {
|
||||
private static readonly CACHE_DURATION_MS = 5 * 60 * 1000; // 5 minutes
|
||||
private static readonly MIN_CONFIDENCE = 0.3; // 30% minimum confidence to suggest
|
||||
private static readonly MAX_SUGGESTIONS = 5;
|
||||
|
||||
// Confidence thresholds for better code clarity
|
||||
private static readonly CONFIDENCE_THRESHOLDS = {
|
||||
EXACT: 1.0,
|
||||
VERY_HIGH: 0.95,
|
||||
HIGH: 0.8,
|
||||
MEDIUM: 0.6,
|
||||
MIN_SUBSTRING: 0.7
|
||||
} as const;
|
||||
|
||||
private repository: NodeRepository;
|
||||
private resourceCache: Map<string, { resources: any[], timestamp: number }> = new Map();
|
||||
private suggestionCache: Map<string, ResourceSuggestion[]> = new Map();
|
||||
private commonPatterns: Map<string, ResourcePattern[]>;
|
||||
|
||||
/**
 * Keep a reference to the repository and build the table of common
 * resource-naming mistakes.
 *
 * @param repository - Node repository stored on the instance
 */
constructor(repository: NodeRepository) {
  this.repository = repository;
  this.commonPatterns = this.initializeCommonPatterns();
}
|
||||
|
||||
/**
 * Evict stale data from both caches so they cannot grow without bound.
 *
 * Resource-cache entries are dropped once they are at least
 * CACHE_DURATION_MS old. Suggestion-cache entries carry no timestamp, so
 * that cache is instead cut back to its 50 most recently inserted entries
 * whenever it holds more than 100.
 */
private cleanupExpiredEntries(): void {
  // Anything stamped at or before this instant has expired.
  const cutoff = Date.now() - ResourceSimilarityService.CACHE_DURATION_MS;

  this.resourceCache.forEach((entry, key) => {
    if (entry.timestamp <= cutoff) {
      this.resourceCache.delete(key);
    }
  });

  if (this.suggestionCache.size <= 100) {
    return;
  }

  // Map iteration follows insertion order, so the tail holds the newest entries.
  const newest = Array.from(this.suggestionCache.entries()).slice(-50);
  this.suggestionCache.clear();
  for (const [key, value] of newest) {
    this.suggestionCache.set(key, value);
  }
}
|
||||
|
||||
/**
 * Build the table of well-known resource naming mistakes.
 *
 * The returned map is keyed by pattern group ('googleDrive', 'slack',
 * 'database', 'googleSheets', 'email', 'generic'). Each group lists wrong
 * spellings together with the suggested replacement, a confidence score and
 * a human-readable reason.
 *
 * @returns Map from pattern group name to its list of mistake patterns
 */
private initializeCommonPatterns(): Map<string, ResourcePattern[]> {
  // Small row builder so each table entry reads as one compact line.
  const row = (
    pattern: string,
    suggestion: string,
    confidence: number,
    reason: string
  ): ResourcePattern => ({ pattern, suggestion, confidence, reason });

  const googleDrive: ResourcePattern[] = [
    row('files', 'file', 0.95, 'Use singular "file" not plural'),
    row('folders', 'folder', 0.95, 'Use singular "folder" not plural'),
    row('permissions', 'permission', 0.9, 'Use singular form'),
    row('fileAndFolder', 'fileFolder', 0.9, 'Use "fileFolder" for combined operations'),
    row('driveFiles', 'file', 0.8, 'Use "file" for file operations'),
    row('sharedDrives', 'drive', 0.85, 'Use "drive" for shared drive operations'),
  ];

  const slack: ResourcePattern[] = [
    row('messages', 'message', 0.95, 'Use singular "message" not plural'),
    row('channels', 'channel', 0.95, 'Use singular "channel" not plural'),
    row('users', 'user', 0.95, 'Use singular "user" not plural'),
    row('msg', 'message', 0.85, 'Use full "message" not abbreviation'),
    row('dm', 'message', 0.7, 'Use "message" for direct messages'),
    row('conversation', 'channel', 0.7, 'Use "channel" for conversations'),
  ];

  // Shared by the database-style nodes (postgres, mysql, mongodb).
  const database: ResourcePattern[] = [
    row('tables', 'table', 0.95, 'Use singular "table" not plural'),
    row('queries', 'query', 0.95, 'Use singular "query" not plural'),
    row('collections', 'collection', 0.95, 'Use singular "collection" not plural'),
    row('documents', 'document', 0.95, 'Use singular "document" not plural'),
    row('records', 'record', 0.85, 'Use "record" or "document"'),
    row('rows', 'row', 0.9, 'Use singular "row"'),
  ];

  const googleSheets: ResourcePattern[] = [
    row('sheets', 'sheet', 0.95, 'Use singular "sheet" not plural'),
    row('spreadsheets', 'spreadsheet', 0.95, 'Use singular "spreadsheet"'),
    row('cells', 'cell', 0.9, 'Use singular "cell"'),
    row('ranges', 'range', 0.9, 'Use singular "range"'),
    row('worksheets', 'sheet', 0.8, 'Use "sheet" for worksheet operations'),
  ];

  const email: ResourcePattern[] = [
    row('emails', 'email', 0.95, 'Use singular "email" not plural'),
    row('messages', 'message', 0.9, 'Use "message" for email operations'),
    row('mails', 'email', 0.9, 'Use "email" not "mail"'),
    row('attachments', 'attachment', 0.95, 'Use singular "attachment"'),
  ];

  // Plural forms that are wrong regardless of the node.
  const generic: ResourcePattern[] = [
    row('items', 'item', 0.9, 'Use singular form'),
    row('objects', 'object', 0.9, 'Use singular form'),
    row('entities', 'entity', 0.9, 'Use singular form'),
    row('resources', 'resource', 0.9, 'Use singular form'),
    row('elements', 'element', 0.9, 'Use singular form'),
  ];

  return new Map<string, ResourcePattern[]>([
    ['googleDrive', googleDrive],
    ['slack', slack],
    ['database', database],
    ['googleSheets', googleSheets],
    ['email', email],
    ['generic', generic],
  ]);
}
|
||||
|
||||
/**
 * Suggest valid resources for an invalid resource name.
 *
 * Candidates are collected in three stages: exact hits in the known-mistake
 * pattern table, plural/singular variants of the input, and string
 * similarity against every valid resource of the node. A resource is only
 * ever suggested once; the first stage that produces it wins.
 *
 * The complete ranked list is cached per `nodeType:invalidResource`, and the
 * `maxSuggestions` limit is applied on the way out. This way a call with a
 * small limit cannot truncate what a later call with a larger limit gets.
 *
 * @param nodeType - The n8n node type (e.g., 'nodes-base.googleDrive')
 * @param invalidResource - The invalid resource provided by the user
 * @param maxSuggestions - Maximum number of suggestions to return
 *   (defaults to ResourceSimilarityService.MAX_SUGGESTIONS); negative values
 *   are treated as 0
 * @returns Resource suggestions sorted by descending confidence; empty when
 *   the input matches a valid resource case-insensitively
 *
 * @example
 * findSimilarResources('nodes-base.googleDrive', 'files', 3)
 * // Returns: [{ value: 'file', confidence: 0.95, reason: 'Use singular "file" not plural' }]
 */
findSimilarResources(
  nodeType: string,
  invalidResource: string,
  maxSuggestions: number = ResourceSimilarityService.MAX_SUGGESTIONS
): ResourceSuggestion[] {
  // Opportunistic cache maintenance: roughly one call in ten triggers it.
  if (Math.random() < 0.1) {
    this.cleanupExpiredEntries();
  }

  const limit = Math.max(0, Math.floor(maxSuggestions));

  // The cache holds the full ranked list; always hand out a fresh slice so
  // callers can neither see a shorter list than requested nor mutate the cache.
  const cacheKey = `${nodeType}:${invalidResource}`;
  const cached = this.suggestionCache.get(cacheKey);
  if (cached) {
    return cached.slice(0, limit);
  }

  const needle = invalidResource.toLowerCase();

  // Pair every resource with its value once, and drop entries without a
  // usable value: an empty string is a substring of everything and would
  // otherwise surface as a bogus "partial match" suggestion.
  const candidates: Array<{ resource: any; value: string }> = [];
  for (const resource of this.getNodeResources(nodeType)) {
    const value = this.getResourceValue(resource);
    if (typeof value === 'string' && value !== '') {
      candidates.push({ resource, value });
    }
  }

  // The input is actually valid (ignoring case): nothing to suggest.
  if (candidates.some(candidate => candidate.value.toLowerCase() === needle)) {
    return [];
  }

  const suggestions: ResourceSuggestion[] = [];
  const addSuggestion = (suggestion: ResourceSuggestion): void => {
    if (!suggestions.some(existing => existing.value === suggestion.value)) {
      suggestions.push(suggestion);
    }
  };

  // Stage 1: known mistakes, kept only if the suggested resource exists on this node.
  for (const pattern of this.getNodePatterns(nodeType)) {
    if (pattern.pattern.toLowerCase() !== needle) continue;
    if (candidates.some(candidate => candidate.value === pattern.suggestion)) {
      addSuggestion({
        value: pattern.suggestion,
        confidence: pattern.confidence,
        reason: pattern.reason
      });
    }
  }

  // Stages 2 and 3: plural/singular variants, then string similarity.
  const singularForm = this.toSingular(invalidResource);
  const pluralForm = this.toPlural(invalidResource);

  for (const { resource, value } of candidates) {
    const availableOperations = typeof resource === 'object' ? resource.operations : undefined;

    // Pick the reason from the form that actually matched, not from the
    // input's last letter: "status" -> "statuses" is not a "use singular" fix.
    if (value === singularForm) {
      addSuggestion({
        value,
        confidence: 0.9,
        reason: 'Use singular form for resources',
        availableOperations
      });
    } else if (value === pluralForm) {
      addSuggestion({
        value,
        confidence: 0.9,
        reason: 'Incorrect plural/singular form',
        availableOperations
      });
    }

    const similarity = this.calculateSimilarity(invalidResource, value);
    if (similarity >= ResourceSimilarityService.MIN_CONFIDENCE) {
      addSuggestion({
        value,
        confidence: similarity,
        reason: this.getSimilarityReason(similarity, invalidResource, value),
        availableOperations
      });
    }
  }

  // Best candidates first.
  suggestions.sort((a, b) => b.confidence - a.confidence);

  this.suggestionCache.set(cacheKey, suggestions);
  return suggestions.slice(0, limit);
}
|
||||
|
||||
/**
 * Extract the resource identifier from a resource entry.
 *
 * Accepts either a plain string or an object carrying a `value` property.
 * Anything that does not yield a string (null, numbers, objects without a
 * string `value`, ...) results in the empty string, so callers can safely
 * call string methods on the result.
 *
 * @param resource - Resource object or string
 * @returns The resource value, or '' when no string value is available
 */
private getResourceValue(resource: any): string {
  if (typeof resource === 'string') {
    return resource;
  }

  if (typeof resource === 'object' && resource !== null) {
    // Only pass real strings through: a truthy non-string `value` would
    // violate the declared return type and break `.toLowerCase()` callers.
    const value: unknown = resource.value;
    return typeof value === 'string' ? value : '';
  }

  return '';
}
|
||||
|
||||
/**
 * Get the resources a node exposes, together with the operations available
 * for each of them, using a time-limited per-node cache.
 *
 * Resources come from the options of every property named `resource`;
 * operations come from properties named `operation` whose
 * `displayOptions.show.resource` names the resource. Resources and
 * operations are collected in two separate passes, so the result does not
 * depend on the order of the properties, and repeated definitions do not
 * produce duplicate entries. When a node has no explicit resources, the
 * implicit-resource heuristic is used instead.
 *
 * Extraction failures are logged and whatever was collected so far is
 * cached and returned.
 *
 * @param nodeType - Node type to look up in the repository
 * @returns Entries of the form `{ value, name, operations }`; empty when the
 *   node is unknown
 */
private getNodeResources(nodeType: string): any[] {
  // Opportunistic cache maintenance: roughly one call in twenty triggers it.
  if (Math.random() < 0.05) {
    this.cleanupExpiredEntries();
  }

  const cached = this.resourceCache.get(nodeType);
  if (cached && Date.now() - cached.timestamp < ResourceSimilarityService.CACHE_DURATION_MS) {
    return cached.resources;
  }

  const nodeInfo = this.repository.getNode(nodeType);
  if (!nodeInfo) return [];

  const resources: any[] = [];
  // Each resource entry shares its operations array with this index, so the
  // second pass fills the entries in place.
  const operationsByResource = new Map<string, string[]>();

  try {
    const properties = nodeInfo.properties || [];

    // Pass 1: register every declared resource exactly once.
    for (const prop of properties) {
      if (prop.name !== 'resource' || !prop.options) continue;

      for (const option of prop.options) {
        if (operationsByResource.has(option.value)) continue;

        const operations: string[] = [];
        operationsByResource.set(option.value, operations);
        resources.push({
          value: option.value,
          name: option.name,
          operations
        });
      }
    }

    // Pass 2: attach operations to the resources they are shown for.
    for (const prop of properties) {
      if (prop.name !== 'operation' || !prop.options) continue;

      const shownFor = prop.displayOptions?.show?.resource;
      if (!shownFor) continue;

      const targets = Array.isArray(shownFor) ? shownFor : [shownFor];
      for (const target of targets) {
        const operations = operationsByResource.get(target);
        if (!operations) continue;

        for (const op of prop.options) {
          if (!operations.includes(op.value)) {
            operations.push(op.value);
          }
        }
      }
    }

    // Some nodes have no explicit resource field; fall back to inference.
    if (resources.length === 0) {
      resources.push(...this.extractImplicitResources(properties));
    }
  } catch (error) {
    logger.warn(`Failed to extract resources for ${nodeType}:`, error);
  }

  this.resourceCache.set(nodeType, { resources, timestamp: Date.now() });
  return resources;
}
|
||||
|
||||
/**
 * Derive resources for nodes that declare operations but no resource field.
 *
 * Every property named `operation` that has options is inspected; when a
 * resource can be inferred from its operation names, one entry is produced
 * whose `name` is the inferred resource with its first letter upper-cased
 * and whose `operations` are the option values of that property.
 *
 * @param properties - Node property definitions
 * @returns Inferred resource entries (possibly empty)
 */
private extractImplicitResources(properties: any[]): any[] {
  const inferred: any[] = [];

  for (const property of properties) {
    const isOperationList = property.name === 'operation' && property.options;
    if (!isOperationList) {
      continue;
    }

    // Without an explicit resource field, the operation names are the only hint.
    const resourceName = this.inferResourceFromOperations(property.options);
    if (!resourceName) {
      continue;
    }

    const displayName = `${resourceName.charAt(0).toUpperCase()}${resourceName.slice(1)}`;
    const operationValues = property.options.map((option: any) => option.value);

    inferred.push({
      value: resourceName,
      name: displayName,
      operations: operationValues
    });
  }

  return inferred;
}
|
||||
|
||||
/**
 * Guess a resource name from a list of operations.
 *
 * Operation names (either plain strings or objects with a string `value`)
 * are matched case-insensitively against keyword groups. Groups are tried
 * in their declared order and the first group with any matching operation
 * wins. Entries without a usable string name are ignored rather than
 * aborting the whole lookup.
 *
 * @param operations - Operation options or operation names
 * @returns The inferred resource name, or null when nothing matches
 */
private inferResourceFromOperations(operations: any[]): string | null {
  // Keywords in operation names that hint at the resource being operated on.
  const keywordGroups = [
    { keywords: ['file', 'upload', 'download'], resource: 'file' },
    { keywords: ['folder', 'directory'], resource: 'folder' },
    { keywords: ['message', 'send', 'reply'], resource: 'message' },
    { keywords: ['channel', 'broadcast'], resource: 'channel' },
    { keywords: ['user', 'member'], resource: 'user' },
    { keywords: ['table', 'row', 'column'], resource: 'table' },
    { keywords: ['document', 'doc'], resource: 'document' },
  ];

  // Normalize the names once instead of once per keyword group, and skip
  // anything that is not a non-empty string.
  const operationNames: string[] = [];
  for (const op of operations) {
    const rawName: unknown = typeof op === 'string' ? op : op?.value;
    if (typeof rawName === 'string' && rawName !== '') {
      operationNames.push(rawName.toLowerCase());
    }
  }

  for (const group of keywordGroups) {
    const matches = operationNames.some(name =>
      group.keywords.some(keyword => name.includes(keyword))
    );
    if (matches) {
      return group.resource;
    }
  }

  return null;
}
|
||||
|
||||
/**
 * Select the mistake patterns that apply to a node type.
 *
 * The node type is matched case-insensitively against a list of markers, so
 * camelCase node names such as `mySql` or `mongoDb` are recognized as well.
 * At most one node-specific group is used (the first one that matches, in
 * the order listed below); the generic patterns are always appended.
 *
 * @param nodeType - Node type identifier (e.g. 'nodes-base.slack')
 * @returns Node-specific patterns (if any) followed by the generic patterns
 */
private getNodePatterns(nodeType: string): ResourcePattern[] {
  const normalizedType = nodeType.toLowerCase();

  // Markers are lower-case because they are compared with the normalized type.
  const groups = [
    { key: 'googleDrive', markers: ['googledrive'] },
    { key: 'slack', markers: ['slack'] },
    { key: 'database', markers: ['postgres', 'mysql', 'mongodb'] },
    { key: 'googleSheets', markers: ['googlesheets'] },
    { key: 'email', markers: ['gmail', 'email'] },
  ];

  const matchedGroup = groups.find(group =>
    group.markers.some(marker => normalizedType.includes(marker))
  );

  const specific = matchedGroup ? this.commonPatterns.get(matchedGroup.key) || [] : [];
  const generic = this.commonPatterns.get('generic') || [];

  return [...specific, ...generic];
}
|
||||
|
||||
/**
 * Convert a word to its singular form using simple English heuristics.
 *
 * Rules, in order:
 * - `...ies` becomes `...y` ("queries" -> "query")
 * - `...es` after s, x, z, ch or sh loses the "es" ("boxes" -> "box",
 *   "statuses" -> "status"), mirroring the rule `toPlural` uses to add it
 * - any other trailing "s" (but not "ss") is dropped ("files" -> "file",
 *   "messages" -> "message")
 *
 * Matching is case-sensitive. Irregular plurals are not handled.
 *
 * @param word - Word to singularize
 * @returns The singular candidate, or the word unchanged if no rule applies
 */
private toSingular(word: string): string {
  if (word.endsWith('ies')) {
    return word.slice(0, -3) + 'y';
  }

  // Only strip "es" where the plural rule would have added it. Stripping it
  // from every "-es" word turns "files" into "fil" and "tables" into "tabl".
  if (/(?:s|x|z|ch|sh)es$/.test(word)) {
    return word.slice(0, -2);
  }

  if (word.endsWith('s') && !word.endsWith('ss')) {
    return word.slice(0, -1);
  }

  return word;
}
|
||||
|
||||
/**
 * Convert a word to its plural form using simple English heuristics.
 *
 * - a trailing "y" not preceded by a vowel becomes "ies"
 * - words ending in s, x, z, ch or sh get "es"
 * - everything else gets "s"
 *
 * @param word - Word to pluralize
 * @returns The plural candidate
 */
private toPlural(word: string): string {
  const vowelYEndings = ['ay', 'ey', 'iy', 'oy', 'uy'];
  const sibilantEndings = ['s', 'x', 'z', 'ch', 'sh'];

  const lastTwo = word.slice(-2);
  const endsInConsonantY = word.endsWith('y') && !vowelYEndings.includes(lastTwo);
  if (endsInConsonantY) {
    return word.slice(0, -1) + 'ies';
  }

  const suffix = sibilantEndings.some(ending => word.endsWith(ending)) ? 'es' : 's';
  return word + suffix;
}
|
||||
|
||||
/**
 * Score how similar two strings are, case-insensitively.
 *
 * - identical strings score 1.0
 * - an empty string compared with a non-empty one scores 0
 * - if one string contains the other, the score is the length ratio, but
 *   never below CONFIDENCE_THRESHOLDS.MIN_SUBSTRING
 * - otherwise the score is `1 - distance / maxLength` based on the
 *   Levenshtein distance, with a floor applied for one or two edits in
 *   short words (at most 5 characters)
 *
 * @param str1 - First string
 * @param str2 - Second string
 * @returns Similarity score, where 1.0 means equal ignoring case
 */
private calculateSimilarity(str1: string, str2: string): number {
  const s1 = str1.toLowerCase();
  const s2 = str2.toLowerCase();

  // Exact match
  if (s1 === s2) return 1.0;

  // The empty string is contained in every string, so without this guard an
  // empty input would receive the substring confidence floor for every candidate.
  if (s1.length === 0 || s2.length === 0) return 0;

  // One is substring of the other
  if (s1.includes(s2) || s2.includes(s1)) {
    const ratio = Math.min(s1.length, s2.length) / Math.max(s1.length, s2.length);
    return Math.max(ResourceSimilarityService.CONFIDENCE_THRESHOLDS.MIN_SUBSTRING, ratio);
  }

  const distance = this.levenshteinDistance(s1, s2);
  const maxLength = Math.max(s1.length, s2.length);

  // Convert distance to similarity
  let similarity = 1 - (distance / maxLength);

  // In short words a single edit costs a large share of the score; keep
  // such near-misses above a fixed floor.
  if (distance === 1 && maxLength <= 5) {
    similarity = Math.max(similarity, 0.75);
  } else if (distance === 2 && maxLength <= 5) {
    // Plain Levenshtein counts an adjacent swap as two edits
    // (e.g., "flie" -> "file"), so two edits get a floor as well.
    similarity = Math.max(similarity, 0.72);
  }

  return similarity;
}
|
||||
|
||||
/**
 * Compute the Levenshtein (edit) distance between two strings: the minimum
 * number of single-character insertions, deletions and substitutions needed
 * to turn `str1` into `str2`.
 *
 * Only the previous and the current row of the distance table are kept.
 *
 * @param str1 - Source string
 * @param str2 - Target string
 * @returns The edit distance (0 for identical strings)
 */
private levenshteinDistance(str1: string, str2: string): number {
  const sourceLength = str1.length;
  const targetLength = str2.length;

  // Row 0: turning the empty prefix of str1 into the first j chars of str2 takes j insertions.
  let previousRow: number[] = Array.from({ length: targetLength + 1 }, (_, j) => j);

  for (let i = 1; i <= sourceLength; i++) {
    // Column 0: dropping the first i chars of str1 takes i deletions.
    const currentRow: number[] = [i];

    for (let j = 1; j <= targetLength; j++) {
      const sameChar = str1[i - 1] === str2[j - 1];
      currentRow[j] = sameChar
        ? previousRow[j - 1]
        : 1 + Math.min(
            previousRow[j],      // deletion
            currentRow[j - 1],   // insertion
            previousRow[j - 1]   // substitution
          );
    }

    previousRow = currentRow;
  }

  return previousRow[targetLength];
}
|
||||
|
||||
/**
 * Produce the human-readable explanation attached to a similarity-based
 * suggestion.
 *
 * The confidence is checked against the VERY_HIGH, HIGH and MEDIUM
 * thresholds in that order. Below MEDIUM the text depends on whether one
 * name contains the other (case-sensitive check).
 *
 * @param confidence - Similarity confidence score
 * @param invalid - The invalid resource string
 * @param valid - The valid resource string
 * @returns Human-readable explanation of the similarity
 */
private getSimilarityReason(confidence: number, invalid: string, valid: string): string {
  const thresholds = ResourceSimilarityService.CONFIDENCE_THRESHOLDS;

  if (confidence >= thresholds.VERY_HIGH) {
    return 'Almost exact match - likely a typo';
  }
  if (confidence >= thresholds.HIGH) {
    return 'Very similar - common variation';
  }
  if (confidence >= thresholds.MEDIUM) {
    return 'Similar resource name';
  }

  const oneContainsTheOther = invalid.includes(valid) || valid.includes(invalid);
  return oneContainsTheOther ? 'Partial match' : 'Possibly related resource';
}
|
||||
|
||||
/**
 * Drop everything held in the resource cache and the suggestion cache.
 */
clearCache(): void {
  for (const cache of [this.resourceCache, this.suggestionCache]) {
    cache.clear();
  }
}
|
||||
}
|
||||
Reference in New Issue
Block a user