Files
n8n-mcp/src/database/schema.sql
czlonkowski cfd2325ca4 fix: add FTS5 search index to prevent 69% search failure rate (v2.18.5)
Fixes production search failures where 69% of user searches returned zero
results for critical nodes (webhook, merge, split batch) despite nodes
existing in database.

Root Cause:
- schema.sql missing nodes_fts FTS5 virtual table
- No validation to detect empty database or missing FTS5
- rebuild.ts used schema without search index
- Result: 9 of 13 searches failed in production

Changes:
1. Schema Updates (src/database/schema.sql):
   - Added nodes_fts FTS5 virtual table with full-text indexing
   - Added INSERT/UPDATE/DELETE triggers for auto-sync
   - Indexes: node_type, display_name, description, documentation, operations

2. Database Validation (src/scripts/rebuild.ts):
   - Added empty database detection (fails if zero nodes)
   - Added FTS5 existence and synchronization validation
   - Added searchability tests for critical nodes
   - Added minimum node count check (500+)

3. Runtime Health Checks (src/mcp/server.ts):
   - Database health validation on first access
   - Detects empty database with clear error
   - Detects missing FTS5 with actionable warning

4. Test Suite (53 new tests):
   - tests/integration/database/node-fts5-search.test.ts (14 tests)
   - tests/integration/database/empty-database.test.ts (14 tests)
   - tests/integration/ci/database-population.test.ts (25 tests)

5. Database Rebuild:
   - data/nodes.db rebuilt with FTS5 index
   - 535 nodes fully synchronized with FTS5

Impact:
-  All critical searches now work (webhook, merge, split, code, http)
-  FTS5 provides fast ranked search (< 100ms)
-  Clear error messages if database empty
-  CI validates committed database integrity
-  Runtime health checks detect issues immediately

Performance:
- FTS5 search: < 100ms for typical queries
- LIKE fallback: < 500ms (unchanged, still functional)

Testing: LIKE search investigation revealed it was perfectly functional,
only failed because database was empty. No changes needed.

Related: Issue #296 Part 2 (Part 1: v2.18.4 fixed adapter bypass)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-10 09:16:20 +02:00

147 lines
5.0 KiB
SQL

-- Ultra-simple schema for MVP
CREATE TABLE IF NOT EXISTS nodes (
node_type TEXT PRIMARY KEY,
package_name TEXT NOT NULL,
display_name TEXT NOT NULL,
description TEXT,
category TEXT,
development_style TEXT CHECK(development_style IN ('declarative', 'programmatic')),
is_ai_tool INTEGER DEFAULT 0,
is_trigger INTEGER DEFAULT 0,
is_webhook INTEGER DEFAULT 0,
is_versioned INTEGER DEFAULT 0,
version TEXT,
documentation TEXT,
properties_schema TEXT,
operations TEXT,
credentials_required TEXT,
outputs TEXT, -- JSON array of output definitions
output_names TEXT, -- JSON array of output names
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
-- Minimal indexes for performance
CREATE INDEX IF NOT EXISTS idx_package ON nodes(package_name);
CREATE INDEX IF NOT EXISTS idx_ai_tool ON nodes(is_ai_tool);
CREATE INDEX IF NOT EXISTS idx_category ON nodes(category);
-- FTS5 full-text search index for nodes
CREATE VIRTUAL TABLE IF NOT EXISTS nodes_fts USING fts5(
node_type,
display_name,
description,
documentation,
operations,
content=nodes,
content_rowid=rowid
);
-- Triggers to keep FTS5 in sync with nodes table
CREATE TRIGGER IF NOT EXISTS nodes_fts_insert AFTER INSERT ON nodes
BEGIN
INSERT INTO nodes_fts(rowid, node_type, display_name, description, documentation, operations)
VALUES (new.rowid, new.node_type, new.display_name, new.description, new.documentation, new.operations);
END;
CREATE TRIGGER IF NOT EXISTS nodes_fts_update AFTER UPDATE ON nodes
BEGIN
UPDATE nodes_fts
SET node_type = new.node_type,
display_name = new.display_name,
description = new.description,
documentation = new.documentation,
operations = new.operations
WHERE rowid = new.rowid;
END;
CREATE TRIGGER IF NOT EXISTS nodes_fts_delete AFTER DELETE ON nodes
BEGIN
DELETE FROM nodes_fts WHERE rowid = old.rowid;
END;
-- Templates table for n8n workflow templates
CREATE TABLE IF NOT EXISTS templates (
id INTEGER PRIMARY KEY,
workflow_id INTEGER UNIQUE NOT NULL,
name TEXT NOT NULL,
description TEXT,
author_name TEXT,
author_username TEXT,
author_verified INTEGER DEFAULT 0,
nodes_used TEXT, -- JSON array of node types
workflow_json TEXT, -- Complete workflow JSON (deprecated, use workflow_json_compressed)
workflow_json_compressed TEXT, -- Compressed workflow JSON (base64 encoded gzip)
categories TEXT, -- JSON array of categories
views INTEGER DEFAULT 0,
created_at DATETIME,
updated_at DATETIME,
url TEXT,
scraped_at DATETIME DEFAULT CURRENT_TIMESTAMP,
metadata_json TEXT, -- Structured metadata from OpenAI (JSON)
metadata_generated_at DATETIME -- When metadata was generated
);
-- Templates indexes
CREATE INDEX IF NOT EXISTS idx_template_nodes ON templates(nodes_used);
CREATE INDEX IF NOT EXISTS idx_template_updated ON templates(updated_at);
CREATE INDEX IF NOT EXISTS idx_template_name ON templates(name);
CREATE INDEX IF NOT EXISTS idx_template_metadata ON templates(metadata_generated_at);
-- Pre-extracted node configurations from templates
-- This table stores the top node configurations from popular templates
-- Provides fast access to real-world configuration examples
CREATE TABLE IF NOT EXISTS template_node_configs (
id INTEGER PRIMARY KEY,
node_type TEXT NOT NULL,
template_id INTEGER NOT NULL,
template_name TEXT NOT NULL,
template_views INTEGER DEFAULT 0,
-- Node configuration (extracted from workflow)
node_name TEXT, -- Node name in workflow (e.g., "HTTP Request")
parameters_json TEXT NOT NULL, -- JSON: node.parameters
credentials_json TEXT, -- JSON: node.credentials (if present)
-- Pre-calculated metadata for filtering
has_credentials INTEGER DEFAULT 0,
has_expressions INTEGER DEFAULT 0, -- Contains {{...}} or $json/$node
complexity TEXT CHECK(complexity IN ('simple', 'medium', 'complex')),
use_cases TEXT, -- JSON array from template.metadata.use_cases
-- Pre-calculated ranking (1 = best, 2 = second best, etc.)
rank INTEGER DEFAULT 0,
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (template_id) REFERENCES templates(id) ON DELETE CASCADE
);
-- Indexes for fast queries
CREATE INDEX IF NOT EXISTS idx_config_node_type_rank
ON template_node_configs(node_type, rank);
CREATE INDEX IF NOT EXISTS idx_config_complexity
ON template_node_configs(node_type, complexity, rank);
CREATE INDEX IF NOT EXISTS idx_config_auth
ON template_node_configs(node_type, has_credentials, rank);
-- View for easy querying of top configs
CREATE VIEW IF NOT EXISTS ranked_node_configs AS
SELECT
node_type,
template_name,
template_views,
parameters_json,
credentials_json,
has_credentials,
has_expressions,
complexity,
use_cases,
rank
FROM template_node_configs
WHERE rank <= 5 -- Top 5 per node type
ORDER BY node_type, rank;
-- Note: Template FTS5 tables are created conditionally at runtime if FTS5 is supported
-- See template-repository.ts initializeFTS5() method
-- Node FTS5 table (nodes_fts) is created above during schema initialization