diff --git a/IMPLEMENTATION_GUIDE.md b/IMPLEMENTATION_GUIDE.md new file mode 100644 index 0000000..1a36cc5 --- /dev/null +++ b/IMPLEMENTATION_GUIDE.md @@ -0,0 +1,3491 @@ +# n8n-mcp MVP: Developer Implementation Guide + +**Version:** 1.0 +**Target:** 2.5 week MVP launch +**Audience:** Backend, Frontend, DevOps engineers +**Date:** 2025-10-11 + +--- + +## ๐Ÿ“‹ Table of Contents + +1. [Prerequisites](#prerequisites) +2. [Phase 0: Environment Setup (Day 0)](#phase-0-environment-setup) +3. [Phase 1: Backend Implementation (Days 1-4)](#phase-1-backend-implementation) +4. [Phase 2: Frontend Implementation (Days 5-9)](#phase-2-frontend-implementation) +5. [Phase 3: Testing & Launch (Days 10-12)](#phase-3-testing--launch) +6. [Troubleshooting](#troubleshooting) +7. [Rollback Procedures](#rollback-procedures) + +--- + +## Prerequisites + +### Development Environment + +**Required Tools:** +- [ ] Node.js 20+ LTS +- [ ] npm 10+ +- [ ] Docker & Docker Compose +- [ ] Git +- [ ] Code editor (VS Code recommended) +- [ ] curl / Postman for API testing + +**Optional but Recommended:** +- [ ] Docker Desktop (for local testing) +- [ ] GitHub CLI (`gh`) +- [ ] Supabase CLI (`npx supabase`) + +### Access & Accounts + +**Must Have:** +- [ ] GitHub account with access to `czlonkowski/n8n-mcp` repo +- [ ] Supabase account (free tier) +- [ ] Hetzner Cloud account +- [ ] Domain access to `n8n-mcp.com` DNS + +**Nice to Have:** +- [ ] Vercel account (for frontend hosting) +- [ ] Testing n8n instance with API key + +### Knowledge Prerequisites + +**Backend Developer:** +- TypeScript/Node.js +- REST APIs & HTTP servers +- PostgreSQL & SQL +- Docker basics +- Encryption (AES-256-GCM) + +**Frontend Developer:** +- React 19 & Next.js 15 +- TypeScript +- Supabase client SDK +- Server Components & Server Actions + +**DevOps:** +- Docker Compose +- Caddy/nginx basics +- DNS configuration +- SSL/TLS certificates + +--- + +## Phase 0: Environment Setup + +**Goal:** Get development environment ready +**Time:** 2-4 hours +**Assignee:** All team members + +### 0.1 Clone Repository + +```bash +# Clone n8n-mcp backend +git clone https://github.com/czlonkowski/n8n-mcp.git +cd n8n-mcp + +# Create feature branch +git checkout -b feature/multi-tenant-mvp + +# Install dependencies +npm install + +# Build to verify setup +npm run build +``` + +**Verification:** +```bash +npm run typecheck # Should pass +npm test # Existing tests should pass +``` + +### 0.2 Create Supabase Project + +**Steps:** +1. Go to https://supabase.com/dashboard +2. Click "New Project" +3. Fill in: + - Name: `n8n-mcp-production` + - Database Password: Generate strong password (save securely!) + - Region: Europe (Frankfurt) - closest to Hetzner + - Plan: Free tier +4. Wait for provisioning (~2 minutes) + +**Get Credentials:** +```bash +# From Project Settings > API +SUPABASE_URL=https://xxxxx.supabase.co +SUPABASE_ANON_KEY=eyJxxxxx # For frontend +SUPABASE_SERVICE_KEY=eyJxxxxx # For backend (bypasses RLS) +``` + +**Create `.env.local` file:** +```bash +# Backend .env.local +SUPABASE_URL=https://xxxxx.supabase.co +SUPABASE_SERVICE_KEY=eyJxxxxx +SESSION_SECRET=generate-random-32-char-string +NODE_ENV=development +MCP_MODE=http +PORT=3000 +ENABLE_MULTI_TENANT=true +``` + +### 0.3 Provision Hetzner Server (Optional for Local Dev) + +**For Production Deployment:** +1. Go to https://console.hetzner.cloud +2. Create new project: `n8n-mcp-production` +3. 
Add server: + - Type: CPX31 (4 vCPU, 8GB RAM) + - Location: Falkenstein, Germany + - Image: Ubuntu 22.04 LTS + - Add SSH key +4. Note server IP: `XXX.XXX.XXX.XXX` + +**Initial Server Setup:** +```bash +ssh root@XXX.XXX.XXX.XXX + +# Update system +apt update && apt upgrade -y + +# Install Docker +curl -fsSL https://get.docker.com -o get-docker.sh +sh get-docker.sh + +# Install Docker Compose +apt install docker-compose-plugin -y + +# Verify +docker --version +docker compose version +``` + +### 0.4 Configure DNS + +**Add DNS Records:** +``` +Type Name Value TTL +A api.n8n-mcp.com XXX.XXX.XXX.XXX 300 +A www.n8n-mcp.com (Vercel IP) 300 +``` + +**Verification:** +```bash +dig api.n8n-mcp.com +short # Should return server IP +``` + +--- + +## Phase 1: Backend Implementation + +**Goal:** Multi-tenant n8n-mcp service with API key auth +**Time:** 3-4 days +**Assignee:** Backend developer + +### Day 1: Database Schema & Supabase Setup + +#### 1.1 Deploy Database Schema + +**File:** `supabase/schema.sql` (create this file) + +```sql +-- Enable UUID extension +CREATE EXTENSION IF NOT EXISTS "uuid-ossp"; + +-- Users table (extends auth.users) +CREATE TABLE public.users ( + id UUID PRIMARY KEY REFERENCES auth.users(id) ON DELETE CASCADE, + email TEXT NOT NULL UNIQUE, + full_name TEXT, + created_at TIMESTAMPTZ DEFAULT NOW(), + updated_at TIMESTAMPTZ DEFAULT NOW() +); + +-- API Keys table (n8n-mcp keys, not n8n instance keys!) +CREATE TABLE public.api_keys ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + user_id UUID NOT NULL REFERENCES public.users(id) ON DELETE CASCADE, + key_hash TEXT NOT NULL UNIQUE, + key_prefix TEXT NOT NULL, -- e.g., "nmcp_abc123..." + name TEXT NOT NULL, -- User-friendly name + last_used_at TIMESTAMPTZ, + created_at TIMESTAMPTZ DEFAULT NOW(), + is_active BOOLEAN DEFAULT TRUE +); + +-- n8n Instance Configuration (user's actual n8n credentials) +CREATE TABLE public.n8n_instances ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + user_id UUID NOT NULL REFERENCES public.users(id) ON DELETE CASCADE, + instance_url TEXT NOT NULL, + api_key_encrypted TEXT NOT NULL, -- Encrypted n8n API key + is_active BOOLEAN DEFAULT TRUE, + last_validated_at TIMESTAMPTZ, + created_at TIMESTAMPTZ DEFAULT NOW(), + updated_at TIMESTAMPTZ DEFAULT NOW(), + CONSTRAINT unique_user_instance UNIQUE(user_id, instance_url) +); + +-- Usage tracking (basic for MVP) +CREATE TABLE public.usage_logs ( + id BIGSERIAL PRIMARY KEY, + user_id UUID NOT NULL REFERENCES public.users(id) ON DELETE CASCADE, + api_key_id UUID REFERENCES public.api_keys(id) ON DELETE SET NULL, + tool_name TEXT NOT NULL, + status TEXT NOT NULL CHECK (status IN ('success', 'error', 'rate_limited')), + error_message TEXT, + created_at TIMESTAMPTZ DEFAULT NOW() +); + +-- Indexes for performance +CREATE INDEX idx_api_keys_user_id ON public.api_keys(user_id); +CREATE INDEX idx_api_keys_key_hash ON public.api_keys(key_hash); +CREATE INDEX idx_api_keys_active ON public.api_keys(is_active) WHERE is_active = true; +CREATE INDEX idx_n8n_instances_user_id ON public.n8n_instances(user_id); +CREATE INDEX idx_usage_logs_user_id ON public.usage_logs(user_id); +CREATE INDEX idx_usage_logs_created_at ON public.usage_logs(created_at DESC); + +-- Enable Row Level Security +ALTER TABLE public.users ENABLE ROW LEVEL SECURITY; +ALTER TABLE public.api_keys ENABLE ROW LEVEL SECURITY; +ALTER TABLE public.n8n_instances ENABLE ROW LEVEL SECURITY; +ALTER TABLE public.usage_logs ENABLE ROW LEVEL SECURITY; + +-- RLS Policies + +-- Users can view own data +CREATE POLICY 
"Users can view own data" ON public.users + FOR SELECT USING (auth.uid() = id); + +CREATE POLICY "Users can update own data" ON public.users + FOR UPDATE USING (auth.uid() = id); + +-- Users can manage own API keys +CREATE POLICY "Users can view own API keys" ON public.api_keys + FOR SELECT USING (auth.uid() = user_id); + +CREATE POLICY "Users can insert own API keys" ON public.api_keys + FOR INSERT WITH CHECK (auth.uid() = user_id); + +CREATE POLICY "Users can update own API keys" ON public.api_keys + FOR UPDATE USING (auth.uid() = user_id); + +CREATE POLICY "Users can delete own API keys" ON public.api_keys + FOR DELETE USING (auth.uid() = user_id); + +-- Users can manage own n8n instances +CREATE POLICY "Users can view own n8n config" ON public.n8n_instances + FOR SELECT USING (auth.uid() = user_id); + +CREATE POLICY "Users can insert own n8n config" ON public.n8n_instances + FOR INSERT WITH CHECK (auth.uid() = user_id); + +CREATE POLICY "Users can update own n8n config" ON public.n8n_instances + FOR UPDATE USING (auth.uid() = user_id); + +CREATE POLICY "Users can delete own n8n config" ON public.n8n_instances + FOR DELETE USING (auth.uid() = user_id); + +-- Users can view own usage logs +CREATE POLICY "Users can view own usage" ON public.usage_logs + FOR SELECT USING (auth.uid() = user_id); + +-- Service role can do everything (for backend API key validation) +-- This is automatic with service_role key + +-- Function to auto-create user record on signup +CREATE OR REPLACE FUNCTION public.handle_new_user() +RETURNS TRIGGER AS $$ +BEGIN + INSERT INTO public.users (id, email, full_name) + VALUES (NEW.id, NEW.email, NEW.raw_user_meta_data->>'full_name'); + RETURN NEW; +END; +$$ LANGUAGE plpgsql SECURITY DEFINER; + +-- Trigger to create user on auth signup +CREATE TRIGGER on_auth_user_created + AFTER INSERT ON auth.users + FOR EACH ROW EXECUTE FUNCTION public.handle_new_user(); +``` + +**Deploy Schema:** + +**Option A: Supabase Dashboard** +1. Go to SQL Editor in Supabase dashboard +2. Paste entire schema +3. Click "Run" + +**Option B: Supabase CLI** +```bash +npx supabase db push +``` + +**Verification:** +```sql +-- Run in SQL Editor +SELECT table_name +FROM information_schema.tables +WHERE table_schema = 'public' +ORDER BY table_name; + +-- Should see: users, api_keys, n8n_instances, usage_logs +``` + +#### 1.2 Configure Supabase Auth + +**Steps:** +1. Go to Authentication > Settings +2. Enable Email provider (already enabled) +3. Configure Email Templates: + - Confirmation: Customize subject/body + - Magic Link: Disable (not using for MVP) +4. Site URL: `https://www.n8n-mcp.com` +5. 
Redirect URLs: Add `https://www.n8n-mcp.com/auth/callback`
+
+**Verification:**
+- Send a test signup email from the dashboard
+- Check that the email arrives and the link works
+
+---
+
+### Day 2-3: Multi-Tenant Backend Implementation
+
+#### 2.1 Create Encryption Service
+
+**File:** `src/services/encryption.ts`
+
+```typescript
+import crypto from 'crypto';
+
+const ALGORITHM = 'aes-256-gcm';
+const IV_LENGTH = 16;
+const TAG_LENGTH = 16;
+const KEY_LENGTH = 32;
+
+/**
+ * Derives an encryption key from the master secret + user ID.
+ * This ensures each user has a unique encryption key.
+ */
+function deriveKey(userId: string): Buffer {
+  const masterKey = process.env.MASTER_ENCRYPTION_KEY;
+  if (!masterKey) {
+    throw new Error('MASTER_ENCRYPTION_KEY not set');
+  }
+
+  return crypto.pbkdf2Sync(
+    masterKey,
+    userId,
+    100000,
+    KEY_LENGTH,
+    'sha512'
+  );
+}
+
+/**
+ * Encrypts data using AES-256-GCM
+ * Output format: base64(iv + auth tag + ciphertext)
+ */
+export function encrypt(plaintext: string, userId: string): string {
+  const key = deriveKey(userId);
+  const iv = crypto.randomBytes(IV_LENGTH);
+  const cipher = crypto.createCipheriv(ALGORITHM, key, iv);
+
+  let encrypted = cipher.update(plaintext, 'utf8', 'hex');
+  encrypted += cipher.final('hex');
+  const tag = cipher.getAuthTag();
+
+  // Combine: iv + tag + ciphertext
+  const result = Buffer.concat([
+    iv,
+    tag,
+    Buffer.from(encrypted, 'hex')
+  ]);
+
+  return result.toString('base64');
+}
+
+/**
+ * Decrypts data encrypted with encrypt()
+ */
+export function decrypt(ciphertext: string, userId: string): string {
+  const key = deriveKey(userId);
+  const buffer = Buffer.from(ciphertext, 'base64');
+
+  // Extract components
+  const iv = buffer.subarray(0, IV_LENGTH);
+  const tag = buffer.subarray(IV_LENGTH, IV_LENGTH + TAG_LENGTH);
+  const encrypted = buffer.subarray(IV_LENGTH + TAG_LENGTH);
+
+  const decipher = crypto.createDecipheriv(ALGORITHM, key, iv);
+  decipher.setAuthTag(tag);
+
+  let decrypted = decipher.update(encrypted);
+  decrypted = Buffer.concat([decrypted, decipher.final()]);
+
+  return decrypted.toString('utf8');
+}
+```
+
+**Test:**
+```typescript
+// Create test file: src/services/encryption.test.ts
+import { encrypt, decrypt } from './encryption';
+
+describe('Encryption Service', () => {
+  beforeAll(() => {
+    process.env.MASTER_ENCRYPTION_KEY = 'test-master-key-32-chars-long!';
+  });
+
+  test('should encrypt and decrypt correctly', () => {
+    const userId = 'test-user-id';
+    const plaintext = 'my-n8n-api-key-secret';
+
+    const encrypted = encrypt(plaintext, userId);
+    const decrypted = decrypt(encrypted, userId);
+
+    expect(decrypted).toBe(plaintext);
+    expect(encrypted).not.toBe(plaintext);
+  });
+
+  test('should fail with wrong user ID', () => {
+    const userId1 = 'user-1';
+    const userId2 = 'user-2';
+    const plaintext = 'secret';
+
+    const encrypted = encrypt(plaintext, userId1);
+
+    expect(() => decrypt(encrypted, userId2)).toThrow();
+  });
+});
+```
+
+Run test:
+```bash
+npm test -- src/services/encryption.test.ts
+```
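+
+The derived-key scheme above only works if `MASTER_ENCRYPTION_KEY` is a real secret. A minimal sketch for generating it and `SESSION_SECRET` — the script path is illustrative; any source of 32+ cryptographically random bytes is fine:
+
+```typescript
+// scripts/generate-secrets.ts (hypothetical helper; run with: npx tsx scripts/generate-secrets.ts)
+import crypto from 'crypto';
+
+// 32 random bytes, base64-encoded — paste the output into .env.local
+console.log(`MASTER_ENCRYPTION_KEY=${crypto.randomBytes(32).toString('base64')}`);
+console.log(`SESSION_SECRET=${crypto.randomBytes(32).toString('base64')}`);
+```
+
+Note that rotating `MASTER_ENCRYPTION_KEY` later makes previously stored n8n credentials undecryptable, so treat it like a database password.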
+
+#### 2.2 Create Supabase Client Service
+
+**File:** `src/services/database.ts`
+
+```typescript
+import { createClient } from '@supabase/supabase-js';
+
+// Singleton pattern for the Supabase client
+let supabaseClient: ReturnType<typeof createClient> | null = null;
+
+export function getSupabaseClient() {
+  if (supabaseClient) return supabaseClient;
+
+  const supabaseUrl = process.env.SUPABASE_URL;
+  const supabaseKey = process.env.SUPABASE_SERVICE_KEY;
+
+  if (!supabaseUrl || !supabaseKey) {
+    throw new Error('SUPABASE_URL and SUPABASE_SERVICE_KEY must be set');
+  }
+
+  supabaseClient = createClient(supabaseUrl, supabaseKey, {
+    auth: {
+      persistSession: false, // Server-side, no sessions
+      autoRefreshToken: false
+    },
+    db: {
+      schema: 'public'
+    }
+  });
+
+  return supabaseClient;
+}
+
+// Type definitions for database rows
+export interface User {
+  id: string;
+  email: string;
+  full_name: string | null;
+  created_at: string;
+  updated_at: string;
+}
+
+export interface ApiKey {
+  id: string;
+  user_id: string;
+  key_hash: string;
+  key_prefix: string;
+  name: string;
+  last_used_at: string | null;
+  created_at: string;
+  is_active: boolean;
+}
+
+export interface N8nInstance {
+  id: string;
+  user_id: string;
+  instance_url: string;
+  api_key_encrypted: string;
+  is_active: boolean;
+  last_validated_at: string | null;
+  created_at: string;
+  updated_at: string;
+}
+
+export interface UsageLog {
+  id: number;
+  user_id: string;
+  api_key_id: string | null;
+  tool_name: string;
+  status: 'success' | 'error' | 'rate_limited';
+  error_message: string | null;
+  created_at: string;
+}
+```
+
+#### 2.3 Create Rate Limiter Service
+
+**File:** `src/services/rate-limiter.ts`
+
+```typescript
+interface RateLimitCounter {
+  count: number;
+  windowStart: number;
+}
+
+export class RateLimiter {
+  private counters = new Map<string, RateLimitCounter>();
+  private cleanupInterval: NodeJS.Timeout;
+
+  constructor(
+    private limit: number = 100,   // requests per window
+    private windowMs: number = 60000 // 1 minute
+  ) {
+    // Cleanup old counters every 5 minutes
+    this.cleanupInterval = setInterval(() => this.cleanup(), 300000);
+  }
+
+  /**
+   * Check if request is within rate limit
+   * @param key Unique identifier (API key)
+   * @returns true if allowed, false if rate limited
+   */
+  check(key: string): boolean {
+    const now = Date.now();
+    let counter = this.counters.get(key);
+
+    // Create new window if it doesn't exist or has expired
+    if (!counter || counter.windowStart < now - this.windowMs) {
+      counter = {
+        count: 0,
+        windowStart: now
+      };
+    }
+
+    counter.count++;
+    this.counters.set(key, counter);
+
+    return counter.count <= this.limit;
+  }
+
+  /**
+   * Get remaining requests for a key
+   */
+  remaining(key: string): number {
+    const counter = this.counters.get(key);
+    if (!counter) return this.limit;
+
+    const now = Date.now();
+    if (counter.windowStart < now - this.windowMs) {
+      return this.limit;
+    }
+
+    return Math.max(0, this.limit - counter.count);
+  }
+
+  /**
+   * Reset rate limit for a key
+   */
+  reset(key: string): void {
+    this.counters.delete(key);
+  }
+
+  /**
+   * Cleanup expired counters
+   */
+  private cleanup(): void {
+    const now = Date.now();
+    for (const [key, counter] of this.counters.entries()) {
+      if (counter.windowStart < now - this.windowMs * 2) {
+        this.counters.delete(key);
+      }
+    }
+  }
+
+  /**
+   * Shutdown cleanup interval
+   */
+  destroy(): void {
+    clearInterval(this.cleanupInterval);
+  }
+}
+```
+
+**Test:**
+```typescript
+// src/services/rate-limiter.test.ts
+import { RateLimiter } from './rate-limiter';
+
+describe('RateLimiter', () => {
+  test('should allow requests within limit', () => {
+    const limiter = new RateLimiter(3, 1000);
+    const key = 'test-key';
+
+    expect(limiter.check(key)).toBe(true);  // 1
+    expect(limiter.check(key)).toBe(true);  // 2
+    expect(limiter.check(key)).toBe(true);  // 3
+    expect(limiter.check(key)).toBe(false); // 4 - exceeded
+  });
+
+  test('should reset after window expires', async () => {
+    const limiter = new RateLimiter(2, 100); // 100ms window
+    const key = 'test-key';
+
+    limiter.check(key); // 1
+    limiter.check(key); // 2
+    expect(limiter.check(key)).toBe(false); // 3 - exceeded
+
+    // Wait for window to expire
+    await new Promise(resolve => setTimeout(resolve, 150));
+
+    expect(limiter.check(key)).toBe(true); // New window
+  });
+});
+```
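+
+Section 2.6 returns `X-RateLimit-*` headers on 429s; successful responses can advertise the remaining budget the same way. A minimal sketch using the limiter above (in the server, reuse the shared `rateLimiter` instance instead of creating a new one):
+
+```typescript
+import { RateLimiter } from './rate-limiter';
+
+const limiter = new RateLimiter(100, 60000);
+
+// Attach standard rate-limit headers for a given API key to an outgoing response
+export function withRateLimitHeaders(headers: Headers, apiKey: string): Headers {
+  headers.set('X-RateLimit-Limit', '100');
+  headers.set('X-RateLimit-Remaining', String(limiter.remaining(apiKey)));
+  return headers;
+}
+```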
+
+#### 2.4 Create Session Manager Service
+
+**File:** `src/services/session-manager.ts`
+
+```typescript
+import crypto from 'crypto';
+import fs from 'fs';
+import path from 'path';
+import { InstanceContext } from '../types';
+
+export interface SessionData {
+  userId: string;
+  context: InstanceContext;
+  created: number;
+  lastAccess: number;
+  expires: number;
+}
+
+export interface SessionOptions {
+  maxSessions: number;
+  ttl: number; // milliseconds
+  persistPath?: string;
+}
+
+export class SessionManager {
+  private sessions = new Map<string, SessionData>();
+  private backupInterval: NodeJS.Timeout | null = null;
+  private cleanupInterval: NodeJS.Timeout;
+
+  constructor(private options: SessionOptions) {
+    this.loadFromDisk();
+
+    // Backup to disk every minute if persistPath provided
+    if (options.persistPath) {
+      this.backupInterval = setInterval(() => {
+        this.backupToDisk();
+      }, 60000);
+    }
+
+    // Cleanup expired sessions every 5 minutes
+    // (kept as a field so destroy() can clear it)
+    this.cleanupInterval = setInterval(() => this.cleanup(), 300000);
+  }
+
+  /**
+   * Get session by ID
+   */
+  get(sessionId: string): SessionData | null {
+    const session = this.sessions.get(sessionId);
+
+    if (!session) return null;
+
+    // Check if expired
+    if (session.expires < Date.now()) {
+      this.sessions.delete(sessionId);
+      return null;
+    }
+
+    // Update last access and slide the expiry forward
+    session.lastAccess = Date.now();
+    session.expires = Date.now() + this.options.ttl;
+
+    return session;
+  }
+
+  /**
+   * Create new session
+   */
+  create(userId: string, context: InstanceContext): string {
+    // Enforce max sessions
+    if (this.sessions.size >= this.options.maxSessions) {
+      this.evictOldest();
+    }
+
+    const sessionId = this.generateSessionId();
+    const now = Date.now();
+
+    this.sessions.set(sessionId, {
+      userId,
+      context,
+      created: now,
+      lastAccess: now,
+      expires: now + this.options.ttl
+    });
+
+    return sessionId;
+  }
+
+  /**
+   * Delete session
+   */
+  delete(sessionId: string): void {
+    this.sessions.delete(sessionId);
+  }
+
+  /**
+   * Get all sessions for a user
+   */
+  getByUser(userId: string): SessionData[] {
+    const result: SessionData[] = [];
+    for (const session of this.sessions.values()) {
+      if (session.userId === userId && session.expires > Date.now()) {
+        result.push(session);
+      }
+    }
+    return result;
+  }
+
+  /**
+   * Generate unique session ID.
+   * Session IDs are bearer credentials, so use crypto randomness;
+   * Math.random() is predictable.
+   */
+  private generateSessionId(): string {
+    return `sess_${Date.now()}_${crypto.randomBytes(9).toString('base64url')}`;
+  }
+
+  /**
+   * Evict oldest session
+   */
+  private evictOldest(): void {
+    let oldestId: string | null = null;
+    let oldestTime = Infinity;
+
+    for (const [id, session] of this.sessions) {
+      if (session.lastAccess < oldestTime) {
+        oldestTime = session.lastAccess;
+        oldestId = id;
+      }
+    }
+
+    if (oldestId) {
+      this.sessions.delete(oldestId);
+    }
+  }
+
+  /**
+   * Cleanup expired sessions
+   */
+  private cleanup(): void {
+    const now = Date.now();
+    for (const [id, session] of this.sessions) {
+      if (session.expires < now) {
+        this.sessions.delete(id);
+      }
+    }
+  }
+
+  /**
+   * Backup sessions to disk
+   */
+  private backupToDisk(): void {
+    if (!this.options.persistPath) return;
+
+    try {
+      const dirPath = this.options.persistPath;
+      if (!fs.existsSync(dirPath)) {
+        fs.mkdirSync(dirPath, { recursive: true });
+      }
+
+      const filePath = path.join(dirPath, 'sessions.json');
+      const data = JSON.stringify(Array.from(this.sessions.entries()));
+
+      fs.writeFileSync(filePath, data, 'utf8');
+    } catch (error) {
+      console.error('Failed to backup sessions:', error);
+    }
+  }
+
+  /**
+   * Load sessions from disk
+   */
+  private loadFromDisk(): void {
+    if (!this.options.persistPath) return;
+
+    try {
+      const filePath = path.join(this.options.persistPath, 'sessions.json');
+
+      if (fs.existsSync(filePath)) {
+        const data = fs.readFileSync(filePath, 'utf8');
+        const entries: Array<[string, SessionData]> = JSON.parse(data);
+
+        // Only restore non-expired sessions
+        const now = Date.now();
+        for (const [id, session] of entries) {
+          if (session.expires > now) {
+            this.sessions.set(id, session);
+          }
+        }
+      }
+    } catch (error) {
+      console.error('Failed to load sessions:', error);
+    }
+  }
+
+  /**
+   * Shutdown manager
+   */
+  destroy(): void {
+    if (this.backupInterval) {
+      clearInterval(this.backupInterval);
+    }
+    clearInterval(this.cleanupInterval);
+    this.backupToDisk();
+  }
+}
+```
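+
+A quick usage sketch — the user ID and `InstanceContext` values below are placeholders:
+
+```typescript
+import { SessionManager } from './session-manager';
+
+const sessions = new SessionManager({ maxSessions: 1000, ttl: 3600000 });
+
+// After authenticating a user, mint a session and hand the ID back to the client
+const sessionId = sessions.create('user-123', {
+  n8nApiUrl: 'https://example-n8n.com',
+  n8nApiKey: 'placeholder',
+});
+
+// On later requests: get() returns null if expired or evicted, otherwise
+// refreshes lastAccess and slides the expiry forward by the TTL
+const session = sessions.get(sessionId);
+if (!session) {
+  // re-authenticate and create a fresh session
+}
+
+// On shutdown: stop timers and flush the session map to disk
+sessions.destroy();
+```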
+
+#### 2.5 Create API Key Validator Service
+
+**File:** `src/services/api-key-validator.ts`
+
+```typescript
+import crypto from 'crypto';
+import { getSupabaseClient } from './database';
+import { decrypt } from './encryption';
+
+export interface UserContext {
+  userId: string;
+  n8nUrl: string;
+  n8nApiKey: string;
+}
+
+// In-memory cache for validated API keys (5 minute TTL)
+interface CacheEntry {
+  context: UserContext;
+  expires: number;
+}
+
+const apiKeyCache = new Map<string, CacheEntry>();
+
+// Cleanup cache every 5 minutes
+setInterval(() => {
+  const now = Date.now();
+  for (const [key, entry] of apiKeyCache.entries()) {
+    if (entry.expires < now) {
+      apiKeyCache.delete(key);
+    }
+  }
+}, 300000);
+
+/**
+ * Validates an n8n-mcp API key and returns the user context.
+ * This performs the two-tier API key lookup:
+ * 1. Validate the n8n-mcp API key (nmcp_xxx)
+ * 2. Fetch and decrypt the user's n8n instance credentials
+ */
+export async function validateApiKey(apiKey: string): Promise<UserContext> {
+  // Check cache first
+  const cached = apiKeyCache.get(apiKey);
+  if (cached && cached.expires > Date.now()) {
+    return cached.context;
+  }
+
+  const supabase = getSupabaseClient();
+
+  // Hash the provided key with SHA-256 (deterministic), not bcrypt:
+  // bcrypt embeds a random salt, so hashing the same key twice never
+  // yields the same string, which would make this equality lookup
+  // impossible. The dashboard's generateApiKey action must store the
+  // same SHA-256 digest.
+  const keyHash = crypto.createHash('sha256').update(apiKey).digest('hex');
+
+  // Look up the API key. Two queries, because api_keys and n8n_instances
+  // are only related through users, so there is no direct relationship
+  // for a PostgREST embed to follow.
+  const { data: keyRow, error } = await supabase
+    .from('api_keys')
+    .select('id, user_id, is_active')
+    .eq('key_hash', keyHash)
+    .eq('is_active', true)
+    .single();
+
+  if (error || !keyRow) {
+    throw new Error('Invalid API key');
+  }
+
+  // Fetch the user's active n8n instance configuration
+  const { data: n8nInstance } = await supabase
+    .from('n8n_instances')
+    .select('instance_url, api_key_encrypted, is_active')
+    .eq('user_id', keyRow.user_id)
+    .eq('is_active', true)
+    .limit(1)
+    .single();
+
+  if (!n8nInstance || !n8nInstance.is_active) {
+    throw new Error('n8n instance not configured or inactive');
+  }
+
+  // Decrypt n8n API key (server-side only!)
+  let n8nApiKey: string;
+  try {
+    n8nApiKey = decrypt(n8nInstance.api_key_encrypted, keyRow.user_id);
+  } catch (error) {
+    throw new Error('Failed to decrypt n8n credentials');
+  }
+
+  // Update last_used_at
+  await supabase
+    .from('api_keys')
+    .update({ last_used_at: new Date().toISOString() })
+    .eq('id', keyRow.id);
+
+  // Create user context
+  const context: UserContext = {
+    userId: keyRow.user_id,
+    n8nUrl: n8nInstance.instance_url,
+    n8nApiKey
+  };
+
+  // Cache for 5 minutes
+  apiKeyCache.set(apiKey, {
+    context,
+    expires: Date.now() + 300000
+  });
+
+  return context;
+}
+
+/**
+ * Clear cache for a specific API key
+ */
+export function clearApiKeyCache(apiKey: string): void {
+  apiKeyCache.delete(apiKey);
+}
+
+/**
+ * Clear all cache
+ */
+export function clearAllCache(): void {
+  apiKeyCache.clear();
+}
+```
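+
+The lookup in `validateApiKey()` only works if the dashboard stores exactly the digest the backend computes, so it is worth pinning the hashing behavior with a test. A self-contained sketch — the file path is illustrative, and in practice you would extract `hashApiKey` into a shared module and keep an identical copy in the frontend repo:
+
+```typescript
+// src/services/api-key-hash.test.ts (sketch; extract hashApiKey into a shared module)
+import crypto from 'crypto';
+
+// Deterministic digest used for the key_hash equality lookup
+export function hashApiKey(apiKey: string): string {
+  return crypto.createHash('sha256').update(apiKey).digest('hex');
+}
+
+describe('hashApiKey', () => {
+  test('is deterministic and hex-encoded', () => {
+    const key = 'nmcp_example';
+    expect(hashApiKey(key)).toBe(hashApiKey(key));
+    expect(hashApiKey(key)).toMatch(/^[0-9a-f]{64}$/);
+  });
+});
+```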
+
+#### 2.6 Modify HTTP Server for Multi-Tenant
+
+**File:** `src/http-server-single-session.ts` (modifications)
+
+```typescript
+// Add these imports at the top
+import { validateApiKey } from './services/api-key-validator';
+import { RateLimiter } from './services/rate-limiter';
+import { SessionManager } from './services/session-manager';
+import { getSupabaseClient } from './services/database';
+
+// Initialize services (add after existing imports)
+const rateLimiter = new RateLimiter(100, 60000); // 100 req/min
+const sessionManager = new SessionManager({
+  maxSessions: 1000,
+  ttl: 3600000, // 1 hour
+  persistPath: process.env.SESSION_PERSIST_PATH || './sessions'
+});
+
+// Add new method to HTTPServer class
+private async handleMultiTenantRequest(
+  req: Request
+): Promise<Response> {
+  // Extract API key from Authorization header
+  const authHeader = req.headers.get('Authorization');
+  if (!authHeader || !authHeader.startsWith('Bearer ')) {
+    return new Response('Missing or invalid Authorization header', {
+      status: 401,
+      headers: { 'Content-Type': 'text/plain' }
+    });
+  }
+
+  const apiKey = authHeader.substring(7); // Remove 'Bearer '
+
+  // Check rate limit
+  if (!rateLimiter.check(apiKey)) {
+    // usage_logs.user_id is a NOT NULL UUID foreign key, and the key has
+    // not been validated yet, so there is no user to attribute this to.
+    // Log locally instead of inserting a row that would violate the FK.
+    console.warn('Rate limit exceeded for key prefix:', apiKey.substring(0, 12));
+
+    return new Response('Rate limit exceeded', {
+      status: 429,
+      headers: {
+        'Content-Type': 'text/plain',
+        'X-RateLimit-Limit': '100',
+        'X-RateLimit-Remaining': '0',
+        'Retry-After': '60'
+      }
+    });
+  }
+
+  // Validate API key and get user context
+  let userContext;
+  try {
+    userContext = await validateApiKey(apiKey);
+  } catch (error) {
+    return new Response('Unauthorized', {
+      status: 401,
+      headers: { 'Content-Type': 'text/plain' }
+    });
+  }
+
+  // Create InstanceContext (existing pattern!)
+  const instanceContext: InstanceContext = {
+    n8nApiUrl: userContext.n8nUrl,
+    n8nApiKey: userContext.n8nApiKey
+  };
+
+  // Handle MCP request with user's context
+  try {
+    const response = await this.handleMCPRequest(req, instanceContext);
+
+    // Log successful usage
+    const supabase = getSupabaseClient();
+    await supabase.from('usage_logs').insert({
+      user_id: userContext.userId,
+      tool_name: this.extractToolName(req),
+      status: 'success'
+    });
+
+    return response;
+  } catch (error) {
+    // Log error
+    const supabase = getSupabaseClient();
+    await supabase.from('usage_logs').insert({
+      user_id: userContext.userId,
+      tool_name: this.extractToolName(req),
+      status: 'error',
+      error_message: error instanceof Error ? error.message : 'Unknown error'
+    });
+
+    throw error;
+  }
+}
+
+// Helper method to extract tool name from request
+private extractToolName(req: Request): string {
+  try {
+    const url = new URL(req.url);
+    return url.pathname.split('/').pop() || 'unknown';
+  } catch {
+    return 'unknown';
+  }
+}
+
+// Modify existing handle() method to check for multi-tenant mode
+async handle(req: Request): Promise<Response> {
+  const enableMultiTenant = process.env.ENABLE_MULTI_TENANT === 'true';
+
+  if (enableMultiTenant) {
+    return this.handleMultiTenantRequest(req);
+  } else {
+    // Existing single-tenant logic
+    return this.handleMCPRequest(req, this.defaultContext);
+  }
+}
+```
+
+**Add to package.json dependencies** (bcryptjs is no longer needed once lookups use SHA-256 via Node's built-in `crypto`):
+```json
+{
+  "dependencies": {
+    "@supabase/supabase-js": "^2.39.0"
+  }
+}
+```
+
+Install dependencies:
+```bash
+npm install @supabase/supabase-js
+```
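+
+With `ENABLE_MULTI_TENANT=true` and a key generated through the dashboard, a quick smoke test of the Bearer flow — the URL and key are placeholders; run with `npx tsx smoke-test.ts`:
+
+```typescript
+// smoke-test.ts
+const response = await fetch('http://localhost:3000/mcp', {
+  method: 'POST',
+  headers: {
+    'Content-Type': 'application/json',
+    Authorization: 'Bearer nmcp_your_key_here',
+  },
+  body: JSON.stringify({ jsonrpc: '2.0', id: 1, method: 'tools/list' }),
+});
+
+// Expect 200 with a tools list; 401 means the key or n8n config is wrong
+console.log(response.status);
+console.log(await response.json());
+```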
+
+---
+
+### Day 4: Docker & Deployment Setup
+
+#### 4.1 Create Production Docker Compose
+
+**File:** `docker-compose.prod.yml`
+
+```yaml
+version: '3.8'
+
+services:
+  caddy:
+    image: caddy:2-alpine
+    container_name: n8n-mcp-caddy
+    restart: always
+    ports:
+      - "80:80"
+      - "443:443"
+    volumes:
+      - ./Caddyfile:/etc/caddy/Caddyfile:ro
+      - caddy_data:/data
+      - caddy_config:/config
+    networks:
+      - n8n-mcp-network
+
+  n8n-mcp:
+    image: ghcr.io/czlonkowski/n8n-mcp:latest
+    container_name: n8n-mcp-app
+    restart: always
+    environment:
+      - SUPABASE_URL=${SUPABASE_URL}
+      - SUPABASE_SERVICE_KEY=${SUPABASE_SERVICE_KEY}
+      - MASTER_ENCRYPTION_KEY=${MASTER_ENCRYPTION_KEY}
+      - SESSION_SECRET=${SESSION_SECRET}
+      - SESSION_PERSIST_PATH=/app/sessions
+      - NODE_ENV=production
+      - MCP_MODE=http
+      - PORT=3000
+      - ENABLE_MULTI_TENANT=true
+      - RATE_LIMIT_REQUESTS=100
+    volumes:
+      - ./data/nodes.db:/app/data/nodes.db:ro
+      - session_data:/app/sessions
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:3000/health"]
+      interval: 30s
+      timeout: 3s
+      retries: 3
+      start_period: 10s
+    networks:
+      - n8n-mcp-network
+
+volumes:
+  caddy_data:
+    driver: local
+  caddy_config:
+    driver: local
+  session_data:
+    driver: local
+
+networks:
+  n8n-mcp-network:
+    driver: bridge
+```
+
+#### 4.2 Create Caddyfile
+
+**File:** `Caddyfile`
+
+```
+# Caddy configuration for n8n-mcp
+{
+    # Global options
+    email admin@n8n-mcp.com
+}
+
+api.n8n-mcp.com {
+    # Reverse proxy to n8n-mcp container
+    reverse_proxy n8n-mcp:3000 {
+        # Health check
+        health_uri /health
+        health_interval 30s
+        health_timeout 5s
+
+        # Headers
+        header_up Host {host}
+        header_up X-Real-IP {remote}
+        header_up X-Forwarded-For {remote}
+        header_up X-Forwarded-Proto {scheme}
+    }
+
+    # Global rate limiting (per IP)
+    # NOTE: rate_limit is not in the stock caddy:2-alpine image; it requires
+    # a custom build with the caddy-ratelimit plugin, e.g.
+    #   xcaddy build --with github.com/mholt/caddy-ratelimit
+    rate_limit {
+        zone dynamic {
+            key {remote_host}
+            events 100
+ window 1m + } + } + + # Logging + log { + output file /var/log/caddy/access.log { + roll_size 100mb + roll_keep 5 + } + format json + } + + # Error pages + handle_errors { + respond "{err.status_code} {err.status_text}" + } +} +``` + +#### 4.3 Create Dockerfile (if not exists) + +**File:** `Dockerfile` + +```dockerfile +# Build stage +FROM node:20-alpine AS builder + +WORKDIR /app + +# Copy package files +COPY package*.json ./ +COPY tsconfig.json ./ + +# Install dependencies +RUN npm ci + +# Copy source +COPY src ./src +COPY data ./data + +# Build +RUN npm run build + +# Production stage +FROM node:20-alpine + +WORKDIR /app + +# Install curl for healthcheck +RUN apk add --no-cache curl + +# Copy package files +COPY package*.json ./ + +# Install production dependencies only +RUN npm ci --omit=dev + +# Copy built files +COPY --from=builder /app/dist ./dist +COPY --from=builder /app/data ./data + +# Create session directory +RUN mkdir -p /app/sessions && chown -R node:node /app/sessions + +# Use non-root user +USER node + +EXPOSE 3000 + +CMD ["node", "dist/index.js"] +``` + +#### 4.4 Create Deployment Script + +**File:** `scripts/deploy.sh` + +```bash +#!/bin/bash +set -e + +echo "๐Ÿš€ Deploying n8n-mcp to production..." + +# Build Docker image +echo "๐Ÿ“ฆ Building Docker image..." +docker build -t ghcr.io/czlonkowski/n8n-mcp:latest . + +# Push to registry (optional) +# docker push ghcr.io/czlonkowski/n8n-mcp:latest + +# Pull latest image on server +echo "โฌ‡๏ธ Pulling latest image..." +docker compose -f docker-compose.prod.yml pull + +# Stop containers +echo "๐Ÿ›‘ Stopping containers..." +docker compose -f docker-compose.prod.yml down + +# Start containers +echo "โ–ถ๏ธ Starting containers..." +docker compose -f docker-compose.prod.yml up -d + +# Wait for health check +echo "๐Ÿฅ Waiting for health check..." +sleep 10 + +# Verify +echo "โœ… Verifying deployment..." +curl -f https://api.n8n-mcp.com/health || { + echo "โŒ Health check failed!" + docker compose -f docker-compose.prod.yml logs n8n-mcp + exit 1 +} + +echo "โœ… Deployment successful!" +``` + +Make executable: +```bash +chmod +x scripts/deploy.sh +``` + +#### 4.5 Testing Multi-Tenant Locally + +**Create test script:** `scripts/test-multi-tenant.sh` + +```bash +#!/bin/bash + +# Test multi-tenant API key authentication + +API_URL="http://localhost:3000/mcp" +API_KEY="test-key-replace-with-real-key" + +# Test 1: Health check (no auth needed) +echo "Test 1: Health check..." +curl -s http://localhost:3000/health +echo "" + +# Test 2: Request without auth (should fail) +echo "Test 2: No auth (should fail)..." +curl -s -w "\nHTTP Status: %{http_code}\n" \ + -X POST $API_URL \ + -H "Content-Type: application/json" +echo "" + +# Test 3: Request with invalid key (should fail) +echo "Test 3: Invalid key (should fail)..." +curl -s -w "\nHTTP Status: %{http_code}\n" \ + -X POST $API_URL \ + -H "Authorization: Bearer invalid-key" \ + -H "Content-Type: application/json" +echo "" + +# Test 4: Valid request (should succeed) +echo "Test 4: Valid key (should succeed)..." +curl -s -w "\nHTTP Status: %{http_code}\n" \ + -X POST $API_URL \ + -H "Authorization: Bearer $API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "jsonrpc": "2.0", + "id": 1, + "method": "tools/list" + }' +echo "" + +# Test 5: Rate limiting (send 101 requests) +echo "Test 5: Rate limiting (101 requests)..." 
+for i in {1..101}; do + STATUS=$(curl -s -o /dev/null -w "%{http_code}" \ + -X POST $API_URL \ + -H "Authorization: Bearer $API_KEY" \ + -H "Content-Type: application/json" \ + -d '{"jsonrpc":"2.0","id":1,"method":"tools/list"}') + + if [ "$STATUS" == "429" ]; then + echo "โœ… Rate limited at request $i" + break + fi +done +``` + +--- + +## Phase 2: Frontend Implementation + +**Goal:** User dashboard for signup, API key management, n8n config +**Time:** 5 days +**Assignee:** Frontend developer + +### Day 5-6: Authentication & Setup + +#### 5.1 Setup Supabase in Next.js + +**Install dependencies:** +```bash +cd ../n8n-mcp-landing +npm install @supabase/ssr @supabase/supabase-js +``` + +**Create environment file:** `.env.local` +```bash +NEXT_PUBLIC_SUPABASE_URL=https://xxxxx.supabase.co +NEXT_PUBLIC_SUPABASE_ANON_KEY=eyJxxxxx +``` + +#### 5.2 Create Supabase Client Utils + +**File:** `src/lib/supabase/client.ts` + +```typescript +import { createBrowserClient } from '@supabase/ssr'; + +export function createClient() { + return createBrowserClient( + process.env.NEXT_PUBLIC_SUPABASE_URL!, + process.env.NEXT_PUBLIC_SUPABASE_ANON_KEY! + ); +} +``` + +**File:** `src/lib/supabase/server.ts` + +```typescript +import { createServerClient, type CookieOptions } from '@supabase/ssr'; +import { cookies } from 'next/headers'; + +export async function createClient() { + const cookieStore = await cookies(); + + return createServerClient( + process.env.NEXT_PUBLIC_SUPABASE_URL!, + process.env.NEXT_PUBLIC_SUPABASE_ANON_KEY!, + { + cookies: { + get(name: string) { + return cookieStore.get(name)?.value; + }, + set(name: string, value: string, options: CookieOptions) { + try { + cookieStore.set({ name, value, ...options }); + } catch (error) { + // Handle error + } + }, + remove(name: string, options: CookieOptions) { + try { + cookieStore.set({ name, value: '', ...options }); + } catch (error) { + // Handle error + } + }, + }, + } + ); +} +``` + +#### 5.3 Create Middleware for Auth Protection + +**File:** `src/middleware.ts` + +```typescript +import { createServerClient, type CookieOptions } from '@supabase/ssr'; +import { NextResponse, type NextRequest } from 'next/server'; + +export async function middleware(request: NextRequest) { + let response = NextResponse.next({ + request: { + headers: request.headers, + }, + }); + + const supabase = createServerClient( + process.env.NEXT_PUBLIC_SUPABASE_URL!, + process.env.NEXT_PUBLIC_SUPABASE_ANON_KEY!, + { + cookies: { + get(name: string) { + return request.cookies.get(name)?.value; + }, + set(name: string, value: string, options: CookieOptions) { + request.cookies.set({ + name, + value, + ...options, + }); + response = NextResponse.next({ + request: { + headers: request.headers, + }, + }); + response.cookies.set({ + name, + value, + ...options, + }); + }, + remove(name: string, options: CookieOptions) { + request.cookies.set({ + name, + value: '', + ...options, + }); + response = NextResponse.next({ + request: { + headers: request.headers, + }, + }); + response.cookies.set({ + name, + value: '', + ...options, + }); + }, + }, + } + ); + + const { + data: { user }, + } = await supabase.auth.getUser(); + + // Protect dashboard routes + if (request.nextUrl.pathname.startsWith('/dashboard') && !user) { + return NextResponse.redirect(new URL('/login', request.url)); + } + + // Redirect to dashboard if already logged in + if ((request.nextUrl.pathname === '/login' || request.nextUrl.pathname === '/signup') && user) { + return NextResponse.redirect(new URL('/dashboard', 
request.url));
+  }
+
+  return response;
+}
+
+export const config = {
+  matcher: ['/dashboard/:path*', '/login', '/signup'],
+};
+```
+
+#### 5.4 Create Authentication Pages
+
+**File:** `src/app/(auth)/signup/page.tsx`
+
+```typescript
+'use client';
+
+import { useState } from 'react';
+import { useRouter } from 'next/navigation';
+import { createClient } from '@/lib/supabase/client';
+import { Button } from '@/components/ui/button';
+import { Input } from '@/components/ui/input';
+import { Label } from '@/components/ui/label';
+
+export default function SignupPage() {
+  const router = useRouter();
+  const [email, setEmail] = useState('');
+  const [password, setPassword] = useState('');
+  const [fullName, setFullName] = useState('');
+  const [loading, setLoading] = useState(false);
+  const [error, setError] = useState('');
+  const [message, setMessage] = useState('');
+
+  async function handleSignup(e: React.FormEvent) {
+    e.preventDefault();
+    setLoading(true);
+    setError('');
+    setMessage('');
+
+    const supabase = createClient();
+
+    const { error } = await supabase.auth.signUp({
+      email,
+      password,
+      options: {
+        data: {
+          full_name: fullName,
+        },
+        emailRedirectTo: `${location.origin}/auth/callback`,
+      },
+    });
+
+    if (error) {
+      setError(error.message);
+    } else {
+      setMessage('Check your email for the confirmation link!');
+    }
+
+    setLoading(false);
+  }
+
+  return (
+    <div className="flex min-h-screen items-center justify-center px-4">
+      <div className="w-full max-w-md space-y-6">
+        <div className="text-center">
+          <h1 className="text-2xl font-bold">Sign up for n8n-mcp</h1>
+          <p className="text-sm text-muted-foreground">
+            Join 471 users already building AI workflows
+          </p>
+        </div>
+
+        <form onSubmit={handleSignup} className="space-y-4">
+          <div className="space-y-2">
+            <Label htmlFor="fullName">Full Name</Label>
+            <Input
+              id="fullName"
+              type="text"
+              value={fullName}
+              onChange={(e) => setFullName(e.target.value)}
+            />
+          </div>
+
+          <div className="space-y-2">
+            <Label htmlFor="email">Email</Label>
+            <Input
+              id="email"
+              type="email"
+              required
+              value={email}
+              onChange={(e) => setEmail(e.target.value)}
+            />
+          </div>
+
+          <div className="space-y-2">
+            <Label htmlFor="password">Password</Label>
+            <Input
+              id="password"
+              type="password"
+              required
+              minLength={8}
+              value={password}
+              onChange={(e) => setPassword(e.target.value)}
+            />
+            <p className="text-xs text-muted-foreground">
+              Must be at least 8 characters
+            </p>
+          </div>
+
+          {error && (
+            <div className="text-sm text-red-600">{error}</div>
+          )}
+
+          {message && (
+            <div className="text-sm text-green-600">{message}</div>
+          )}
+
+          <Button type="submit" className="w-full" disabled={loading}>
+            {loading ? 'Creating account...' : 'Sign up'}
+          </Button>
+        </form>
+
+        <p className="text-center text-sm">
+          Already have an account?{' '}
+          <a href="/login" className="underline">
+            Log in
+          </a>
+        </p>
+      </div>
+    </div>
+  );
+}
+```
+
+**File:** `src/app/(auth)/login/page.tsx`
+
+```typescript
+'use client';
+
+import { useState } from 'react';
+import { useRouter } from 'next/navigation';
+import { createClient } from '@/lib/supabase/client';
+import { Button } from '@/components/ui/button';
+import { Input } from '@/components/ui/input';
+import { Label } from '@/components/ui/label';
+
+export default function LoginPage() {
+  const router = useRouter();
+  const [email, setEmail] = useState('');
+  const [password, setPassword] = useState('');
+  const [loading, setLoading] = useState(false);
+  const [error, setError] = useState('');
+
+  async function handleLogin(e: React.FormEvent) {
+    e.preventDefault();
+    setLoading(true);
+    setError('');
+
+    const supabase = createClient();
+
+    const { error } = await supabase.auth.signInWithPassword({
+      email,
+      password,
+    });
+
+    if (error) {
+      setError(error.message);
+      setLoading(false);
+    } else {
+      router.push('/dashboard');
+    }
+  }
+
+  return (
+    <div className="flex min-h-screen items-center justify-center px-4">
+      <div className="w-full max-w-md space-y-6">
+        <div className="text-center">
+          <h1 className="text-2xl font-bold">Welcome back</h1>
+          <p className="text-sm text-muted-foreground">
+            Log in to access your n8n-mcp dashboard
+          </p>
+        </div>
+
+        <form onSubmit={handleLogin} className="space-y-4">
+          <div className="space-y-2">
+            <Label htmlFor="email">Email</Label>
+            <Input
+              id="email"
+              type="email"
+              required
+              value={email}
+              onChange={(e) => setEmail(e.target.value)}
+            />
+          </div>
+
+          <div className="space-y-2">
+            <Label htmlFor="password">Password</Label>
+            <Input
+              id="password"
+              type="password"
+              required
+              value={password}
+              onChange={(e) => setPassword(e.target.value)}
+            />
+          </div>
+
+          {error && (
+            <div className="text-sm text-red-600">{error}</div>
+          )}
+
+          <Button type="submit" className="w-full" disabled={loading}>
+            {loading ? 'Logging in...' : 'Log in'}
+          </Button>
+        </form>
+
+        <p className="text-center text-sm">
+          Don't have an account?{' '}
+          <a href="/signup" className="underline">
+            Sign up
+          </a>
+        </p>
+      </div>
+    </div>
+  );
+}
+```
+
+**File:** `src/app/auth/callback/route.ts`
+
+```typescript
+import { createClient } from '@/lib/supabase/server';
+import { NextResponse } from 'next/server';
+
+export async function GET(request: Request) {
+  const { searchParams, origin } = new URL(request.url);
+  const code = searchParams.get('code');
+  const next = searchParams.get('next') ?? '/dashboard';
+
+  if (code) {
+    const supabase = await createClient();
+    const { error } = await supabase.auth.exchangeCodeForSession(code);
+    if (!error) {
+      return NextResponse.redirect(`${origin}${next}`);
+    }
+  }
+
+  return NextResponse.redirect(`${origin}/login`);
+}
+```
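+
+If the code exchange fails, the bare redirect to `/login` drops the reason. An optional variant — the `error` query parameter name and value are illustrative, and the login page would need to read them from its search params:
+
+```typescript
+// src/app/auth/callback/route.ts — error-aware variant
+import { createClient } from '@/lib/supabase/server';
+import { NextResponse } from 'next/server';
+
+export async function GET(request: Request) {
+  const { searchParams, origin } = new URL(request.url);
+  const code = searchParams.get('code');
+  const next = searchParams.get('next') ?? '/dashboard';
+
+  if (code) {
+    const supabase = await createClient();
+    const { error } = await supabase.auth.exchangeCodeForSession(code);
+    if (!error) {
+      return NextResponse.redirect(`${origin}${next}`);
+    }
+  }
+
+  // Carry a hint the login page can render
+  return NextResponse.redirect(`${origin}/login?error=auth_callback_failed`);
+}
+```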
+
+---
+
+### Day 7-8: Dashboard Implementation
+
+#### 7.1 Create Dashboard Layout
+
+**File:** `src/app/(dashboard)/layout.tsx`
+
+```typescript
+import { createClient } from '@/lib/supabase/server';
+import { redirect } from 'next/navigation';
+import Link from 'next/link';
+
+export default async function DashboardLayout({
+  children,
+}: {
+  children: React.ReactNode;
+}) {
+  const supabase = await createClient();
+  const {
+    data: { user },
+  } = await supabase.auth.getUser();
+
+  if (!user) {
+    redirect('/login');
+  }
+
+  async function signOut() {
+    'use server';
+    const supabase = await createClient();
+    await supabase.auth.signOut();
+    redirect('/');
+  }
+
+  return (
+    <div className="flex min-h-screen">
+      {/* Sidebar */}
+      <aside className="w-64 border-r p-6">
+        <div className="mb-8 font-bold">n8n-mcp</div>
+        <nav className="space-y-2">
+          <Link href="/dashboard" className="block">Dashboard</Link>
+          <Link href="/api-keys" className="block">API Keys</Link>
+          <Link href="/n8n-config" className="block">n8n Configuration</Link>
+          <Link href="/usage" className="block">Usage</Link>
+        </nav>
+        <form action={signOut} className="mt-8">
+          <button type="submit" className="text-sm text-muted-foreground">
+            Sign out
+          </button>
+        </form>
+      </aside>
+
+      {/* Main content */}
+      <main className="flex-1 p-8">{children}</main>
+    </div>
+  );
+}
+```
+
+#### 7.2 Dashboard Overview Page
+
+**File:** `src/app/(dashboard)/dashboard/page.tsx`
+
+```typescript
+import { createClient } from '@/lib/supabase/server';
+import { Card } from '@/components/ui/card';
+
+export default async function DashboardPage() {
+  const supabase = await createClient();
+  const {
+    data: { user },
+  } = await supabase.auth.getUser();
+
+  // Fetch stats
+  const { count: apiKeyCount } = await supabase
+    .from('api_keys')
+    .select('*', { count: 'exact', head: true })
+    .eq('user_id', user!.id)
+    .eq('is_active', true);
+
+  // Count only today's requests, to match the card label below
+  const { count: usageCount } = await supabase
+    .from('usage_logs')
+    .select('*', { count: 'exact', head: true })
+    .eq('user_id', user!.id)
+    .gte('created_at', new Date().toISOString().split('T')[0]);
+
+  const { data: n8nInstance } = await supabase
+    .from('n8n_instances')
+    .select('instance_url, is_active')
+    .eq('user_id', user!.id)
+    .single();
+
+  return (
+    <div className="space-y-8">
+      <div>
+        <h1 className="text-2xl font-bold">Dashboard</h1>
+        <p className="text-muted-foreground">
+          Welcome to your n8n-mcp control panel
+        </p>
+      </div>
+
+      <div className="grid gap-4 md:grid-cols-3">
+        <Card className="p-6">
+          <h3 className="text-sm font-medium text-muted-foreground">API Keys</h3>
+          <p className="text-3xl font-bold">{apiKeyCount || 0}</p>
+        </Card>
+
+        <Card className="p-6">
+          <h3 className="text-sm font-medium text-muted-foreground">Requests Today</h3>
+          <p className="text-3xl font-bold">{usageCount || 0}</p>
+        </Card>
+
+        <Card className="p-6">
+          <h3 className="text-sm font-medium text-muted-foreground">n8n Status</h3>
+          <p className="text-3xl">{n8nInstance?.is_active ? '✅' : '❌'}</p>
+        </Card>
+      </div>
+
+      {!n8nInstance && (
+        <Card className="border-yellow-300 bg-yellow-50 p-6">
+          <h3 className="font-semibold">⚠️ Action Required</h3>
+          <p className="text-sm">
+            You need to configure your n8n instance before using the service.
+          </p>
+          <a href="/n8n-config" className="text-sm font-medium underline">
+            Configure n8n →
+          </a>
+        </Card>
+      )}
+    </div>
+  );
+}
+```
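+
+Both the "Requests Today" count above and the usage page in section 9.1 compare against `new Date().toISOString().split('T')[0]`, which is midnight **UTC**, so "today" can be off by several hours for users elsewhere. A sketch of a server-local alternative, if that matters for your audience:
+
+```typescript
+// Returns an ISO timestamp for the server's local midnight,
+// for use with .gte('created_at', startOfTodayISO())
+export function startOfTodayISO(): string {
+  const start = new Date();
+  start.setHours(0, 0, 0, 0); // local midnight
+  return start.toISOString();
+}
+```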
+
+#### 7.3 API Key Management Page
+
+**File:** `src/app/(dashboard)/api-keys/page.tsx`
+
+```typescript
+import { createClient } from '@/lib/supabase/server';
+import { ApiKeyList } from '@/components/api-key-list';
+import { CreateApiKeyButton } from '@/components/create-api-key-button';
+
+export default async function ApiKeysPage() {
+  const supabase = await createClient();
+  const {
+    data: { user },
+  } = await supabase.auth.getUser();
+
+  const { data: apiKeys } = await supabase
+    .from('api_keys')
+    .select('*')
+    .eq('user_id', user!.id)
+    .order('created_at', { ascending: false });
+
+  return (
+    <div className="space-y-8">
+      <div className="flex items-center justify-between">
+        <div>
+          <h1 className="text-2xl font-bold">API Keys</h1>
+          <p className="text-muted-foreground">
+            Manage your n8n-mcp API keys for MCP clients
+          </p>
+        </div>
+        <CreateApiKeyButton />
+      </div>
+
+      <ApiKeyList apiKeys={apiKeys || []} />
+    </div>
+  );
+}
+```
+
+**File:** `src/components/create-api-key-button.tsx`
+
+```typescript
+'use client';
+
+import { useState } from 'react';
+import { useRouter } from 'next/navigation';
+import { Button } from '@/components/ui/button';
+import { Input } from '@/components/ui/input';
+import { Label } from '@/components/ui/label';
+import {
+  Dialog,
+  DialogContent,
+  DialogHeader,
+  DialogTitle,
+  DialogTrigger,
+} from '@/components/ui/dialog';
+import { generateApiKey } from '@/app/(dashboard)/api-keys/actions';
+
+export function CreateApiKeyButton() {
+  const router = useRouter();
+  const [open, setOpen] = useState(false);
+  const [name, setName] = useState('');
+  const [loading, setLoading] = useState(false);
+  const [generatedKey, setGeneratedKey] = useState<string | null>(null);
+
+  async function handleCreate() {
+    setLoading(true);
+    try {
+      const result = await generateApiKey(name);
+      setGeneratedKey(result.key);
+    } catch (error) {
+      alert('Failed to generate API key');
+    } finally {
+      setLoading(false);
+    }
+  }
+
+  function handleClose() {
+    setOpen(false);
+    setName('');
+    setGeneratedKey(null);
+    router.refresh();
+  }
+
+  return (
+    <Dialog open={open} onOpenChange={setOpen}>
+      <DialogTrigger asChild>
+        <Button>Create API Key</Button>
+      </DialogTrigger>
+      <DialogContent>
+        <DialogHeader>
+          <DialogTitle>Create API Key</DialogTitle>
+        </DialogHeader>
+
+        {!generatedKey ? (
+          <div className="space-y-4">
+            <div className="space-y-2">
+              <Label htmlFor="keyName">Name</Label>
+              <Input
+                id="keyName"
+                value={name}
+                onChange={(e) => setName(e.target.value)}
+              />
+              <p className="text-xs text-muted-foreground">
+                A friendly name to identify this key
+              </p>
+            </div>
+            <Button onClick={handleCreate} disabled={loading || !name}>
+              {loading ? 'Creating...' : 'Create'}
+            </Button>
+          </div>
+        ) : (
+          <div className="space-y-4">
+            <div>
+              <p className="font-semibold">⚠️ Save this key securely!</p>
+              <p className="text-sm text-muted-foreground">
+                You won't be able to see it again.
+              </p>
+            </div>
+            <code className="block break-all rounded bg-muted p-3 text-sm">
+              {generatedKey}
+            </code>
+            <Button onClick={handleClose}>Done</Button>
+          </div>
+        )}
+      </DialogContent>
+    </Dialog>
+  );
+}
+```
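+
+Users will usually want the one-time key on the clipboard immediately. An optional drop-in for the success state above — a sketch assuming the browser Clipboard API (available in secure contexts):
+
+```typescript
+'use client';
+
+import { Button } from '@/components/ui/button';
+
+// Renders next to the generated key; copies it without showing it again
+export function CopyKeyButton({ value }: { value: string }) {
+  return (
+    <Button
+      variant="outline"
+      onClick={() => navigator.clipboard.writeText(value)}
+    >
+      Copy to clipboard
+    </Button>
+  );
+}
+```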
+
+**File:** `src/app/(dashboard)/api-keys/actions.ts`
+
+```typescript
+'use server';
+
+import { createClient } from '@/lib/supabase/server';
+import crypto from 'crypto';
+import { revalidatePath } from 'next/cache';
+
+export async function generateApiKey(name: string) {
+  const supabase = await createClient();
+  const {
+    data: { user },
+  } = await supabase.auth.getUser();
+
+  if (!user) throw new Error('Not authenticated');
+
+  // Generate secure random key
+  const key = crypto.randomBytes(32).toString('base64url');
+  const fullKey = `nmcp_${key}`;
+  // SHA-256, not bcrypt: the backend validator looks keys up by exact
+  // hash equality, and bcrypt's per-hash salt would never match.
+  // This must stay in sync with validateApiKey() on the backend.
+  const hash = crypto.createHash('sha256').update(fullKey).digest('hex');
+  const prefix = `nmcp_${key.substring(0, 8)}...`;
+
+  // Store in database
+  const { data, error } = await supabase
+    .from('api_keys')
+    .insert({
+      user_id: user.id,
+      key_hash: hash,
+      key_prefix: prefix,
+      name: name,
+    })
+    .select()
+    .single();
+
+  if (error) throw error;
+
+  // Note: the (dashboard) route group does not appear in the URL
+  revalidatePath('/api-keys');
+
+  return { key: fullKey, id: data.id };
+}
+
+export async function revokeApiKey(id: string) {
+  const supabase = await createClient();
+  const {
+    data: { user },
+  } = await supabase.auth.getUser();
+
+  if (!user) throw new Error('Not authenticated');
+
+  const { error } = await supabase
+    .from('api_keys')
+    .update({ is_active: false })
+    .eq('id', id)
+    .eq('user_id', user.id);
+
+  if (error) throw error;
+
+  revalidatePath('/api-keys');
+}
+```
+
+**File:** `src/components/api-key-list.tsx`
+
+```typescript
+'use client';
+
+import { Card } from '@/components/ui/card';
+import { Button } from '@/components/ui/button';
+import { revokeApiKey } from '@/app/(dashboard)/api-keys/actions';
+
+interface ApiKey {
+  id: string;
+  name: string;
+  key_prefix: string;
+  created_at: string;
+  last_used_at: string | null;
+  is_active: boolean;
+}
+
+export function ApiKeyList({ apiKeys }: { apiKeys: ApiKey[] }) {
+  async function handleRevoke(id: string) {
+    if (confirm('Are you sure you want to revoke this API key?')) {
+      await revokeApiKey(id);
+    }
+  }
+
+  if (apiKeys.length === 0) {
+    return (
+      <Card className="p-8 text-center">
+        <p className="text-muted-foreground">No API keys yet. Create your first one!</p>
+      </Card>
+    );
+  }
+
+  return (
+    <div className="space-y-4">
+      {apiKeys.map((key) => (
+        <Card key={key.id} className="p-4">
+          <div className="flex items-center justify-between">
+            <div>
+              <h3 className="font-medium">{key.name}</h3>
+              <p className="font-mono text-sm text-muted-foreground">
+                {key.key_prefix}
+              </p>
+              <p className="text-xs text-muted-foreground">
+                Created: {new Date(key.created_at).toLocaleDateString()}
+                {key.last_used_at && (
+                  <> · Last used: {new Date(key.last_used_at).toLocaleString()}</>
+                )}
+              </p>
+            </div>
+            <div className="flex items-center gap-3">
+              {key.is_active ? (
+                <span className="text-sm text-green-600">● Active</span>
+              ) : (
+                <span className="text-sm text-gray-400">● Revoked</span>
+              )}
+              {key.is_active && (
+                <Button
+                  variant="destructive"
+                  size="sm"
+                  onClick={() => handleRevoke(key.id)}
+                >
+                  Revoke
+                </Button>
+              )}
+            </div>
+          </div>
+        </Card>
+      ))}
+    </div>
+  );
+}
+```
+
+#### 7.4 n8n Configuration Page
+
+**File:** `src/app/(dashboard)/n8n-config/page.tsx`
+
+```typescript
+import { createClient } from '@/lib/supabase/server';
+import { N8nConfigForm } from '@/components/n8n-config-form';
+
+export default async function N8nConfigPage() {
+  const supabase = await createClient();
+  const {
+    data: { user },
+  } = await supabase.auth.getUser();
+
+  const { data: instance } = await supabase
+    .from('n8n_instances')
+    .select('instance_url, is_active')
+    .eq('user_id', user!.id)
+    .single();
+
+  return (
+    <div className="space-y-8">
+      <div>
+        <h1 className="text-2xl font-bold">n8n Configuration</h1>
+        <p className="text-muted-foreground">
+          Connect your n8n instance to n8n-mcp
+        </p>
+      </div>
+
+      <N8nConfigForm currentInstance={instance} />
+    </div>
+  );
+}
+```
+
+**File:** `src/components/n8n-config-form.tsx`
+
+```typescript
+'use client';
+
+import { useState } from 'react';
+import { Button } from '@/components/ui/button';
+import { Input } from '@/components/ui/input';
+import { Label } from '@/components/ui/label';
+import { Card } from '@/components/ui/card';
+import { saveN8nConfig, testN8nConnection } from '@/app/(dashboard)/n8n-config/actions';
+
+interface N8nConfigFormProps {
+  currentInstance: {
+    instance_url: string;
+    is_active: boolean;
+  } | null;
+}
+
+export function N8nConfigForm({ currentInstance }: N8nConfigFormProps) {
+  const [instanceUrl, setInstanceUrl] = useState(
+    currentInstance?.instance_url || ''
+  );
+  const [apiKey, setApiKey] = useState('');
+  const [testing, setTesting] = useState(false);
+  const [saving, setSaving] = useState(false);
+  const [testResult, setTestResult] = useState<'success' | 'error' | null>(null);
+  const [error, setError] = useState('');
+
+  async function handleTest() {
+    setTesting(true);
+    setError('');
+    setTestResult(null);
+
+    try {
+      await testN8nConnection(instanceUrl, apiKey);
+      setTestResult('success');
+    } catch (err) {
+      setTestResult('error');
+      setError(err instanceof Error ? err.message : 'Connection failed');
+    } finally {
+      setTesting(false);
+    }
+  }
+
+  async function handleSave() {
+    setSaving(true);
+    setError('');
+
+    try {
+      await saveN8nConfig(instanceUrl, apiKey);
+      alert('Configuration saved successfully!');
+    } catch (err) {
+      setError(err instanceof Error ? err.message : 'Failed to save');
+    } finally {
+      setSaving(false);
+    }
+  }
+
+  return (
+    <Card className="max-w-xl p-6">
+      <div className="space-y-6">
+        <div className="space-y-2">
+          <Label htmlFor="instanceUrl">n8n Instance URL</Label>
+          <Input
+            id="instanceUrl"
+            type="url"
+            placeholder="https://your-n8n.com"
+            value={instanceUrl}
+            onChange={(e) => setInstanceUrl(e.target.value)}
+          />
+          <p className="text-xs text-muted-foreground">
+            The URL of your n8n instance
+          </p>
+        </div>
+
+        <div className="space-y-2">
+          <Label htmlFor="apiKey">n8n API Key</Label>
+          <Input
+            id="apiKey"
+            type="password"
+            value={apiKey}
+            onChange={(e) => setApiKey(e.target.value)}
+          />
+          <p className="text-xs text-muted-foreground">
+            Find this in your n8n Settings → API
+          </p>
+        </div>
+
+        {testResult && (
+          <div className={testResult === 'success' ? 'text-sm text-green-600' : 'text-sm text-red-600'}>
+            {testResult === 'success' ? '✅ Connection successful!' : `❌ ${error}`}
+          </div>
+        )}
+
+        <div className="flex gap-3">
+          <Button
+            variant="outline"
+            onClick={handleTest}
+            disabled={testing || !instanceUrl || !apiKey}
+          >
+            {testing ? 'Testing...' : 'Test Connection'}
+          </Button>
+          <Button
+            onClick={handleSave}
+            disabled={saving || !instanceUrl || !apiKey}
+          >
+            {saving ? 'Saving...' : 'Save Configuration'}
+          </Button>
+        </div>
+
+        {currentInstance && (
+          <div className="border-t pt-4">
+            <p className="text-sm">
+              Current instance:{' '}
+              <span className="font-mono">{currentInstance.instance_url}</span>{' '}
+              <span>
+                {currentInstance.is_active ? '✅ Active' : '❌ Inactive'}
+              </span>
+            </p>
+          </div>
+        )}
+      </div>
+    </Card>
+  );
+}
+```
+
+**File:** `src/app/(dashboard)/n8n-config/actions.ts`
+
+```typescript
+'use server';
+
+import { createClient } from '@/lib/supabase/server';
+
+// Placeholder only — base64 is an encoding, not encryption, and the backend's
+// decrypt() cannot read it. Replace with the AES-256-GCM scheme from
+// src/services/encryption.ts before launch (see the sketch below).
+function encrypt(text: string, userId: string): string {
+  return Buffer.from(text).toString('base64');
+}
+
+export async function testN8nConnection(
+  instanceUrl: string,
+  apiKey: string
+): Promise<boolean> {
+  try {
+    const response = await fetch(`${instanceUrl}/api/v1/workflows`, {
+      headers: {
+        'X-N8N-API-KEY': apiKey,
+      },
+    });
+
+    if (!response.ok) {
+      throw new Error('Invalid credentials or instance URL');
+    }
+
+    return true;
+  } catch (error) {
+    throw new Error('Failed to connect to n8n instance');
+  }
+}
+
+export async function saveN8nConfig(
+  instanceUrl: string,
+  apiKey: string
+) {
+  const supabase = await createClient();
+  const {
+    data: { user },
+  } = await supabase.auth.getUser();
+
+  if (!user) throw new Error('Not authenticated');
+
+  // Test connection first
+  await testN8nConnection(instanceUrl, apiKey);
+
+  // Encrypt API key (placeholder for MVP — see note above)
+  const encryptedKey = encrypt(apiKey, user.id);
+
+  // Upsert configuration
+  const { error } = await supabase.from('n8n_instances').upsert(
+    {
+      user_id: user.id,
+      instance_url: instanceUrl,
+      api_key_encrypted: encryptedKey,
+      is_active: true,
+      last_validated_at: new Date().toISOString(),
+    },
+    {
+      onConflict: 'user_id,instance_url',
+    }
+  );
+
+  if (error) throw error;
+}
+```
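+
+The base64 placeholder above means stored keys are effectively plaintext, and multi-tenant requests would fail at the backend's decryption step. Before launch, the server action should write the same AES-256-GCM format as `src/services/encryption.ts`. A sketch, assuming `MASTER_ENCRYPTION_KEY` is also set in the frontend's server environment (never as a `NEXT_PUBLIC_*` variable):
+
+```typescript
+// Mirrors the backend's deriveKey()/encrypt(): base64(iv + auth tag + ciphertext)
+import crypto from 'crypto';
+
+const IV_LENGTH = 16;
+const KEY_LENGTH = 32;
+
+function deriveKey(userId: string): Buffer {
+  const masterKey = process.env.MASTER_ENCRYPTION_KEY;
+  if (!masterKey) throw new Error('MASTER_ENCRYPTION_KEY not set');
+  return crypto.pbkdf2Sync(masterKey, userId, 100000, KEY_LENGTH, 'sha512');
+}
+
+export function encrypt(plaintext: string, userId: string): string {
+  const key = deriveKey(userId);
+  const iv = crypto.randomBytes(IV_LENGTH);
+  const cipher = crypto.createCipheriv('aes-256-gcm', key, iv);
+  const encrypted = Buffer.concat([cipher.update(plaintext, 'utf8'), cipher.final()]);
+  return Buffer.concat([iv, cipher.getAuthTag(), encrypted]).toString('base64');
+}
+```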
+
+---
+
+### Day 9: Polish & Deployment
+
+#### 9.1 Add Usage Stats Page
+
+**File:** `src/app/(dashboard)/usage/page.tsx`
+
+```typescript
+import { createClient } from '@/lib/supabase/server';
+import { Card } from '@/components/ui/card';
+
+export default async function UsagePage() {
+  const supabase = await createClient();
+  const {
+    data: { user },
+  } = await supabase.auth.getUser();
+
+  // Get recent usage
+  const { data: recentLogs } = await supabase
+    .from('usage_logs')
+    .select('tool_name, status, created_at')
+    .eq('user_id', user!.id)
+    .order('created_at', { ascending: false })
+    .limit(50);
+
+  // Get stats
+  const { count: totalRequests } = await supabase
+    .from('usage_logs')
+    .select('*', { count: 'exact', head: true })
+    .eq('user_id', user!.id);
+
+  const { count: todayRequests } = await supabase
+    .from('usage_logs')
+    .select('*', { count: 'exact', head: true })
+    .eq('user_id', user!.id)
+    .gte('created_at', new Date().toISOString().split('T')[0]);
+
+  return (
+    <div className="space-y-8">
+      <div>
+        <h1 className="text-2xl font-bold">Usage Statistics</h1>
+        <p className="text-muted-foreground">
+          Track your n8n-mcp API usage
+        </p>
+      </div>
+
+      <div className="grid gap-4 md:grid-cols-2">
+        <Card className="p-6">
+          <h3 className="text-sm font-medium text-muted-foreground">Total Requests</h3>
+          <p className="text-3xl font-bold">{totalRequests || 0}</p>
+        </Card>
+
+        <Card className="p-6">
+          <h3 className="text-sm font-medium text-muted-foreground">Today's Requests</h3>
+          <p className="text-3xl font-bold">{todayRequests || 0}</p>
+        </Card>
+      </div>
+
+      <Card className="p-6">
+        <h3 className="mb-4 font-semibold">Recent Activity</h3>
+        <div className="space-y-2">
+          {recentLogs && recentLogs.length > 0 ? (
+            recentLogs.map((log, i) => (
+              <div key={i} className="flex items-center justify-between text-sm">
+                <span className="font-mono">{log.tool_name}</span>
+                <span>{log.status}</span>
+                <span className="text-muted-foreground">
+                  {new Date(log.created_at).toLocaleTimeString()}
+                </span>
+              </div>
+            ))
+          ) : (
+            <p className="text-sm text-muted-foreground">No activity yet</p>
+          )}
+        </div>
+      </Card>
+    </div>
+ ); +} +``` + +#### 9.2 Deploy Frontend to Vercel + +```bash +# In n8n-mcp-landing directory +cd ../n8n-mcp-landing + +# Install Vercel CLI +npm install -g vercel + +# Login to Vercel +vercel login + +# Deploy +vercel --prod + +# Set environment variables in Vercel dashboard +# NEXT_PUBLIC_SUPABASE_URL +# NEXT_PUBLIC_SUPABASE_ANON_KEY +``` + +#### 9.3 Final Backend Build & Test + +```bash +# In n8n-mcp directory +cd ../n8n-mcp + +# Run all tests +npm test + +# Type check +npm run typecheck + +# Build +npm run build + +# Test Docker build +docker build -t n8n-mcp:test . + +# Test locally with docker-compose +docker-compose -f docker-compose.prod.yml up -d + +# Verify +curl http://localhost:3000/health +``` + +--- + +## Phase 3: Testing & Launch + +**Goal:** Test thoroughly and launch to 471 waitlist users +**Time:** 3 days +**Assignee:** All team members + +### Day 10: Multi-User & Platform Testing + +#### 10.1 Multi-User Testing + +**Create 2+ test accounts:** + +```bash +# Test User 1 +Email: test1@example.com +n8n Instance: https://test-n8n-1.com +API Key: generated via dashboard + +# Test User 2 +Email: test2@example.com +n8n Instance: https://test-n8n-2.com +API Key: generated via dashboard +``` + +**Test isolation:** + +1. User 1 creates API key +2. User 2 creates API key +3. Verify User 1 cannot see User 2's keys +4. Make MCP requests with both keys +5. Verify usage logs are isolated +6. Try User 1's key with User 2's data โ†’ should fail + +**Checklist:** +- [ ] Users can only see their own API keys +- [ ] Users can only see their own n8n config +- [ ] Users can only see their own usage logs +- [ ] Cross-user API keys don't work +- [ ] Rate limiting works per user + +#### 10.2 Platform Testing + +**Test all MCP clients:** + +**Claude Desktop:** +```json +// ~/Library/Application Support/Claude/claude_desktop_config.json (Mac) +// %APPDATA%\Claude\claude_desktop_config.json (Windows) +{ + "mcpServers": { + "n8n-mcp": { + "url": "https://api.n8n-mcp.com/mcp", + "authentication": { + "type": "bearer", + "token": "nmcp_your_key_here" + } + } + } +} +``` + +Test commands: +- "List n8n nodes" +- "Search for Slack nodes" +- "Get node info for HTTP Request" +- "Create a workflow with Webhook trigger" + +**Cursor:** +```json +// ~/.cursor/mcp.json +{ + "servers": { + "n8n-mcp": { + "url": "https://api.n8n-mcp.com/mcp", + "headers": { + "Authorization": "Bearer nmcp_your_key_here" + } + } + } +} +``` + +**Windsurf:** +```json +// Settings > MCP Servers +{ + "serverUrl": "https://api.n8n-mcp.com/mcp", + "authToken": "nmcp_your_key_here" +} +``` + +**Checklist:** +- [ ] Claude Desktop connects successfully +- [ ] Cursor connects successfully +- [ ] Windsurf connects successfully +- [ ] All MCP tools work in each client +- [ ] Rate limiting headers appear +- [ ] Errors are descriptive + +#### 10.3 Load Testing + +**Install siege:** +```bash +brew install siege # Mac +sudo apt install siege # Linux +``` + +**Create test script:** `scripts/load-test.sh` + +```bash +#!/bin/bash + +API_URL="https://api.n8n-mcp.com/mcp" +API_KEY="nmcp_test_key" + +# Create URLs file +cat > /tmp/urls.txt << EOF +$API_URL POST Content-Type: application/json +Authorization: Bearer $API_KEY +{"jsonrpc":"2.0","id":1,"method":"tools/list"} +EOF + +# Run load test: 100 concurrent users, 1 minute +siege -c 100 -t 1M -f /tmp/urls.txt + +# Expected results: +# - Availability: 100% +# - Response time: <500ms average +# - Some 429 rate limit responses (expected) +``` + +**Checklist:** +- [ ] Server handles 100 concurrent users 
+- [ ] Average response time <500ms +- [ ] No crashes or errors +- [ ] Rate limiting kicks in appropriately +- [ ] CPU usage <80% +- [ ] Memory usage <4GB + +--- + +### Day 11: Documentation & Email Campaign + +#### 11.1 Create User Documentation + +**File:** `docs/user-guide.md` + +```markdown +# n8n-mcp User Guide + +## Getting Started + +### 1. Sign Up + +Visit https://www.n8n-mcp.com and click "Sign Up". +Enter your email and create a password. +Verify your email address. + +### 2. Configure Your n8n Instance + +1. Go to Dashboard โ†’ n8n Configuration +2. Enter your n8n instance URL (e.g., https://your-n8n.com) +3. Enter your n8n API key (find in n8n Settings โ†’ API) +4. Click "Test Connection" +5. Click "Save Configuration" + +### 3. Create an API Key + +1. Go to Dashboard โ†’ API Keys +2. Click "Create API Key" +3. Enter a name (e.g., "Claude Desktop") +4. Copy the generated key (you won't see it again!) + +### 4. Configure Your MCP Client + +#### Claude Desktop + +File location: +- Mac: `~/Library/Application Support/Claude/claude_desktop_config.json` +- Windows: `%APPDATA%\Claude\claude_desktop_config.json` + +Add this configuration: + +\`\`\`json +{ + "mcpServers": { + "n8n-mcp": { + "url": "https://api.n8n-mcp.com/mcp", + "authentication": { + "type": "bearer", + "token": "nmcp_your_key_here" + } + } + } +} +\`\`\` + +Restart Claude Desktop. + +#### Cursor + +File: `~/.cursor/mcp.json` + +\`\`\`json +{ + "servers": { + "n8n-mcp": { + "url": "https://api.n8n-mcp.com/mcp", + "headers": { + "Authorization": "Bearer nmcp_your_key_here" + } + } + } +} +\`\`\` + +Restart Cursor. + +## Usage + +Try these commands: +- "List all n8n nodes" +- "Search for Slack nodes" +- "How do I use the HTTP Request node?" +- "Create a workflow that triggers on webhook" + +## Troubleshooting + +### "Unauthorized" Error +- Check your API key is correct +- Verify the key is active in your dashboard +- Ensure n8n instance is configured + +### "Rate Limit Exceeded" +- Free tier: 100 requests/minute +- Wait 1 minute and try again +- Contact us for higher limits + +### Connection Timeout +- Verify n8n instance is accessible +- Check your n8n API key is valid +- Test connection in dashboard + +## Support + +- Email: support@n8n-mcp.com +- Discord: [Join our community] +- GitHub: https://github.com/czlonkowski/n8n-mcp +``` + +#### 11.2 Create Email Templates + +**Waitlist Invitation Email:** + +```html +Subject: ๐ŸŽ‰ You're invited to n8n-mcp hosted service! + +Hi {{name}}, + +You're one of 471 users from our waitlist with early access to the hosted n8n-mcp service! + +What is n8n-mcp? +Connect your n8n workflows to Claude, Cursor, Windsurf, and any MCP-compatible AI assistant. + +Getting Started: +1. Sign up: https://www.n8n-mcp.com/signup?ref=waitlist +2. Configure your n8n instance +3. Generate an API key +4. Add to your MCP client +5. Start building AI-powered workflows! + +Free for Waitlist Users: +โœ… 100 requests/minute +โœ… All MCP tools +โœ… Community support +โœ… No credit card required + +Need help? Reply to this email or join our Discord. + +Happy automating! +The n8n-mcp Team + +--- +Didn't sign up for the waitlist? Ignore this email. 
```

#### 11.3 Prepare Launch Checklist

**File:** `docs/launch-checklist.md`

```markdown
# Launch Checklist

## Pre-Launch (Complete before sending emails)

### Infrastructure
- [ ] Production server running
- [ ] SSL certificates working
- [ ] DNS configured correctly
- [ ] Health endpoint responding
- [ ] Monitoring enabled

### Database
- [ ] Schema deployed
- [ ] RLS policies active
- [ ] Backups enabled
- [ ] Test data removed

### Backend
- [ ] Multi-tenant mode enabled
- [ ] API key validation working
- [ ] Rate limiting functional
- [ ] Encryption working
- [ ] All tests passing

### Frontend
- [ ] Deployed to production
- [ ] Auth flow working
- [ ] API key generation works
- [ ] n8n config saves correctly
- [ ] Usage stats displaying

### Testing
- [ ] Multi-user isolation verified
- [ ] All MCP clients tested
- [ ] Load test passed
- [ ] Security audit done

### Documentation
- [ ] User guide published
- [ ] Platform setup guides ready
- [ ] Troubleshooting docs complete
- [ ] Email templates ready

## Launch Day

### Morning
- [ ] Final smoke test
- [ ] Backup database
- [ ] Monitor logs
- [ ] Support email ready

### Soft Launch (First 50 users)
- [ ] Send email to first 50
- [ ] Monitor signups
- [ ] Watch for errors
- [ ] Respond to questions

### Full Launch (Next 421 users)
- [ ] Verify soft launch successful
- [ ] Send remaining emails
- [ ] Monitor onboarding funnel
- [ ] Track activation rate

## Post-Launch

### First 24 Hours
- [ ] Monitor error rates
- [ ] Check server resources
- [ ] Respond to support emails
- [ ] Fix critical bugs

### First Week
- [ ] Analyze usage patterns
- [ ] Collect user feedback
- [ ] Identify pain points
- [ ] Plan improvements
```

---

### Day 12: Launch!

#### 12.1 Pre-Launch Verification

```bash
# Run final checks
./scripts/pre-launch-check.sh
```

**File:** `scripts/pre-launch-check.sh`

```bash
#!/bin/bash

echo "🔍 Running pre-launch checks..."

# Check health endpoint
echo "1. Health check..."
STATUS=$(curl -s -o /dev/null -w "%{http_code}" https://api.n8n-mcp.com/health)
if [ "$STATUS" == "200" ]; then
  echo "✅ Health check passed"
else
  echo "❌ Health check failed: $STATUS"
  exit 1
fi

# Check SSL (stdin redirected so s_client exits immediately)
echo "2. SSL certificate..."
openssl s_client -connect api.n8n-mcp.com:443 -servername api.n8n-mcp.com </dev/null 2>/dev/null | grep "Verify return code: 0"
if [ $? -eq 0 ]; then
  echo "✅ SSL valid"
else
  echo "❌ SSL invalid"
  exit 1
fi

# Check frontend
echo "3. Frontend check..."
STATUS=$(curl -s -o /dev/null -w "%{http_code}" https://www.n8n-mcp.com)
if [ "$STATUS" == "200" ]; then
  echo "✅ Frontend accessible"
else
  echo "❌ Frontend failed: $STATUS"
  exit 1
fi

# Check database connection
echo "4. Database check..."
# (Add Supabase connectivity test)

echo ""
echo "✅ All pre-launch checks passed!"
echo "Ready to launch! 
๐Ÿš€" +``` + +#### 12.2 Launch Procedure + +**9:00 AM - Soft Launch (50 users)** + +```bash +# Send to first 50 waitlist users +# Monitor: https://dashboard.n8n-mcp.com/analytics + +# Watch logs +docker compose -f docker-compose.prod.yml logs -f n8n-mcp + +# Monitor server +htop +``` + +**11:00 AM - Check Results** + +Metrics to check: +- Signup rate: Target 70% (35/50) +- Activation rate: Target 60% (21/50) +- Error rate: Target <5% +- Support emails: Respond within 1 hour + +**2:00 PM - Full Launch (421 users)** + +If soft launch successful: +```bash +# Send to remaining waitlist +# Continue monitoring +``` + +#### 12.3 Monitoring During Launch + +**Real-time monitoring:** + +```bash +# Server resources +watch -n 5 'top -b -n 1 | head -20' + +# Request rate +watch -n 5 'docker compose logs n8n-mcp | grep "POST /mcp" | tail -20' + +# Error rate +watch -n 5 'docker compose logs n8n-mcp | grep "ERROR" | tail -10' + +# Database connections +# Check Supabase dashboard +``` + +**Key metrics:** +- Server CPU: Should stay <60% +- Memory: Should stay <4GB +- Response time: Should be <500ms +- Error rate: Should be <2% + +--- + +## Troubleshooting + +### Common Issues + +#### Issue 1: "Unauthorized" Errors + +**Symptoms:** +- Users getting 401 errors +- API key validation failing + +**Debug:** +```bash +# Check API key in database +# Via Supabase SQL Editor: +SELECT * FROM api_keys WHERE key_prefix LIKE 'nmcp_%'; + +# Check if user has n8n instance configured +SELECT * FROM n8n_instances WHERE user_id = 'xxx'; + +# Check backend logs +docker compose logs n8n-mcp | grep "validateApiKey" +``` + +**Solutions:** +- Verify API key was copied correctly +- Check n8n instance is configured +- Verify encryption key is set +- Test API key generation flow + +#### Issue 2: Rate Limiting Too Aggressive + +**Symptoms:** +- Users hitting rate limits quickly +- 429 errors frequent + +**Debug:** +```bash +# Check rate limit settings +docker compose exec n8n-mcp env | grep RATE_LIMIT + +# Check logs +docker compose logs n8n-mcp | grep "Rate limit exceeded" +``` + +**Solutions:** +```typescript +// Adjust in src/services/rate-limiter.ts +const rateLimiter = new RateLimiter(200, 60000); // Increase to 200/min + +// Or set via environment +RATE_LIMIT_REQUESTS=200 +``` + +#### Issue 3: n8n Connection Failures + +**Symptoms:** +- "Failed to decrypt credentials" +- "n8n instance not accessible" + +**Debug:** +```bash +# Test n8n connectivity +curl -H "X-N8N-API-KEY: xxx" https://user-n8n.com/api/v1/workflows + +# Check encryption +# Verify MASTER_ENCRYPTION_KEY is set correctly +``` + +**Solutions:** +- Verify n8n instance is publicly accessible +- Check n8n API key is valid +- Test encryption/decryption manually +- Verify firewall rules + +#### Issue 4: High Memory Usage + +**Symptoms:** +- Server running out of memory +- Docker containers being killed + +**Debug:** +```bash +# Check memory usage +docker stats + +# Check session count +# Add logging to SessionManager +``` + +**Solutions:** +```typescript +// Reduce session TTL +const sessionManager = new SessionManager({ + maxSessions: 500, // Reduce from 1000 + ttl: 1800000, // 30 minutes instead of 1 hour +}); + +// Or add to server +# Upgrade to CPX41 (8 vCPU, 16GB) - โ‚ฌ26/mo +``` + +#### Issue 5: Database Connection Errors + +**Symptoms:** +- "Could not connect to Supabase" +- Queries timing out + +**Debug:** +```bash +# Check Supabase dashboard +# Connection pooling status + +# Check environment variables +docker compose exec n8n-mcp env | grep SUPABASE +``` + 
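To separate a bad key from a paused project, you can also probe Supabase's REST endpoint directly from inside the container. A minimal sketch, assuming the `users` table from the MVP schema, the standard Supabase PostgREST path (`/rest/v1/`), and the `SUPABASE_URL` / `SUPABASE_SERVICE_KEY` variables already set in the container's environment:

```bash
# 200 => project reachable and the service key is accepted
# 401/403 => key problem; timeout => project paused or network issue
docker compose exec n8n-mcp sh -c \
  'curl -s -o /dev/null -w "%{http_code}\n" \
     -H "apikey: $SUPABASE_SERVICE_KEY" \
     -H "Authorization: Bearer $SUPABASE_SERVICE_KEY" \
     "$SUPABASE_URL/rest/v1/users?select=id&limit=1"'
```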
+**Solutions:** +- Verify SUPABASE_SERVICE_KEY is correct +- Check Supabase project is not paused +- Upgrade to Supabase Pro if hitting limits +- Add connection retry logic + +### Debug Commands + +**Check container status:** +```bash +docker compose ps +docker compose logs -f n8n-mcp +docker compose logs -f caddy +``` + +**Test API endpoint:** +```bash +# Health check +curl https://api.n8n-mcp.com/health + +# Test with API key +curl -X POST https://api.n8n-mcp.com/mcp \ + -H "Authorization: Bearer nmcp_xxx" \ + -H "Content-Type: application/json" \ + -d '{"jsonrpc":"2.0","id":1,"method":"tools/list"}' +``` + +**Check database:** +```sql +-- Via Supabase SQL Editor + +-- Count users +SELECT COUNT(*) FROM users; + +-- Count active API keys +SELECT COUNT(*) FROM api_keys WHERE is_active = true; + +-- Check recent usage +SELECT user_id, tool_name, status, created_at +FROM usage_logs +ORDER BY created_at DESC +LIMIT 20; + +-- Find rate limited requests +SELECT user_id, COUNT(*) as rate_limited_count +FROM usage_logs +WHERE status = 'rate_limited' +AND created_at > NOW() - INTERVAL '1 hour' +GROUP BY user_id +ORDER BY rate_limited_count DESC; +``` + +--- + +## Rollback Procedures + +### Scenario 1: Critical Backend Bug + +**If you need to rollback backend:** + +```bash +# SSH to server +ssh root@your-server + +# Stop containers +cd /opt/n8n-mcp +docker compose -f docker-compose.prod.yml down + +# Revert to previous image +docker pull ghcr.io/czlonkowski/n8n-mcp:previous + +# Update docker-compose to use previous image +# Or checkout previous git commit +git log # Find previous working commit +git checkout + +# Redeploy +docker compose -f docker-compose.prod.yml up -d + +# Verify +curl https://api.n8n-mcp.com/health +``` + +**Notify users:** +``` +Subject: Brief Service Interruption + +We experienced a technical issue and had to rollback to a previous version. +Service is now restored. We apologize for any inconvenience. +``` + +### Scenario 2: Database Schema Issue + +**If schema migration causes issues:** + +```sql +-- Via Supabase SQL Editor + +-- Rollback last migration +BEGIN; + +-- Drop new columns/tables (if added) +DROP TABLE IF EXISTS new_table; +ALTER TABLE existing_table DROP COLUMN IF EXISTS new_column; + +-- Restore data from backup (if needed) +-- Contact Supabase support for restore + +COMMIT; +``` + +### Scenario 3: Frontend Issue + +**If frontend has bugs:** + +```bash +# Rollback Vercel deployment +vercel rollback + +# Or deploy previous version +git checkout +vercel --prod +``` + +### Scenario 4: Complete Outage + +**If entire service is down:** + +1. **Immediate Actions:** + - Post status update (Twitter, Discord) + - Email all active users + - Disable signup temporarily + +2. **Investigation:** +```bash +# Check all services +docker compose ps +docker compose logs --tail=100 + +# Check server resources +htop +df -h + +# Check Supabase status +# Visit Supabase dashboard +``` + +3. **Recovery:** +```bash +# Restart all services +docker compose -f docker-compose.prod.yml restart + +# If that doesn't work, full redeploy +docker compose down +docker compose pull +docker compose up -d +``` + +4. 
**Post-mortem:** + - Document what happened + - Identify root cause + - Implement fixes + - Update runbook + +--- + +## Success Metrics + +### Week 1 Targets + +| Metric | Target | How to Measure | +|--------|--------|----------------| +| Signups | 300/471 (64%) | Supabase users table | +| Activation | 70% | Users with API key + n8n config | +| First MCP Call | 60% | Users with usage_logs entry | +| Error Rate | <2% | usage_logs WHERE status='error' | +| Support Response | <2 hours | Email metrics | + +### Week 4 Targets + +| Metric | Target | How to Measure | +|--------|--------|----------------| +| Day 7 Retention | 40% | Active users 7 days after signup | +| Day 30 Retention | 25% | Active users after 30 days | +| Avg Requests/User/Day | >5 | usage_logs COUNT / users | +| Platform Distribution | Track | % Claude vs Cursor vs Windsurf | +| User Satisfaction | >4/5 | Survey after 7 days | + +--- + +## Next Steps After MVP + +### Post-MVP Release 1: Analytics (Weeks 5-6) + +- Detailed usage dashboard +- Tool usage breakdown +- Performance metrics +- Error tracking (Sentry) + +### Post-MVP Release 2: Paid Tiers (Weeks 7-10) + +- Stripe integration +- Plan management +- Billing dashboard +- Upgrade/downgrade flows + +### Post-MVP Release 3: Advanced Features (Weeks 11-12) + +- Team collaboration +- Shared workflows +- API key rotation +- Custom alerts + +--- + +**End of Implementation Guide** + +This guide provides complete step-by-step instructions for implementing the n8n-mcp MVP in 2.5 weeks. Follow each phase carefully, test thoroughly, and launch with confidence! + +For questions or issues during implementation: +- Check troubleshooting section +- Review existing code in n8n-mcp repo +- Consult MVP_DEPLOYMENT_PLAN_SIMPLIFIED.md + +Good luck with your launch! ๐Ÿš€ \ No newline at end of file diff --git a/MVP_DEPLOYMENT_PLAN.md b/MVP_DEPLOYMENT_PLAN.md new file mode 100644 index 0000000..9925873 --- /dev/null +++ b/MVP_DEPLOYMENT_PLAN.md @@ -0,0 +1,1464 @@ +# n8n-mcp Hosted Service: MVP Deployment Plan + +**Project:** Multi-Tenant n8n-MCP Service on Hetzner +**Domain:** www.n8n-mcp.com (already owned) +**Goal:** Launch MVP to 471 waitlist users (free tier) +**Timeline:** 3-4 weeks to MVP launch +**Date:** 2025-10-11 +**Version:** 3.0 - MVP Focus + +--- + +## Executive Summary + +### MVP Scope (Waitlist Launch - No Payments) + +**What We're Building:** +- Multi-tenant n8n-mcp service hosted on Hetzner +- User authentication and dashboard (Supabase) +- API key management +- Per-user n8n instance configuration +- Support for Claude Desktop, Cursor, Windsurf, and all MCP clients +- Free tier for all 471 waitlist users + +**What We're NOT Building (Post-MVP):** +- Stripe/payment integration (will add after learnings) +- Usage tracking analytics (basic only for MVP) +- Advanced rate limiting per plan (simple rate limit for MVP) +- Customer support portal (email support only for MVP) + +### Critical Discovery: 70% Already Built! 
+ +**n8n-mcp analysis** revealed the codebase already has: +- โœ… `InstanceContext` pattern for per-user isolation +- โœ… LRU cache with TTL for API clients +- โœ… All 16 MCP tools context-aware +- โœ… HTTP header extraction for multi-tenant +- โœ… Session management with cleanup + +**Implementation reduced from 15-20 days to 5-7 days!** + +### Infrastructure Sizing (Telemetry-Based) + +**Current Usage (600 DAU distributed):** +- Peak RPS: 116 max, 44 p99, 21 p95, 6.6 avg +- Concurrent users: 8 max, 4 p95, 2 avg +- Peak hours: 13:00-16:00 UTC + +**MVP Launch Config (471 waitlist users):** +- 1x CPX31 (4 vCPU, 8GB RAM, 160GB) - โ‚ฌ14.00/mo +- PostgreSQL Basic (2 vCPU, 4GB, 80GB) - โ‚ฌ33.00/mo +- Load Balancer LB11 - โ‚ฌ5.49/mo +- Object Storage 100GB - โ‚ฌ2.00/mo +- **Total: โ‚ฌ54.49/month (~โ‚ฌ0.12/user)** + +**Scale trigger:** Add 2nd app server when DAU > 800 or RPS > 30 sustained + +### Timeline + +| Week | Phase | Deliverable | +|------|-------|-------------| +| **Week 1** | Infrastructure + Multi-tenant backend | Working MCP service with API key auth | +| **Week 2** | Dashboard (Next.js 15 + Supabase) | User can sign up, create keys, configure n8n | +| **Week 3** | Integration + Testing | All platforms tested, waitlist invited | +| **Week 4** | Launch + Monitoring | MVP live, gathering feedback | + +**Launch Date:** End of Week 4 (November 8, 2025 target) + +--- + +## Repository Structure + +### Separate Repositories (User Decision) + +``` +1. n8n-mcp (backend service) + โ”œโ”€โ”€ Multi-tenant API key authentication + โ”œโ”€โ”€ MCP server (HTTP Streamable only) + โ”œโ”€โ”€ Docker Compose deployment + โ””โ”€โ”€ Located: /Users/romualdczlonkowski/Pliki/n8n-mcp/n8n-mcp + +2. n8n-mcp-landing (frontend web app) + โ”œโ”€โ”€ Next.js 15 + Supabase + shadcn/ui + โ”œโ”€โ”€ User dashboard and authentication + โ”œโ”€โ”€ API key management UI + โ””โ”€โ”€ Located: /Users/romualdczlonkowski/Pliki/n8n-mcp-landing + โ””โ”€โ”€ Already using Next.js 15.3.4 โœ… +``` + +**Rationale:** Separate repos allow independent deployments and users configure MCP clients via URLs anyway. + +--- + +## Release Plan + +### MVP (Week 1-4): Waitlist Launch + +**Goal:** Get 471 waitlist users using hosted n8n-mcp service (free) + +#### Backend (n8n-mcp service) + +**What's Needed:** +1. **API Key Authentication** (2-3 days) + - PostgreSQL connection for user data + - API key validation middleware + - Load per-user n8n credentials + - **Discovery:** 70% already implemented via `InstanceContext` + +2. **HTTP Streamable Only** (1 day) + - Remove SSE transport code + - Simplify to StreamableHTTP only + - Update health checks + +3. **Docker Compose Stack** (2 days) + - Production docker-compose.yml (3 containers) + - Nginx load balancer + - Redis for sessions + - Prometheus + Grafana monitoring + - Zero-downtime deployment script + +4. **Database Schema** (1 day) + - Supabase PostgreSQL schema + - Tables: users, api_keys, n8n_instances, usage_logs + - RLS policies + - Indexes for performance + +**Total Backend:** 6-7 days + +#### Frontend (n8n-mcp-landing) + +**What's Needed:** +1. **Supabase Authentication** (2 days) + - Email/password signup (no OAuth for MVP) + - Email verification flow + - Protected routes middleware + - **Already Next.js 15 โœ…** + +2. **Dashboard Pages** (3-4 days) + - Landing page update (redirect users to hosted service) + - Dashboard overview + - API key management (create, view, revoke) + - n8n instance configuration form + - Account settings + - **Use existing shadcn/ui components โœ…** + +3. 
**Integration with Backend** (1 day) + - Supabase client setup + - RLS policies + - Type generation from database + +**Total Frontend:** 6-7 days + +#### Infrastructure & DevOps + +**What's Needed:** +1. **Hetzner Setup** (1 day) + - Provision CPX31 + PostgreSQL + LB + - DNS configuration (www + api subdomains) + - SSL certificates (Let's Encrypt) + +2. **CI/CD Pipeline** (1 day) + - GitHub Actions for backend + - Docker build + push to GHCR + - Automated deployment via SSH + - Rollback procedure + +**Total DevOps:** 2 days + +#### Testing & Launch + +**What's Needed:** +1. **Testing** (3 days) + - Unit tests (authentication, multi-tenant isolation) + - Integration tests (full user flow) + - Platform testing (Claude, Cursor, Windsurf) + - Load testing (simulate 471 users) + +2. **Documentation** (2 days) + - User onboarding guide + - Platform-specific setup guides + - Troubleshooting docs + - Admin playbook + +3. **Waitlist Invitation** (1 day) + - Email campaign to 471 users + - Onboarding support + - Feedback collection + +**Total Testing:** 6 days + +### Post-MVP Release 1 (Week 5-6): Usage Analytics + +**Goal:** Understand how users are using the service + +**Features:** +- Usage tracking dashboard (requests per hour/day) +- Tool usage analytics (which MCP tools most popular) +- User engagement metrics (DAU, WAU, retention) +- Error tracking (Sentry integration) + +**Estimate:** 1-2 weeks + +### Post-MVP Release 2 (Week 7-10): Paid Tiers + +**Goal:** Start generating revenue from power users + +**Features:** +- Stripe integration (Pro + Enterprise tiers) +- Plan limits enforcement (rate limiting per plan) +- Upgrade/downgrade flows +- Billing dashboard +- Customer portal + +**Estimate:** 3-4 weeks + +### Post-MVP Release 3 (Week 11-12): Advanced Features + +**Goal:** Differentiate from self-hosted + +**Features:** +- Shared workflow templates (community) +- Team collaboration (multiple users per account) +- API key rotation automation +- Advanced monitoring (custom alerts) +- Priority support ticketing + +**Estimate:** 2 weeks + +--- + +## MVP Technical Architecture + +### 1. Backend Architecture (n8n-mcp) + +#### Multi-Tenant Flow + +``` +User Request with Bearer Token + โ†“ +[Nginx Load Balancer] + โ†“ +[API Key Validation Middleware] + โ”œโ”€> Query PostgreSQL for api_key + โ”œโ”€> Load user's n8n credentials + โ””โ”€> Create InstanceContext + โ†“ +[MCP Tool Handler] (existing code!) + โ”œโ”€> getN8nApiClient(context) + โ””โ”€> Uses LRU cache (80%+ hit rate) + โ†“ +[User's n8n Instance] + โ†“ +[Response to User] +``` + +#### Docker Compose Stack + +```yaml +services: + nginx: + - Load balancing (least_conn) + - Rate limiting (global) + - Health checks + - WebSocket support + + mcp-app-1, mcp-app-2, mcp-app-3: + - n8n-mcp containers (HTTP Streamable only) + - Health checks every 30s + - Graceful shutdown (SIGTERM) + - Resource limits (2GB RAM, 1 CPU each) + + redis: + - Session storage + - Rate limit tracking + - Persistence (AOF) + + prometheus: + - Metrics collection + - 30-day retention + + grafana: + - Dashboards + - Alerting +``` + +#### Files to Modify + +**1. 
src/http-server-single-session.ts** (200 lines modified)
```typescript
// ADD: API key validation
interface UserContext {
  user_id: string;
  n8n_url: string;
  n8n_api_key: string;
}

async function validateApiKey(apiKey: string): Promise<UserContext> {
  // bcrypt hashes are salted, so a freshly computed hash never matches the
  // stored one; look up candidate rows by the stored display prefix instead,
  // then verify each with bcrypt.compare().
  const prefix = `${apiKey.substring(0, 13)}...`; // "nmcp_" + first 8 chars, as stored
  const { data: candidates, error } = await supabase
    .from('api_keys')
    .select('id, user_id, key_hash')
    .eq('key_prefix', prefix)
    .eq('is_active', true);

  if (error || !candidates?.length) throw new UnauthorizedError();

  let match: (typeof candidates)[number] | undefined;
  for (const candidate of candidates) {
    if (await bcrypt.compare(apiKey, candidate.key_hash)) {
      match = candidate;
      break;
    }
  }
  if (!match) throw new UnauthorizedError();

  // n8n_instances is keyed by user_id, not by api_key, so load it separately
  const { data: instance } = await supabase
    .from('n8n_instances')
    .select('instance_url, api_key_encrypted')
    .eq('user_id', match.user_id)
    .eq('is_active', true)
    .single();

  if (!instance) throw new UnauthorizedError();

  // Decrypt n8n API key
  const n8nApiKey = decrypt(instance.api_key_encrypted, match.user_id);

  return {
    user_id: match.user_id,
    n8n_url: instance.instance_url,
    n8n_api_key: n8nApiKey
  };
}

// MODIFY: Request handler
async handleRequest(req: Request): Promise<Response> {
  const apiKey = req.headers.get('Authorization')?.replace('Bearer ', '');
  if (!apiKey) throw new UnauthorizedError();

  const userContext = await validateApiKey(apiKey);

  // Create InstanceContext (existing pattern!)
  const context: InstanceContext = {
    n8nApiUrl: userContext.n8n_url,
    n8nApiKey: userContext.n8n_api_key
  };

  // Existing code handles the rest!
  return this.mcpServer.handleRequest(req, context);
}
```

**2. src/services/api-key-validator.ts** (NEW - 400 lines)
- PostgreSQL connection pooling
- bcrypt validation
- n8n credential decryption (AES-256-GCM)
- Rate limit checking
- Audit logging

**3. Remove SSE Transport** (1 day)
- Delete `src/http-server-single-session.ts` lines handling SSE
- Keep only StreamableHTTPServerTransport
- Update tests

**4. Database Connection**
```typescript
// NEW: src/services/database.ts
import { createClient } from '@supabase/supabase-js';

export const supabase = createClient(
  process.env.SUPABASE_URL!,
  process.env.SUPABASE_SERVICE_KEY!, // Service role bypasses RLS
  {
    auth: { persistSession: false },
    db: { schema: 'public' }
  }
);
```

#### Environment Variables

```bash
# New for MVP
DATABASE_URL=postgresql://...
SUPABASE_URL=https://xxx.supabase.co
SUPABASE_SERVICE_KEY=eyJxxx... # Service role key
AUTH_MODE=api_key # New mode
ENABLE_MULTI_TENANT=true
MASTER_ENCRYPTION_KEY=xxx # For n8n credentials

# Existing
NODE_ENV=production
MCP_MODE=http
PORT=3000
NODES_DB_PATH=/app/data/nodes.db
```

### 2. Frontend Architecture (n8n-mcp-landing)

#### Supabase Schema

```sql
-- Users table (extends auth.users)
CREATE TABLE public.users (
  id UUID PRIMARY KEY REFERENCES auth.users(id),
  email TEXT NOT NULL UNIQUE,
  full_name TEXT,
  created_at TIMESTAMPTZ DEFAULT NOW(),
  updated_at TIMESTAMPTZ DEFAULT NOW()
);

-- API Keys table
CREATE TABLE public.api_keys (
  id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
  user_id UUID NOT NULL REFERENCES public.users(id) ON DELETE CASCADE,
  key_hash TEXT NOT NULL UNIQUE,
  key_prefix TEXT NOT NULL, -- For display: "nmcp_abc123..."
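  -- user-chosen label shown in the dashboard (e.g., "Claude Desktop")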
+ name TEXT NOT NULL, + last_used_at TIMESTAMPTZ, + created_at TIMESTAMPTZ DEFAULT NOW(), + is_active BOOLEAN DEFAULT TRUE +); + +-- n8n Instance Configuration +CREATE TABLE public.n8n_instances ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + user_id UUID NOT NULL REFERENCES public.users(id) ON DELETE CASCADE, + instance_url TEXT NOT NULL, + api_key_encrypted TEXT NOT NULL, -- Encrypted with per-user key + is_active BOOLEAN DEFAULT TRUE, + last_validated_at TIMESTAMPTZ, + created_at TIMESTAMPTZ DEFAULT NOW(), + CONSTRAINT unique_user_instance UNIQUE(user_id, instance_url) +); + +-- Usage tracking (basic for MVP) +CREATE TABLE public.usage_logs ( + id BIGSERIAL PRIMARY KEY, + user_id UUID NOT NULL REFERENCES public.users(id), + api_key_id UUID REFERENCES public.api_keys(id), + tool_name TEXT NOT NULL, + status TEXT NOT NULL, -- 'success' | 'error' | 'rate_limited' + created_at TIMESTAMPTZ DEFAULT NOW() +); + +-- RLS Policies +ALTER TABLE public.users ENABLE ROW LEVEL SECURITY; +ALTER TABLE public.api_keys ENABLE ROW LEVEL SECURITY; +ALTER TABLE public.n8n_instances ENABLE ROW LEVEL SECURITY; +ALTER TABLE public.usage_logs ENABLE ROW LEVEL SECURITY; + +CREATE POLICY "Users can view own data" ON public.users + FOR SELECT USING (auth.uid() = id); + +CREATE POLICY "Users can manage own API keys" ON public.api_keys + FOR ALL USING (auth.uid() = user_id); + +CREATE POLICY "Users can manage own n8n config" ON public.n8n_instances + FOR ALL USING (auth.uid() = user_id); + +CREATE POLICY "Users can view own usage" ON public.usage_logs + FOR SELECT USING (auth.uid() = user_id); +``` + +#### Next.js 15 App Structure + +``` +src/app/ +โ”œโ”€โ”€ (auth)/ +โ”‚ โ”œโ”€โ”€ login/page.tsx +โ”‚ โ”œโ”€โ”€ signup/page.tsx +โ”‚ โ””โ”€โ”€ verify-email/page.tsx +โ”œโ”€โ”€ (dashboard)/ +โ”‚ โ”œโ”€โ”€ dashboard/page.tsx # Overview +โ”‚ โ”œโ”€โ”€ api-keys/page.tsx # Create, view, revoke keys +โ”‚ โ”œโ”€โ”€ n8n-config/page.tsx # Configure n8n instance +โ”‚ โ””โ”€โ”€ settings/page.tsx # Account settings +โ”œโ”€โ”€ api/ +โ”‚ โ”œโ”€โ”€ auth/callback/route.ts # Supabase auth callback +โ”‚ โ””โ”€โ”€ webhooks/ +โ”‚ โ””โ”€โ”€ (future stripe webhook) +โ”œโ”€โ”€ layout.tsx +โ”œโ”€โ”€ page.tsx # Landing page (updated) +โ””โ”€โ”€ middleware.ts # Auth protection + +src/components/ +โ”œโ”€โ”€ api-key-card.tsx +โ”œโ”€โ”€ n8n-config-form.tsx +โ”œโ”€โ”€ usage-chart.tsx (basic for MVP) +โ””โ”€โ”€ ui/ (existing shadcn/ui) + +src/lib/ +โ”œโ”€โ”€ supabase/ +โ”‚ โ”œโ”€โ”€ client.ts # Browser client +โ”‚ โ”œโ”€โ”€ server.ts # Server client +โ”‚ โ””โ”€โ”€ middleware.ts # Auth middleware +โ””โ”€โ”€ utils.ts (existing) +``` + +#### Key Components + +**1. 
Authentication Setup** + +```typescript +// src/lib/supabase/middleware.ts +import { createServerClient } from '@supabase/ssr'; +import { NextResponse } from 'next/server'; + +export async function updateSession(request: NextRequest) { + let response = NextResponse.next(); + + const supabase = createServerClient( + process.env.NEXT_PUBLIC_SUPABASE_URL!, + process.env.NEXT_PUBLIC_SUPABASE_ANON_KEY!, + { + cookies: { + get: (name) => request.cookies.get(name)?.value, + set: (name, value, options) => { + response.cookies.set({ name, value, ...options }); + }, + remove: (name, options) => { + response.cookies.set({ name, value: '', ...options }); + }, + }, + } + ); + + const { data: { user } } = await supabase.auth.getUser(); + + // Protected routes + if (!user && request.nextUrl.pathname.startsWith('/dashboard')) { + return NextResponse.redirect(new URL('/login', request.url)); + } + + return response; +} +``` + +**2. API Key Management** + +```typescript +// src/app/(dashboard)/api-keys/page.tsx +'use server'; + +import { createClient } from '@/lib/supabase/server'; +import crypto from 'crypto'; +import bcrypt from 'bcryptjs'; + +export async function generateApiKey(name: string) { + const supabase = createClient(); + const { data: { user } } = await supabase.auth.getUser(); + + // Generate secure key + const key = crypto.randomBytes(32).toString('base64url'); + const fullKey = `nmcp_${key}`; + const hash = await bcrypt.hash(fullKey, 10); + const prefix = `nmcp_${key.substring(0, 8)}...`; + + // Store in database + const { data, error } = await supabase + .from('api_keys') + .insert({ + user_id: user!.id, + key_hash: hash, + key_prefix: prefix, + name: name + }) + .select() + .single(); + + return { key: fullKey, id: data.id }; // Show only once! +} +``` + +**3. n8n Configuration Form** + +```typescript +// src/app/(dashboard)/n8n-config/page.tsx +'use server'; + +import { encrypt } from '@/lib/encryption'; + +export async function saveN8nConfig( + instanceUrl: string, + apiKey: string +) { + const supabase = createClient(); + const { data: { user } } = await supabase.auth.getUser(); + + // Test connection + const response = await fetch(`${instanceUrl}/api/v1/workflows`, { + headers: { 'X-N8N-API-KEY': apiKey } + }); + + if (!response.ok) { + throw new Error('Invalid n8n credentials'); + } + + // Encrypt and store + const encryptedKey = encrypt(apiKey, user!.id); + + await supabase.from('n8n_instances').upsert({ + user_id: user!.id, + instance_url: instanceUrl, + api_key_encrypted: encryptedKey, + last_validated_at: new Date().toISOString() + }); +} +``` + +### 3. Infrastructure Setup + +#### Hetzner Provisioning + +```bash +# Via Hetzner Cloud Console +1. Create project "n8n-mcp-production" +2. Create CPX31 server (โ‚ฌ14/mo) + - Location: Falkenstein, Germany + - Image: Ubuntu 22.04 LTS + - SSH keys: Add your public key +3. Create Managed PostgreSQL Basic (โ‚ฌ33/mo) + - Version: PostgreSQL 15 + - Backups: Enabled +4. Create Load Balancer LB11 (โ‚ฌ5.49/mo) + - Algorithm: Least connections + - Health checks: HTTP /health +5. 
Create Object Storage (โ‚ฌ2/mo) + - For backups and logs +``` + +#### DNS Configuration + +``` +A www.n8n-mcp.com โ†’ Load Balancer IP +A api.n8n-mcp.com โ†’ Load Balancer IP +TXT _acme-challenge โ†’ (for SSL verification) +``` + +#### Docker Compose Deployment + +```bash +# On server +cd /opt +git clone https://github.com/czlonkowski/n8n-mcp.git +cd n8n-mcp + +# Create secrets +mkdir -p secrets +echo "your-postgres-password" > secrets/postgres_password.txt +echo "your-master-encryption-key" > secrets/master_encryption_key.txt +chmod 600 secrets/*.txt + +# Create .env +cat > .env << EOF +DATABASE_URL=postgresql://user:pass@postgres-host:5432/n8n_mcp +SUPABASE_URL=https://xxx.supabase.co +SUPABASE_SERVICE_KEY=eyJxxx... +AUTH_MODE=api_key +ENABLE_MULTI_TENANT=true +NODE_ENV=production +EOF + +# Build and deploy +docker compose -f docker-compose.prod.yml up -d + +# Verify +curl http://localhost:3000/health +``` + +#### Zero-Downtime Deployment + +```bash +# Install docker-rollout plugin +curl -fsSL https://github.com/wowu/docker-rollout/releases/latest/download/docker-rollout \ + -o ~/.docker/cli-plugins/docker-rollout +chmod +x ~/.docker/cli-plugins/docker-rollout + +# Deploy script (6x per day) +#!/bin/bash +# deploy.sh + +set -e + +echo "Building new image..." +docker build -t ghcr.io/czlonkowski/n8n-mcp:latest . +docker push ghcr.io/czlonkowski/n8n-mcp:latest + +echo "Rolling update..." +docker rollout mcp-app-1 mcp-app-2 mcp-app-3 + +echo "Deployment complete!" +``` + +--- + +## MVP User Flow + +### 1. User Signs Up (n8n-mcp-landing) + +``` +1. Visit www.n8n-mcp.com +2. Click "Get Started" (from waitlist email) +3. Sign up with email/password +4. Verify email (Supabase Auth link) +5. Redirected to dashboard +``` + +### 2. User Configures n8n Instance + +``` +1. Navigate to "n8n Configuration" +2. Enter n8n instance URL (e.g., https://my-n8n.com) +3. Enter n8n API key +4. Click "Test Connection" + โ”œโ”€> Backend validates credentials + โ””โ”€> Shows โœ… or โŒ +5. Click "Save" + โ”œโ”€> Encrypt n8n API key + โ””โ”€> Store in PostgreSQL +``` + +### 3. User Creates API Key + +``` +1. Navigate to "API Keys" +2. Click "Create New Key" +3. Enter friendly name (e.g., "Claude Desktop") +4. Click "Generate" +5. Modal shows key ONCE: + โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” + โ”‚ Your API Key (save this securely!) โ”‚ + โ”‚ nmcp_abc123def456ghi789jkl012mno345 โ”‚ + โ”‚ [Copy to Clipboard] โ”‚ + โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ +6. User copies key +7. Key hash stored in database +``` + +### 4. User Configures MCP Client + +#### Claude Desktop + +```json +// Settings > Connectors > Add Custom Connector +{ + "name": "n8n-mcp Hosted", + "url": "https://api.n8n-mcp.com/mcp", + "authentication": { + "type": "bearer", + "token": "nmcp_abc123def456ghi789jkl012mno345" + } +} +``` + +#### Cursor + +```json +// ~/.cursor/mcp.json +{ + "servers": { + "n8n-mcp": { + "url": "https://api.n8n-mcp.com/mcp", + "headers": { + "Authorization": "Bearer nmcp_abc123def456ghi789jkl012mno345" + } + } + } +} +``` + +#### Windsurf + +```json +// Settings > MCP Servers +{ + "serverUrl": "https://api.n8n-mcp.com/mcp", + "authToken": "nmcp_abc123def456ghi789jkl012mno345" +} +``` + +### 5. User Tests Connection + +``` +1. Open MCP client (Claude/Cursor/Windsurf) +2. Type: "list n8n nodes" +3. 
MCP request flow: + โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” + โ”‚ Client sends Bearer token โ”‚ + โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ + โ–ผ + โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” + โ”‚ Nginx routes to n8n-mcp โ”‚ + โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ + โ–ผ + โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” + โ”‚ Validate API key (PostgreSQL) โ”‚ + โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ + โ–ผ + โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” + โ”‚ Load user's n8n credentials โ”‚ + โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ + โ–ผ + โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” + โ”‚ Create InstanceContext โ”‚ + โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ + โ–ผ + โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” + โ”‚ Execute MCP tool (existing!) โ”‚ + โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ + โ–ผ + โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” + โ”‚ Return node list from nodes.db โ”‚ + โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ +4. User sees list of 536 n8n nodes +5. Success! โœ… +``` + +--- + +## Landing Page Migration Strategy + +### Current State + +**www.n8n-mcp.com** (n8n-mcp-landing repo): +- Landing page with waitlist signup (471 users) +- Community videos +- Feature showcase +- GitHub link for installation + +### MVP Changes + +**Update Landing Page to Direct Users to Hosted Service:** + +```typescript +// src/app/page.tsx - Update hero section + +export default function HomePage() { + return ( + <> + +
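    // Illustrative markup: Button, Link, and Card below stand in for the
    // landing page's actual components and styling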

    <>
      <section className="hero">
        <h1>n8n-mcp: AI-Powered n8n Workflows</h1>
        <p>Use Claude, Cursor, Windsurf with your n8n workflows</p>

        {/* OLD: GitHub installation instructions */}
        {/* NEW: Sign up for hosted service */}
        <div className="cta-buttons">
          <Button asChild size="lg">
            <Link href="/signup">Start Using Now</Link>
          </Button>
          <Button asChild variant="outline" size="lg">
            <Link href="https://github.com/czlonkowski/n8n-mcp">Self-Host</Link>
          </Button>
        </div>

        <p className="hero-note">
          471 users from our waitlist already have access!
          No credit card required.
        </p>
      </section>

      {/* Highlight hosted benefits */}
      <section className="benefits">
        <Card>
          <h3>Instant Setup</h3>
          <p>No installation needed. Sign up and start using in 5 minutes.</p>
        </Card>
        <Card>
          <h3>Secure & Private</h3>
          <p>Your n8n credentials encrypted. Your workflows stay in your instance.</p>
        </Card>
        <Card>
          <h3>All MCP Clients</h3>
          <p>Works with Claude Desktop, Cursor, Windsurf, and more.</p>
        </Card>
        <Card>
          <h3>Community Support</h3>
          <p>Join 471 users already building AI workflows.</p>
        </Card>
      </section>

      {/* Keep existing community videos */}

      {/* Add: Self-hosting still available */}
      <section className="self-hosting">
        <h2>Prefer Self-Hosting?</h2>
        <p>
          n8n-mcp is open source. You can still install it locally.{' '}
          <a href="https://github.com/czlonkowski/n8n-mcp">View on GitHub →</a>
        </p>
      </section>
    </>
+ + ); +} +``` + +### Migration Steps + +1. **Keep existing landing page** (don't break links) +2. **Add signup flow** (new routes: /signup, /login) +3. **Add dashboard** (new routes: /dashboard/*) +4. **Update hero CTA** from "Install" to "Sign Up" +5. **Keep GitHub link** in footer (for self-hosters) +6. **Add "How It Works"** section explaining hosted service + +### Content Updates + +**Before (self-hosted focus):** +> "Install n8n-mcp and connect your n8n instance to Claude Desktop." + +**After (hosted service focus):** +> "Connect your n8n instance to Claude, Cursor, Windsurf in 5 minutes. No installation needed." + +**Keep both options visible:** +- Primary CTA: "Start Using Now" โ†’ /signup +- Secondary: "Self-Host" โ†’ GitHub + +--- + +## MVP Success Metrics + +### Week 1-2: Alpha Testing (Internal) + +| Metric | Target | +|--------|--------| +| Backend deployed | โœ… | +| Frontend deployed | โœ… | +| Internal testing complete | 10 test users | +| All platforms tested | Claude, Cursor, Windsurf | +| Zero critical bugs | 0 P0 issues | + +### Week 3-4: Beta Launch (Waitlist) + +| Metric | Target | Measurement | +|--------|--------|-------------| +| **Signups** | 300/471 (64%) | First 2 weeks | +| **Activation** | 70% | Users who configure n8n + create key | +| **First MCP Call** | 60% | Users who make โ‰ฅ1 MCP request | +| **Day 7 Retention** | 40% | Active 7 days after signup | +| **Platform Distribution** | - | % Claude vs Cursor vs Windsurf | + +### Operational Metrics (Ongoing) + +| Metric | Target | Alert Threshold | +|--------|--------|-----------------| +| **Uptime** | 99%+ | < 99% in 24h | +| **Response Time (p95)** | <500ms | >800ms for 5min | +| **Error Rate** | <1% | >2% for 5min | +| **Database Queries** | <50ms p95 | >100ms for 5min | +| **API Key Validation** | <20ms | >50ms | + +### User Feedback Collection + +**Methods:** +1. In-app feedback form (dashboard) +2. Email survey after 7 days +3. Weekly office hours (optional) +4. Discord/Slack community (existing?) + +**Key Questions:** +- How easy was setup? (1-5 scale) +- Which MCP client do you use? +- What workflows are you building? +- Would you pay for this? How much? +- What features do you need? + +--- + +## Post-MVP: Learnings โ†’ Paid Tiers + +### Hypothesis to Test + +**Assumption:** Users will pay for higher rate limits and priority support. + +**Data to Collect:** +- Average requests per user per day +- Peak usage times +- Most-used MCP tools +- Churn reasons (if users stop) +- Feature requests frequency + +### Pricing Strategy (Post-MVP) + +Based on learnings, implement: + +``` +Free Tier (Waitlist - MVP) +โ”œโ”€ 600 requests/hour (10/min) +โ”œโ”€ 10k requests/day +โ”œโ”€ 2 API keys +โ””โ”€ Community support + +Pro Tier (~โ‚ฌ10/month) +โ”œโ”€ 6,000 requests/hour (100/min) +โ”œโ”€ 100k requests/day +โ”œโ”€ 10 API keys +โ”œโ”€ Email support (24h response) +โ””โ”€ Workflow sharing (future) + +Enterprise Tier (Custom) +โ”œโ”€ Unlimited requests +โ”œโ”€ Unlimited API keys +โ”œโ”€ Dedicated support +โ”œโ”€ SLA guarantee +โ””โ”€ Custom integrations +``` + +### When to Add Payments + +**Criteria:** +1. โœ… 200+ active users (DAU) +2. โœ… 80%+ satisfaction score +3. โœ… <5% churn rate +4. โœ… Clear value proposition validated +5. 
โœ… User requests for paid features + +**Timeline:** 4-6 weeks after MVP launch + +--- + +## Risk Assessment + +### Technical Risks + +**RISK-01: Multi-tenant isolation failure** +- **Impact:** User A accesses User B's data +- **Likelihood:** Low (RLS policies + validation) +- **Mitigation:** + - Comprehensive testing with 2+ test users + - Audit logs for all API key validations + - Automated tests for RLS policies + +**RISK-02: n8n credential leakage** +- **Impact:** User's n8n instance compromised +- **Likelihood:** Low (AES-256-GCM encryption) +- **Mitigation:** + - Encryption tested thoroughly + - Master key rotation procedure documented + - Monitor for unusual n8n API calls + +**RISK-03: Database bottleneck** +- **Impact:** Slow response times, user frustration +- **Likelihood:** Medium (471 users hitting simultaneously) +- **Mitigation:** + - Connection pooling (Supavisor) + - Composite indexes on (user_id, created_at) + - Cache API key lookups (Redis) + +**RISK-04: Docker Compose limitations** +- **Impact:** Can't scale beyond single host +- **Likelihood:** Low (need >5k DAU to hit limits) +- **Mitigation:** + - Document Kubernetes migration path + - Re-evaluate at 2k DAU + +### Business Risks + +**RISK-05: Low waitlist conversion** +- **Impact:** <200/471 users sign up +- **Likelihood:** Medium (email list may be stale) +- **Mitigation:** + - Send personalized invitations + - Offer early bird benefits + - Follow up with non-responders + +**RISK-06: High churn** +- **Impact:** Users sign up but don't return +- **Likelihood:** Medium (setup friction, not enough value) +- **Mitigation:** + - Optimize onboarding flow (measure drop-offs) + - Email engagement campaigns + - User interviews to understand blockers + +**RISK-07: Insufficient value for paid tier** +- **Impact:** No one converts to paid (post-MVP) +- **Likelihood:** Medium (unknown willingness to pay) +- **Mitigation:** + - Collect payment intent data during MVP + - Survey users on pricing + - Offer early bird discounts to validate pricing + +### Operational Risks + +**RISK-08: Overwhelmed by support** +- **Impact:** Can't keep up with 471 users' questions +- **Likelihood:** High (new users will have issues) +- **Mitigation:** + - Comprehensive documentation + - FAQ page + - Community Discord/Slack + - Automated onboarding emails + +**RISK-09: Infrastructure costs exceed budget** +- **Impact:** โ‚ฌ54.49/mo not enough at scale +- **Likelihood:** Low (telemetry shows headroom) +- **Mitigation:** + - Monitor resource usage daily + - Scale trigger: Add server when CPU >60% + - Break-even: Only need 3.5 paying users post-MVP + +--- + +## Timeline & Milestones + +### Week 1: Backend Multi-Tenant + Infrastructure + +**Days 1-2: Infrastructure Setup** +- [ ] Provision Hetzner CPX31 + PostgreSQL + Load Balancer +- [ ] Configure DNS (www + api subdomains) +- [ ] Set up SSL certificates (Let's Encrypt) +- [ ] Deploy monitoring (Prometheus + Grafana) + +**Days 3-5: Multi-Tenant Backend** +- [ ] Implement API key validation (src/services/api-key-validator.ts) +- [ ] Modify HTTP server for multi-tenant (src/http-server-single-session.ts) +- [ ] Remove SSE transport code +- [ ] Add PostgreSQL connection (src/services/database.ts) +- [ ] Implement n8n credential decryption + +**Days 6-7: Testing & Docker** +- [ ] Unit tests (authentication, validation) +- [ ] Integration tests (multi-user scenarios) +- [ ] Create docker-compose.prod.yml +- [ ] Test zero-downtime deployment + +**Deliverable:** Working n8n-mcp service with API key 
authentication + +### Week 2: Frontend Dashboard + +**Days 1-2: Authentication** +- [ ] Set up Supabase project +- [ ] Implement email/password signup +- [ ] Email verification flow +- [ ] Protected routes middleware +- [ ] Login/logout flows + +**Days 3-4: Dashboard Pages** +- [ ] Dashboard overview (basic stats) +- [ ] API key management page + - Create new key + - View existing keys + - Revoke keys +- [ ] n8n configuration page + - Form for URL + API key + - Test connection button + - Save (encrypted) + +**Days 5-6: Polish & Integration** +- [ ] Account settings page +- [ ] Error handling and loading states +- [ ] Toast notifications (Sonner) +- [ ] Type generation from Supabase schema +- [ ] RLS policy testing + +**Day 7: Deployment** +- [ ] Deploy frontend to Vercel or Hetzner +- [ ] Test full user flow (signup โ†’ API key โ†’ MCP call) +- [ ] Fix critical bugs + +**Deliverable:** Functional dashboard where users can sign up and configure n8n-mcp + +### Week 3: Integration Testing & Documentation + +**Days 1-2: Platform Testing** +- [ ] Test Claude Desktop integration (Windows, Mac, Linux) +- [ ] Test Cursor integration +- [ ] Test Windsurf integration +- [ ] Test custom HTTP client (curl) +- [ ] Verify all 16 MCP tools work + +**Days 3-4: Load Testing** +- [ ] Simulate 471 users +- [ ] Test peak load (116 RPS from telemetry) +- [ ] Verify rate limiting works +- [ ] Database query performance testing +- [ ] Fix performance bottlenecks + +**Days 5-7: Documentation** +- [ ] User onboarding guide + - How to sign up + - How to configure n8n + - How to create API keys +- [ ] Platform-specific setup guides + - Claude Desktop step-by-step + - Cursor step-by-step + - Windsurf step-by-step +- [ ] Troubleshooting docs + - Common errors + - Debug steps +- [ ] Admin playbook + - Deployment procedures + - Rollback procedures + - Incident response + +**Deliverable:** Fully tested system with comprehensive documentation + +### Week 4: Launch to Waitlist + +**Days 1-2: Pre-Launch Prep** +- [ ] Final security audit +- [ ] Backup procedures tested +- [ ] Monitoring alerts configured (Slack) +- [ ] Status page set up (optional) +- [ ] Landing page updated (hosted service focus) + +**Day 3: Soft Launch (50 users)** +- [ ] Email first 50 users from waitlist +- [ ] Monitor closely for issues +- [ ] Gather immediate feedback +- [ ] Fix critical bugs + +**Days 4-5: Full Launch (471 users)** +- [ ] Email remaining 421 users +- [ ] Monitor onboarding funnel +- [ ] Respond to support questions +- [ ] Track activation rate + +**Days 6-7: Post-Launch** +- [ ] Analyze metrics (signups, activation, retention) +- [ ] User interviews (5-10 users) +- [ ] Identify top pain points +- [ ] Plan Release 1 (analytics) + +**Deliverable:** MVP live with 471 waitlist users invited + +--- + +## Implementation Checklist + +### Pre-Development + +- [ ] Budget approved (โ‚ฌ54.49/month for 4+ months) +- [ ] Team assignments clear (backend, frontend, devops) +- [ ] Accounts created: + - [ ] Hetzner Cloud + - [ ] Supabase + - [ ] GitHub Container Registry (GHCR) + - [ ] (Future) Stripe +- [ ] Development environment set up locally +- [ ] Access to n8n-mcp and n8n-mcp-landing repos + +### Week 1 Checklist + +**Infrastructure:** +- [ ] Hetzner CPX31 provisioned +- [ ] PostgreSQL Basic provisioned +- [ ] Load Balancer LB11 provisioned +- [ ] Object Storage provisioned +- [ ] DNS records created (www, api) +- [ ] SSL certificates obtained (Let's Encrypt) + +**Backend:** +- [ ] `src/services/api-key-validator.ts` implemented +- [ ] 
`src/http-server-single-session.ts` modified for multi-tenant +- [ ] SSE transport code removed +- [ ] `src/services/database.ts` created +- [ ] n8n credential encryption implemented +- [ ] Unit tests written (80%+ coverage) +- [ ] Integration tests written +- [ ] docker-compose.prod.yml created +- [ ] Zero-downtime deployment script tested + +**Verification:** +- [ ] Can authenticate with API key +- [ ] Multi-user isolation works (test with 2+ users) +- [ ] n8n credentials loaded correctly per user +- [ ] MCP tools work with InstanceContext +- [ ] Health checks pass +- [ ] Docker Compose deploys successfully + +### Week 2 Checklist + +**Supabase:** +- [ ] Supabase project created +- [ ] Database schema deployed (users, api_keys, n8n_instances, usage_logs) +- [ ] RLS policies enabled and tested +- [ ] Indexes created +- [ ] Email auth configured (SMTP) +- [ ] Email templates customized + +**Frontend:** +- [ ] Authentication flow implemented (signup, login, logout) +- [ ] Email verification tested +- [ ] Protected routes middleware works +- [ ] Dashboard overview page +- [ ] API key management page (create, view, revoke) +- [ ] n8n configuration page (form, test, save) +- [ ] Account settings page +- [ ] Error handling and loading states +- [ ] Toast notifications working +- [ ] TypeScript types generated from Supabase + +**Verification:** +- [ ] User can sign up and verify email +- [ ] User can create API key and see it once +- [ ] User can configure n8n instance (encrypted) +- [ ] User can revoke API key +- [ ] RLS policies prevent cross-user data access +- [ ] Frontend deployed (Vercel or Hetzner) + +### Week 3 Checklist + +**Testing:** +- [ ] Claude Desktop tested (Mac, Windows) +- [ ] Cursor tested +- [ ] Windsurf tested +- [ ] All 16 MCP tools tested +- [ ] Load test (471 users simulated) +- [ ] Database performance verified (<50ms p95) +- [ ] Rate limiting tested +- [ ] Error scenarios tested (invalid API key, invalid n8n creds, etc.) 
+ +**Documentation:** +- [ ] User onboarding guide written +- [ ] Platform setup guides written (Claude, Cursor, Windsurf) +- [ ] Troubleshooting docs written +- [ ] Admin playbook written +- [ ] API reference updated +- [ ] Landing page updated with hosted service info + +**Verification:** +- [ ] End-to-end user flow works flawlessly +- [ ] Documentation is clear and comprehensive +- [ ] No critical bugs remaining +- [ ] Performance meets targets + +### Week 4 Checklist + +**Pre-Launch:** +- [ ] Security audit completed +- [ ] Backup procedures documented and tested +- [ ] Monitoring alerts configured +- [ ] Email templates for waitlist invitation +- [ ] Landing page updated (CTA to signup) +- [ ] Support email set up + +**Launch:** +- [ ] Soft launch email sent (50 users) +- [ ] Monitoring onboarding metrics +- [ ] Support questions answered +- [ ] Critical bugs fixed +- [ ] Full launch email sent (421 users) + +**Post-Launch:** +- [ ] Metrics analyzed (signups, activation, retention) +- [ ] User feedback collected (survey, interviews) +- [ ] Pain points identified +- [ ] Release 1 planned (analytics) + +--- + +## Cost Summary + +### MVP Development Costs + +**Infrastructure (Monthly):** +- CPX31 (4 vCPU, 8GB): โ‚ฌ14.00 +- PostgreSQL Basic: โ‚ฌ33.00 +- Load Balancer LB11: โ‚ฌ5.49 +- Object Storage 100GB: โ‚ฌ2.00 +- **Total: โ‚ฌ54.49/month** + +**Cost per user:** โ‚ฌ54.49 / 471 = **โ‚ฌ0.12/user/month** + +**4-month MVP period:** โ‚ฌ54.49 ร— 4 = **โ‚ฌ217.96** + +**Development Time:** +- Backend: 7 days +- Frontend: 7 days +- Testing: 7 days +- Launch: 7 days +- **Total: 28 days (4 weeks)** + +### Break-Even Analysis (Post-MVP) + +**With Paid Tiers (Post-MVP Release 2):** + +Assumptions: +- 10% convert to Pro (โ‚ฌ10/month) = 47 users = โ‚ฌ470/month +- 2% convert to Enterprise (โ‚ฌ100/month avg) = 9 users = โ‚ฌ900/month +- **Total revenue: โ‚ฌ1,370/month** + +Costs: +- Infrastructure: โ‚ฌ54.49 +- Stripe fees (3%): โ‚ฌ41.10 +- **Net profit: โ‚ฌ1,274.51/month** + +Break-even: 3.5 paying users = **Achieved at 1% conversion** โœ… + +--- + +## Next Steps + +### Immediate Actions (Today) + +1. **Review this MVP plan** - Confirm scope and timeline +2. **Assign team roles** - Backend, frontend, devops +3. **Create Hetzner account** - If not already done +4. **Create Supabase project** - Free tier for development +5. **Set up local development**: + - Backend: n8n-mcp repo + - Frontend: n8n-mcp-landing repo + +### Week 1 Kick-off (Monday) + +1. **Infrastructure setup** (Day 1) + - Provision Hetzner resources + - Configure DNS + - Set up monitoring + +2. **Start backend development** (Day 2) + - Create branch: `feature/multi-tenant` + - Begin API key validation implementation + - Set up PostgreSQL connection + +3. **Start frontend development** (Day 2) + - Create branch: `feature/dashboard` + - Set up Supabase authentication + - Begin dashboard layout + +### Questions to Answer + +Before starting development: + +1. **Team** + - Who is responsible for backend? + - Who is responsible for frontend? + - Who is responsible for devops? + - Do we need to hire contractors? + +2. **Budget** + - โ‚ฌ54.49/month infrastructure approved? + - Budget for 4+ months until revenue? + +3. **Timeline** + - 4-week MVP realistic? + - Any external dependencies? + - Hard deadlines? + +4. **Scope** + - MVP features confirmed? + - Any must-haves missing? + - Any nice-to-haves to remove? + +--- + +## Conclusion + +**MVP is achievable in 4 weeks** thanks to: +1. โœ… 70% multi-tenant code already exists (InstanceContext) +2. 
โœ… Landing page already on Next.js 15 +3. โœ… Infrastructure sizing validated by telemetry (600 DAU baseline) +4. โœ… All technologies researched with production patterns + +**Key Success Factors:** +- Focus ruthlessly on MVP scope (no scope creep!) +- Leverage existing code (InstanceContext pattern) +- Use proven patterns (Supabase + Next.js 15) +- Test with real users early (50-user soft launch) +- Gather feedback relentlessly + +**After MVP:** +- Release 1: Usage analytics (1-2 weeks) +- Release 2: Paid tiers with Stripe (3-4 weeks) +- Release 3: Advanced features (2 weeks) + +**Go/No-Go Decision:** + +โœ… **Proceed if:** +- Team capacity available (3-4 weeks full-time or 6-8 weeks part-time) +- Budget approved (โ‚ฌ217.96 for 4 months) +- Commitment to post-launch support (monitoring, user support) + +โŒ **Delay if:** +- Team at capacity with other projects +- Uncertainty about maintaining hosted service long-term +- Budget constraints + +--- + +**Document Version:** 3.0 - MVP Focus +**Last Updated:** 2025-10-11 +**Next Review:** After Week 1 completion +**Owner:** n8n-mcp Team diff --git a/TELEMETRY_PRUNING_GUIDE.md b/TELEMETRY_PRUNING_GUIDE.md new file mode 100644 index 0000000..61a6eed --- /dev/null +++ b/TELEMETRY_PRUNING_GUIDE.md @@ -0,0 +1,623 @@ +# Telemetry Data Pruning & Aggregation Guide + +## Overview + +This guide provides a complete solution for managing n8n-mcp telemetry data in Supabase to stay within the 500 MB free tier limit while preserving valuable insights for product development. + +## Current Situation + +- **Database Size**: 265 MB / 500 MB (53% of limit) +- **Growth Rate**: 7.7 MB/day (54 MB/week) +- **Time Until Full**: ~17 days +- **Total Events**: 641,487 events + 17,247 workflows + +### Storage Breakdown + +| Event Type | Count | Size | % of Total | +|------------|-------|------|------------| +| `tool_sequence` | 362,704 | 96 MB | 72% | +| `tool_used` | 191,938 | 28 MB | 21% | +| `validation_details` | 36,280 | 14 MB | 11% | +| `workflow_created` | 23,213 | 4.5 MB | 3% | +| Others | ~26,000 | ~3 MB | 2% | + +## Solution Strategy + +**Aggregate โ†’ Delete โ†’ Retain only recent raw events** + +### Expected Results + +| Metric | Before | After | Improvement | +|--------|--------|-------|-------------| +| Database Size | 265 MB | ~90-120 MB | **55-65% reduction** | +| Growth Rate | 7.7 MB/day | ~2-3 MB/day | **60-70% slower** | +| Days Until Full | 17 days | **Sustainable** | Never fills | +| Free Tier Usage | 53% | ~20-25% | **75-80% headroom** | + +## Implementation Steps + +### Step 1: Execute the SQL Migration + +Open Supabase SQL Editor and run the entire contents of `supabase-telemetry-aggregation.sql`: + +```sql +-- Copy and paste the entire supabase-telemetry-aggregation.sql file +-- Or run it directly from the file +``` + +This will create: +- 5 aggregation tables +- Aggregation functions +- Automated cleanup function +- Monitoring functions +- Scheduled cron job (daily at 2 AM UTC) + +### Step 2: Verify Cron Job Setup + +Check that the cron job was created successfully: + +```sql +-- View scheduled cron jobs +SELECT + jobid, + schedule, + command, + nodename, + nodeport, + database, + username, + active +FROM cron.job +WHERE jobname = 'telemetry-daily-cleanup'; +``` + +Expected output: +- Schedule: `0 2 * * *` (daily at 2 AM UTC) +- Active: `true` + +### Step 3: Run Initial Emergency Cleanup + +Get immediate space relief by running the emergency cleanup: + +```sql +-- This will aggregate and delete data older than 7 days +SELECT * FROM 
emergency_cleanup(); +``` + +Expected results: +``` +action | rows_deleted | space_freed_mb +------------------------------------+--------------+---------------- +Deleted non-critical events > 7d | ~284,924 | ~52 MB +Deleted error events > 14d | ~2,400 | ~0.5 MB +Deleted duplicate workflows | ~8,500 | ~11 MB +TOTAL (run VACUUM separately) | 0 | ~63.5 MB +``` + +### Step 4: Reclaim Disk Space + +After deletion, reclaim the actual disk space: + +```sql +-- Reclaim space from deleted rows +VACUUM FULL telemetry_events; +VACUUM FULL telemetry_workflows; + +-- Update statistics for query optimization +ANALYZE telemetry_events; +ANALYZE telemetry_workflows; +``` + +**Note**: `VACUUM FULL` may take a few minutes and locks the table. Run during off-peak hours if possible. + +### Step 5: Verify Results + +Check the new database size: + +```sql +SELECT * FROM check_database_size(); +``` + +Expected output: +``` +total_size_mb | events_size_mb | workflows_size_mb | aggregates_size_mb | percent_of_limit | days_until_full | status +--------------+----------------+-------------------+--------------------+------------------+-----------------+--------- +202.5 | 85.2 | 35.8 | 12.5 | 40.5 | ~95 | HEALTHY +``` + +## Daily Operations (Automated) + +Once set up, the system runs automatically: + +1. **Daily at 2 AM UTC**: Cron job runs +2. **Aggregation**: Data older than 3 days is aggregated into summary tables +3. **Deletion**: Raw events are deleted after aggregation +4. **Cleanup**: VACUUM runs to reclaim space +5. **Retention**: + - High-volume events: 3 days + - Error events: 30 days + - Aggregated insights: Forever + +## Monitoring Commands + +### Check Database Health + +```sql +-- View current size and status +SELECT * FROM check_database_size(); +``` + +### View Aggregated Insights + +```sql +-- Top tools used daily +SELECT + aggregation_date, + tool_name, + usage_count, + success_count, + error_count, + ROUND(100.0 * success_count / NULLIF(usage_count, 0), 1) as success_rate_pct +FROM telemetry_tool_usage_daily +ORDER BY aggregation_date DESC, usage_count DESC +LIMIT 50; + +-- Most common tool sequences +SELECT + aggregation_date, + tool_sequence, + occurrence_count, + ROUND(avg_sequence_duration_ms, 0) as avg_duration_ms, + ROUND(100 * success_rate, 1) as success_rate_pct +FROM telemetry_tool_patterns +ORDER BY occurrence_count DESC +LIMIT 20; + +-- Error patterns over time +SELECT + aggregation_date, + error_type, + error_context, + occurrence_count, + affected_users, + sample_error_message +FROM telemetry_error_patterns +ORDER BY aggregation_date DESC, occurrence_count DESC +LIMIT 30; + +-- Workflow creation trends +SELECT + aggregation_date, + complexity, + node_count_range, + has_trigger, + has_webhook, + workflow_count, + ROUND(avg_node_count, 1) as avg_nodes +FROM telemetry_workflow_insights +ORDER BY aggregation_date DESC, workflow_count DESC +LIMIT 30; + +-- Validation success rates +SELECT + aggregation_date, + validation_type, + profile, + success_count, + failure_count, + ROUND(100.0 * success_count / NULLIF(success_count + failure_count, 0), 1) as success_rate_pct, + common_failure_reasons +FROM telemetry_validation_insights +ORDER BY aggregation_date DESC, (success_count + failure_count) DESC +LIMIT 30; +``` + +### Check Cron Job Execution History + +```sql +-- View recent cron job runs +SELECT + runid, + jobid, + database, + status, + return_message, + start_time, + end_time +FROM cron.job_run_details +WHERE jobid = (SELECT jobid FROM cron.job WHERE jobname = 'telemetry-daily-cleanup') 
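-- newest runs first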
+ORDER BY start_time DESC +LIMIT 10; +``` + +## Manual Operations + +### Run Cleanup On-Demand + +If you need to run cleanup outside the scheduled time: + +```sql +-- Run with default 3-day retention +SELECT * FROM run_telemetry_aggregation_and_cleanup(3); +VACUUM ANALYZE telemetry_events; + +-- Or with custom retention (e.g., 5 days) +SELECT * FROM run_telemetry_aggregation_and_cleanup(5); +VACUUM ANALYZE telemetry_events; +``` + +### Emergency Cleanup (Critical Situations) + +If database is approaching limit and you need immediate relief: + +```sql +-- Step 1: Run emergency cleanup (7-day retention) +SELECT * FROM emergency_cleanup(); + +-- Step 2: Reclaim space aggressively +VACUUM FULL telemetry_events; +VACUUM FULL telemetry_workflows; +ANALYZE telemetry_events; +ANALYZE telemetry_workflows; + +-- Step 3: Verify results +SELECT * FROM check_database_size(); +``` + +### Adjust Retention Policy + +To change the default 3-day retention period: + +```sql +-- Update cron job to use 5-day retention instead +SELECT cron.unschedule('telemetry-daily-cleanup'); + +SELECT cron.schedule( + 'telemetry-daily-cleanup', + '0 2 * * *', -- Daily at 2 AM UTC + $$ + SELECT run_telemetry_aggregation_and_cleanup(5); -- 5 days instead of 3 + VACUUM ANALYZE telemetry_events; + VACUUM ANALYZE telemetry_workflows; + $$ +); +``` + +## Data Retention Policies + +### Raw Events Retention + +| Event Type | Retention | Reason | +|------------|-----------|--------| +| `tool_sequence` | 3 days | High volume, low long-term value | +| `tool_used` | 3 days | High volume, aggregated daily | +| `validation_details` | 3 days | Aggregated into insights | +| `workflow_created` | 3 days | Aggregated into patterns | +| `session_start` | 3 days | Operational data only | +| `search_query` | 3 days | Operational data only | +| `error_occurred` | **30 days** | Extended for debugging | +| `workflow_validation_failed` | 3 days | Captured in aggregates | + +### Aggregated Data Retention + +All aggregated data is kept **indefinitely**: +- Daily tool usage statistics +- Tool sequence patterns +- Workflow creation trends +- Error patterns and frequencies +- Validation success rates + +### Workflow Retention + +- **Unique workflows**: Kept indefinitely (one per unique hash) +- **Duplicate workflows**: Deleted after 3 days +- **Workflow metadata**: Aggregated into daily insights + +## Intelligence Preserved + +Even after aggressive pruning, you still have access to: + +### Long-term Product Insights +- Which tools are most/least used over time +- Tool usage trends and adoption curves +- Common workflow patterns and complexities +- Error frequencies and types across versions +- Validation failure patterns + +### Development Intelligence +- Feature adoption rates (by day/week/month) +- Pain points (high error rates, validation failures) +- User behavior patterns (tool sequences, workflow styles) +- Version comparison (changes in usage between releases) + +### Recent Debugging Data +- Last 3 days of raw events for immediate issues +- Last 30 days of error events for bug tracking +- Sample error messages for each error type + +## Troubleshooting + +### Cron Job Not Running + +Check if pg_cron extension is enabled: + +```sql +-- Enable pg_cron +CREATE EXTENSION IF NOT EXISTS pg_cron; + +-- Verify it's enabled +SELECT * FROM pg_extension WHERE extname = 'pg_cron'; +``` + +### Aggregation Functions Failing + +Check for errors in cron job execution: + +```sql +-- View error messages +SELECT + status, + return_message, + start_time +FROM 
cron.job_run_details +WHERE jobid = (SELECT jobid FROM cron.job WHERE jobname = 'telemetry-daily-cleanup') + AND status = 'failed' +ORDER BY start_time DESC; +``` + +### VACUUM Not Reclaiming Space + +If `VACUUM ANALYZE` isn't reclaiming enough space, use `VACUUM FULL`: + +```sql +-- More aggressive space reclamation (locks table) +VACUUM FULL telemetry_events; +``` + +### Database Still Growing Too Fast + +Reduce retention period further: + +```sql +-- Change to 2-day retention (more aggressive) +SELECT * FROM run_telemetry_aggregation_and_cleanup(2); +``` + +Or delete more event types: + +```sql +-- Delete additional low-value events +DELETE FROM telemetry_events +WHERE created_at < NOW() - INTERVAL '3 days' + AND event IN ('session_start', 'search_query', 'diagnostic_completed', 'health_check_completed'); +``` + +## Performance Considerations + +### Cron Job Execution Time + +The daily cleanup typically takes: +- **Aggregation**: 30-60 seconds +- **Deletion**: 15-30 seconds +- **VACUUM**: 2-5 minutes +- **Total**: ~3-7 minutes + +### Query Performance + +All aggregation tables have indexes on: +- Date columns (for time-series queries) +- Lookup columns (tool_name, error_type, etc.) +- User columns (for user-specific analysis) + +### Lock Considerations + +- `VACUUM ANALYZE`: Minimal locking, safe during operation +- `VACUUM FULL`: Locks table, run during off-peak hours +- Aggregation functions: Read-only queries, no locking + +## Customization + +### Add Custom Aggregations + +To track additional metrics, create new aggregation tables: + +```sql +-- Example: Session duration aggregation +CREATE TABLE telemetry_session_duration_daily ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + aggregation_date DATE NOT NULL, + avg_duration_seconds NUMERIC, + median_duration_seconds NUMERIC, + max_duration_seconds NUMERIC, + session_count INTEGER, + created_at TIMESTAMPTZ DEFAULT NOW(), + UNIQUE(aggregation_date) +); + +-- Add to cleanup function +-- (modify run_telemetry_aggregation_and_cleanup) +``` + +### Modify Retention Policies + +Edit the `run_telemetry_aggregation_and_cleanup` function to adjust retention by event type: + +```sql +-- Keep validation_details for 7 days instead of 3 +DELETE FROM telemetry_events +WHERE created_at < (NOW() - INTERVAL '7 days') + AND event = 'validation_details'; +``` + +### Change Cron Schedule + +Adjust the execution time if needed: + +```sql +-- Run at different time (e.g., 3 AM UTC) +SELECT cron.schedule( + 'telemetry-daily-cleanup', + '0 3 * * *', -- 3 AM instead of 2 AM + $$ SELECT run_telemetry_aggregation_and_cleanup(3); VACUUM ANALYZE telemetry_events; $$ +); + +-- Run twice daily (2 AM and 2 PM) +SELECT cron.schedule( + 'telemetry-cleanup-morning', + '0 2 * * *', + $$ SELECT run_telemetry_aggregation_and_cleanup(3); $$ +); + +SELECT cron.schedule( + 'telemetry-cleanup-afternoon', + '0 14 * * *', + $$ SELECT run_telemetry_aggregation_and_cleanup(3); $$ +); +``` + +## Backup & Recovery + +### Before Running Emergency Cleanup + +Create a backup of aggregation queries: + +```sql +-- Export aggregated data to CSV or backup tables +CREATE TABLE telemetry_tool_usage_backup AS +SELECT * FROM telemetry_tool_usage_daily; + +CREATE TABLE telemetry_patterns_backup AS +SELECT * FROM telemetry_tool_patterns; +``` + +### Restore Deleted Data + +Raw event data cannot be restored after deletion. However, aggregated insights are preserved indefinitely. + +To prevent accidental data loss: +1. Test cleanup functions on staging first +2. 
Review `check_database_size()` before running emergency cleanup
+3. Start with longer retention periods (7 days) and reduce gradually
+4. Monitor aggregated data quality for 1-2 weeks
+
+## Monitoring Dashboard Queries
+
+### Weekly Growth Report
+
+```sql
+-- Database growth over last 7 days
+SELECT
+  DATE(created_at) as date,
+  COUNT(*) as events_created,
+  COUNT(DISTINCT event) as event_types,
+  COUNT(DISTINCT user_id) as active_users,
+  ROUND(SUM(pg_column_size(telemetry_events.*))::NUMERIC / 1024 / 1024, 2) as size_mb
+FROM telemetry_events
+WHERE created_at >= NOW() - INTERVAL '7 days'
+GROUP BY DATE(created_at)
+ORDER BY date DESC;
+```
+
+### Storage Efficiency Report
+
+```sql
+-- Compare raw vs aggregated storage
+SELECT
+  'Raw Events (last 3 days)' as category,
+  COUNT(*) as row_count,
+  pg_size_pretty(pg_total_relation_size('telemetry_events')) as table_size
+FROM telemetry_events
+WHERE created_at >= NOW() - INTERVAL '3 days'
+
+UNION ALL
+
+SELECT
+  'Aggregated Insights (all time)',
+  (SELECT COUNT(*) FROM telemetry_tool_usage_daily) +
+  (SELECT COUNT(*) FROM telemetry_tool_patterns) +
+  (SELECT COUNT(*) FROM telemetry_workflow_insights) +
+  (SELECT COUNT(*) FROM telemetry_error_patterns) +
+  (SELECT COUNT(*) FROM telemetry_validation_insights),
+  pg_size_pretty(
+    pg_total_relation_size('telemetry_tool_usage_daily') +
+    pg_total_relation_size('telemetry_tool_patterns') +
+    pg_total_relation_size('telemetry_workflow_insights') +
+    pg_total_relation_size('telemetry_error_patterns') +
+    pg_total_relation_size('telemetry_validation_insights')
+  );
+```
+
+### Top Events by Size
+
+```sql
+-- Which event types consume most space
+SELECT
+  event,
+  COUNT(*) as event_count,
+  pg_size_pretty(SUM(pg_column_size(telemetry_events.*))::BIGINT) as total_size,
+  pg_size_pretty(AVG(pg_column_size(telemetry_events.*))::BIGINT) as avg_size_per_event,
+  ROUND(100.0 * COUNT(*) / SUM(COUNT(*)) OVER (), 2) as pct_of_events
+FROM telemetry_events
+GROUP BY event
+ORDER BY SUM(pg_column_size(telemetry_events.*)) DESC;
+```
+
+## Success Metrics
+
+Track these metrics weekly to ensure the system is working:
+
+### Target Metrics (After Implementation)
+
+- ✅ Database size: **< 150 MB** (< 30% of limit)
+- ✅ Growth rate: **< 3 MB/day** (sustainable)
+- ✅ Raw event retention: **3 days** (configurable)
+- ✅ Aggregated data: **All-time insights available**
+- ✅ Cron job success rate: **> 95%**
+- ✅ Query performance: **< 500ms for aggregated queries**
+
+### Review Schedule
+
+- **Daily**: Check `check_database_size()` status
+- **Weekly**: Review aggregated insights and growth trends
+- **Monthly**: Analyze cron job success rate and adjust retention if needed
+- **After each release**: Compare usage patterns to previous version
+
+## Quick Reference
+
+### Essential Commands
+
+```sql
+-- Check database health
+SELECT * FROM check_database_size();
+
+-- View recent aggregated insights
+SELECT * FROM telemetry_tool_usage_daily ORDER BY aggregation_date DESC LIMIT 10;
+
+-- Run manual cleanup (3-day retention)
+SELECT * FROM run_telemetry_aggregation_and_cleanup(3);
+VACUUM ANALYZE telemetry_events;
+
+-- Emergency cleanup (7-day retention)
+SELECT * FROM emergency_cleanup();
+VACUUM FULL telemetry_events;
+
+-- View cron job status
+SELECT * FROM cron.job WHERE jobname = 'telemetry-daily-cleanup';
+
+-- View cron execution history
+SELECT * FROM cron.job_run_details
+WHERE jobid = (SELECT jobid FROM cron.job WHERE jobname = 'telemetry-daily-cleanup')
+ORDER BY start_time DESC LIMIT 5;
+```
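+
+### Programmatic Health Check (Sketch)
+
+The same health check can be polled from application code, for example to feed alerting. The sketch below is illustrative rather than part of the migration: it assumes a server-side `@supabase/supabase-js` client and the `SUPABASE_URL` / `SUPABASE_SERVICE_KEY` environment variables, and the alerting hook is hypothetical.
+
+```typescript
+import { createClient } from '@supabase/supabase-js';
+
+// Service-role client: bypasses RLS, so keep this server-side only
+const supabase = createClient(
+  process.env.SUPABASE_URL!,
+  process.env.SUPABASE_SERVICE_KEY!
+);
+
+async function reportTelemetryHealth(): Promise<void> {
+  // check_database_size() is the monitoring function defined in this guide
+  const { data, error } = await supabase.rpc('check_database_size');
+  if (error) throw error;
+
+  const [health] = data; // the function returns a single summary row
+  console.log(
+    `Telemetry DB: ${health.total_size_mb} MB (${health.percent_of_limit}% of limit, ${health.status})`
+  );
+
+  if (health.percent_of_limit >= 75) {
+    // Hypothetical: wire in Slack/PagerDuty/etc. here
+    console.warn('Telemetry DB above 75% of the free-tier limit - review retention settings');
+  }
+}
+
+reportTelemetryHealth().catch(console.error);
+```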
+ +## Support + +If you encounter issues: + +1. Check the troubleshooting section above +2. Review cron job execution logs +3. Verify pg_cron extension is enabled +4. Test aggregation functions manually +5. Check Supabase dashboard for errors + +For questions or improvements, refer to the main project documentation. diff --git a/data/nodes.db b/data/nodes.db index 99e1de6..0773006 100644 Binary files a/data/nodes.db and b/data/nodes.db differ diff --git a/package.json b/package.json index 640af48..8659117 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "n8n-mcp", - "version": "2.18.10", + "version": "2.19.0", "description": "Integration between n8n workflow automation and Model Context Protocol (MCP)", "main": "dist/index.js", "types": "dist/index.d.ts", diff --git a/src/http-server-single-session.ts b/src/http-server-single-session.ts index 17716d1..effdf9c 100644 --- a/src/http-server-single-session.ts +++ b/src/http-server-single-session.ts @@ -25,6 +25,7 @@ import { STANDARD_PROTOCOL_VERSION } from './utils/protocol-version'; import { InstanceContext, validateInstanceContext } from './types/instance-context'; +import { SessionRestoreHook, SessionState } from './types/session-restoration'; dotenv.config(); @@ -84,12 +85,30 @@ export class SingleSessionHTTPServer { private sessionTimeout = 30 * 60 * 1000; // 30 minutes private authToken: string | null = null; private cleanupTimer: NodeJS.Timeout | null = null; - - constructor() { + + // Session restoration options (Phase 1 - v2.19.0) + private onSessionNotFound?: SessionRestoreHook; + private sessionRestorationTimeout: number; + + constructor(options: { + sessionTimeout?: number; + onSessionNotFound?: SessionRestoreHook; + sessionRestorationTimeout?: number; + } = {}) { // Validate environment on construction this.validateEnvironment(); + + // Session restoration configuration + this.onSessionNotFound = options.onSessionNotFound; + this.sessionRestorationTimeout = options.sessionRestorationTimeout || 5000; // 5 seconds default + + // Override session timeout if provided + if (options.sessionTimeout) { + this.sessionTimeout = options.sessionTimeout; + } + // No longer pre-create session - will be created per initialize request following SDK pattern - + // Start periodic session cleanup this.startSessionCleanup(); } @@ -187,23 +206,52 @@ export class SingleSessionHTTPServer { } /** - * Validate session ID format + * Validate session ID format (Security-Hardened - REQ-8) * - * Accepts any non-empty string to support various MCP clients: - * - UUIDv4 (internal n8n-mcp format) - * - instance-{userId}-{hash}-{uuid} (multi-tenant format) - * - Custom formats from mcp-remote and other proxies + * Validates session ID format to prevent injection attacks: + * - SQL injection + * - NoSQL injection + * - Path traversal + * - DoS via oversized IDs * - * Security: Session validation happens via lookup in this.transports, - * not format validation. This ensures compatibility with all MCP clients. + * Accepts multiple formats for MCP client compatibility: + * 1. UUIDv4 (internal format): xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + * 2. Multi-tenant format: instance-{userId}-{hash}-{uuid} + * 3. 
Generic safe format: any alphanumeric string with hyphens/underscores (20-100 chars)
   *
   * @param sessionId - Session identifier from MCP client
   * @returns true if valid, false otherwise
+   * @since 2.19.0 - Enhanced with security validation
+   * @since 2.19.1 - Relaxed validation for MCP proxy compatibility
   */
  private isValidSessionId(sessionId: string): boolean {
-    // Accept any non-empty string as session ID
-    // This ensures compatibility with all MCP clients and proxies
-    return Boolean(sessionId && sessionId.length > 0);
+    if (!sessionId || typeof sessionId !== 'string') {
+      return false;
+    }
+
+    // Length validation (20-100 chars) - DoS protection
+    if (sessionId.length < 20 || sessionId.length > 100) {
+      return false;
+    }
+
+    // Character whitelist (alphanumeric + hyphens + underscores) - Injection protection
+    // Allow underscores for compatibility with some MCP clients (e.g., mcp-remote)
+    if (!/^[a-zA-Z0-9_-]+$/.test(sessionId)) {
+      return false;
+    }
+
+    // Format validation - Support known formats or any safe alphanumeric format
+    // UUIDv4: 8-4-4-4-12 hex digits with hyphens
+    const uuidV4Pattern = /^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/i;
+
+    // Multi-tenant: instance-{userId}-{hash}-{uuid}
+    // Must start with 'instance-' and have at least 4 parts
+    const multiTenantPattern = /^instance-[a-zA-Z0-9_]+-[a-zA-Z0-9_]+-[a-zA-Z0-9_-]+$/;
+
+    // Accept UUIDv4, multi-tenant, OR any safe alphanumeric format (for flexibility)
+    return uuidV4Pattern.test(sessionId) ||
+           multiTenantPattern.test(sessionId) ||
+           /^[a-zA-Z0-9_-]{20,100}$/.test(sessionId); // Generic safe format
   }
 
   /**
@@ -297,6 +345,155 @@ export class SingleSessionHTTPServer {
     }
   }
 
+  /**
+   * Timeout utility for session restoration
+   * Creates a promise that rejects after the specified milliseconds
+   *
+   * @param ms - Timeout duration in milliseconds
+   * @returns Promise that rejects with TimeoutError
+   * @since 2.19.0
+   */
+  private timeout(ms: number): Promise<never> {
+    return new Promise<never>((_, reject) => {
+      setTimeout(() => {
+        const error = new Error(`Operation timed out after ${ms}ms`);
+        error.name = 'TimeoutError';
+        reject(error);
+      }, ms);
+    });
+  }
+
+  /**
+   * Create a new session (IDEMPOTENT - REQ-2)
+   *
+   * This method is idempotent to prevent race conditions during concurrent
+   * restoration attempts. If the session already exists, returns the existing
+   * session ID without creating a duplicate.
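+   *
+   * @example
+   * // Sketch: a concurrent duplicate restoration attempt is a safe no-op
+   * const a = this.createSession(ctx, sessionId);
+   * const b = this.createSession(ctx, sessionId); // same ID returned, nothing recreated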
+ * + * @param instanceContext - Instance-specific configuration + * @param sessionId - Optional pre-defined session ID (for restoration) + * @returns The session ID (newly created or existing) + * @throws Error if session ID format is invalid + * @since 2.19.0 + */ + private createSession( + instanceContext: InstanceContext, + sessionId?: string + ): string { + // Generate session ID if not provided + const id = sessionId || this.generateSessionId(instanceContext); + + // CRITICAL: Idempotency check to prevent race conditions + if (this.transports[id]) { + logger.debug('Session already exists, skipping creation (idempotent)', { + sessionId: id + }); + return id; + } + + // Validate session ID format if provided externally + if (sessionId && !this.isValidSessionId(sessionId)) { + logger.error('Invalid session ID format during creation', { sessionId }); + throw new Error('Invalid session ID format'); + } + + const server = new N8NDocumentationMCPServer(instanceContext); + + // Create transport and server + const transport = new StreamableHTTPServerTransport({ + sessionIdGenerator: () => id, + onsessioninitialized: (initializedSessionId: string) => { + // Session already stored, this just logs initialization + logger.info('Session initialized during explicit creation', { + sessionId: initializedSessionId + }); + } + }); + + // CRITICAL: Store session data immediately (not in callback) + // This ensures sessions are available synchronously for tests and direct API calls + this.transports[id] = transport; + this.servers[id] = server; + this.sessionMetadata[id] = { + lastAccess: new Date(), + createdAt: new Date() + }; + this.sessionContexts[id] = instanceContext; + + // Set up cleanup handlers + transport.onclose = () => { + if (transport.sessionId) { + logger.info('Transport closed during createSession, cleaning up', { + sessionId: transport.sessionId + }); + this.removeSession(transport.sessionId, 'transport_closed'); + } + }; + + transport.onerror = (error: Error) => { + if (transport.sessionId) { + logger.error('Transport error during createSession', { + sessionId: transport.sessionId, + error: error.message + }); + this.removeSession(transport.sessionId, 'transport_error').catch(err => { + logger.error('Error during transport error cleanup', { error: err }); + }); + } + }; + + // CRITICAL: Connect server to transport before returning + // Without this, the server won't process requests! + // Note: We don't await here because createSession is synchronous + // The connection will complete asynchronously via onsessioninitialized + server.connect(transport).catch(err => { + logger.error('Failed to connect server to transport in createSession', { + sessionId: id, + error: err instanceof Error ? 
err.message : String(err) + }); + // Clean up on connection failure + this.removeSession(id, 'connection_failed').catch(cleanupErr => { + logger.error('Error during connection failure cleanup', { error: cleanupErr }); + }); + }); + + logger.info('Session created successfully (connecting server to transport)', { + sessionId: id, + hasInstanceContext: !!instanceContext, + instanceId: instanceContext?.instanceId + }); + + return id; + } + + /** + * Generate session ID based on instance context + * Used for multi-tenant mode + * + * @param instanceContext - Instance-specific configuration + * @returns Generated session ID + */ + private generateSessionId(instanceContext?: InstanceContext): string { + const isMultiTenantEnabled = process.env.ENABLE_MULTI_TENANT === 'true'; + const sessionStrategy = process.env.MULTI_TENANT_SESSION_STRATEGY || 'instance'; + + if (isMultiTenantEnabled && sessionStrategy === 'instance' && instanceContext?.instanceId) { + // Multi-tenant mode with instance strategy + const configHash = createHash('sha256') + .update(JSON.stringify({ + url: instanceContext.n8nApiUrl, + instanceId: instanceContext.instanceId + })) + .digest('hex') + .substring(0, 8); + + return `instance-${instanceContext.instanceId}-${configHash}-${uuidv4()}`; + } + + // Standard UUIDv4 + return uuidv4(); + } + /** * Get session metrics for monitoring */ @@ -556,32 +753,160 @@ export class SingleSessionHTTPServer { this.updateSessionAccess(sessionId); } else { - // Invalid request - no session ID and not an initialize request - const errorDetails = { - hasSessionId: !!sessionId, - isInitialize: isInitialize, - sessionIdValid: sessionId ? this.isValidSessionId(sessionId) : false, - sessionExists: sessionId ? !!this.transports[sessionId] : false - }; - - logger.warn('handleRequest: Invalid request - no session ID and not initialize', errorDetails); - - let errorMessage = 'Bad Request: No valid session ID provided and not an initialize request'; - if (sessionId && !this.isValidSessionId(sessionId)) { - errorMessage = 'Bad Request: Invalid session ID format'; - } else if (sessionId && !this.transports[sessionId]) { - errorMessage = 'Bad Request: Session not found or expired'; + // Handle unknown session ID - check if we can restore it + if (sessionId) { + // REQ-8: Validate session ID format FIRST (security) + if (!this.isValidSessionId(sessionId)) { + logger.warn('handleRequest: Invalid session ID format rejected', { + sessionId: sessionId.substring(0, 20) + }); + res.status(400).json({ + jsonrpc: '2.0', + error: { + code: -32602, + message: 'Invalid session ID format' + }, + id: req.body?.id || null + }); + return; + } + + // REQ-1: Try session restoration if hook provided + if (this.onSessionNotFound) { + logger.info('Attempting session restoration', { sessionId }); + + try { + // Call restoration hook with timeout + const restoredContext = await Promise.race([ + this.onSessionNotFound(sessionId), + this.timeout(this.sessionRestorationTimeout) + ]); + + // Handle both null and undefined defensively + // Both indicate the hook declined to restore the session + if (restoredContext === null || restoredContext === undefined) { + logger.info('Session restoration declined by hook', { + sessionId, + returnValue: restoredContext === null ? 
'null' : 'undefined' + }); + res.status(400).json({ + jsonrpc: '2.0', + error: { + code: -32000, + message: 'Session not found or expired' + }, + id: req.body?.id || null + }); + return; + } + + // Validate the context returned by the hook + const validation = validateInstanceContext(restoredContext); + if (!validation.valid) { + logger.error('Invalid context returned from restoration hook', { + sessionId, + errors: validation.errors + }); + res.status(400).json({ + jsonrpc: '2.0', + error: { + code: -32000, + message: 'Invalid session context' + }, + id: req.body?.id || null + }); + return; + } + + // REQ-2: Create session (idempotent) + logger.info('Session restoration successful, creating session', { + sessionId, + instanceId: restoredContext.instanceId + }); + + this.createSession(restoredContext, sessionId); + + // Verify session was created + if (!this.transports[sessionId]) { + logger.error('Session creation failed after restoration', { sessionId }); + res.status(500).json({ + jsonrpc: '2.0', + error: { + code: -32603, + message: 'Session creation failed' + }, + id: req.body?.id || null + }); + return; + } + + // Use the restored session + transport = this.transports[sessionId]; + logger.info('Using restored session transport', { sessionId }); + + } catch (error) { + // Handle timeout + if (error instanceof Error && error.name === 'TimeoutError') { + logger.error('Session restoration timeout', { + sessionId, + timeout: this.sessionRestorationTimeout + }); + res.status(408).json({ + jsonrpc: '2.0', + error: { + code: -32000, + message: 'Session restoration timeout' + }, + id: req.body?.id || null + }); + return; + } + + // Handle other errors + logger.error('Session restoration failed', { + sessionId, + error: error instanceof Error ? error.message : String(error) + }); + res.status(500).json({ + jsonrpc: '2.0', + error: { + code: -32603, + message: 'Session restoration failed' + }, + id: req.body?.id || null + }); + return; + } + } else { + // No restoration hook - session not found + logger.warn('Session not found and no restoration hook configured', { + sessionId + }); + res.status(400).json({ + jsonrpc: '2.0', + error: { + code: -32000, + message: 'Session not found or expired' + }, + id: req.body?.id || null + }); + return; + } + } else { + // No session ID and not initialize - invalid request + logger.warn('handleRequest: Invalid request - no session ID and not initialize', { + isInitialize + }); + res.status(400).json({ + jsonrpc: '2.0', + error: { + code: -32000, + message: 'Bad Request: No valid session ID provided and not an initialize request' + }, + id: req.body?.id || null + }); + return; } - - res.status(400).json({ - jsonrpc: '2.0', - error: { - code: -32000, - message: errorMessage - }, - id: req.body?.id || null - }); - return; } // Handle request with the transport @@ -1360,9 +1685,9 @@ export class SingleSessionHTTPServer { /** * Get current session info (for testing/debugging) */ - getSessionInfo(): { - active: boolean; - sessionId?: string; + getSessionInfo(): { + active: boolean; + sessionId?: string; age?: number; sessions?: { total: number; @@ -1373,10 +1698,10 @@ export class SingleSessionHTTPServer { }; } { const metrics = this.getSessionMetrics(); - + // Legacy SSE session info if (!this.session) { - return { + return { active: false, sessions: { total: metrics.totalSessions, @@ -1387,7 +1712,7 @@ export class SingleSessionHTTPServer { } }; } - + return { active: true, sessionId: this.session.sessionId, @@ -1401,6 +1726,213 @@ export class 
SingleSessionHTTPServer { } }; } + + /** + * Get all active session IDs (Phase 2 - REQ-5) + * Useful for periodic backup to database + * + * @returns Array of active session IDs + * @since 2.19.0 + * + * @example + * ```typescript + * const sessionIds = server.getActiveSessions(); + * console.log(`Active sessions: ${sessionIds.length}`); + * ``` + */ + getActiveSessions(): string[] { + return Object.keys(this.transports); + } + + /** + * Get session state for persistence (Phase 2 - REQ-5) + * Returns null if session doesn't exist + * + * @param sessionId - The session ID to retrieve state for + * @returns Session state or null if not found + * @since 2.19.0 + * + * @example + * ```typescript + * const state = server.getSessionState('session-123'); + * if (state) { + * await database.saveSession(state); + * } + * ``` + */ + getSessionState(sessionId: string): SessionState | null { + // Check if session exists + if (!this.transports[sessionId]) { + return null; + } + + const metadata = this.sessionMetadata[sessionId]; + const instanceContext = this.sessionContexts[sessionId]; + + // Defensive check - session should have metadata + if (!metadata) { + logger.warn('Session exists but missing metadata', { sessionId }); + return null; + } + + // Calculate expiration time + const expiresAt = new Date(metadata.lastAccess.getTime() + this.sessionTimeout); + + return { + sessionId, + instanceContext: instanceContext || { + n8nApiUrl: process.env.N8N_API_URL, + n8nApiKey: process.env.N8N_API_KEY, + instanceId: process.env.N8N_INSTANCE_ID + }, + createdAt: metadata.createdAt, + lastAccess: metadata.lastAccess, + expiresAt, + metadata: instanceContext?.metadata + }; + } + + /** + * Get all session states (Phase 2 - REQ-5) + * Useful for bulk backup operations + * + * @returns Array of all session states + * @since 2.19.0 + * + * @example + * ```typescript + * // Periodic backup every 5 minutes + * setInterval(async () => { + * const states = server.getAllSessionStates(); + * for (const state of states) { + * await database.upsertSession(state); + * } + * }, 300000); + * ``` + */ + getAllSessionStates(): SessionState[] { + const sessionIds = this.getActiveSessions(); + const states: SessionState[] = []; + + for (const sessionId of sessionIds) { + const state = this.getSessionState(sessionId); + if (state) { + states.push(state); + } + } + + return states; + } + + /** + * Manually restore a session (Phase 2 - REQ-5) + * Creates a session with the given ID and instance context + * Idempotent - returns true even if session already exists + * + * @param sessionId - The session ID to restore + * @param instanceContext - Instance configuration for the session + * @returns true if session was created or already exists, false on validation error + * @since 2.19.0 + * + * @example + * ```typescript + * // Restore session from database + * const restored = server.manuallyRestoreSession( + * 'session-123', + * { n8nApiUrl: '...', n8nApiKey: '...', instanceId: 'user-456' } + * ); + * console.log(`Session restored: ${restored}`); + * ``` + */ + manuallyRestoreSession(sessionId: string, instanceContext: InstanceContext): boolean { + try { + // Validate session ID format + if (!this.isValidSessionId(sessionId)) { + logger.error('Invalid session ID format in manual restoration', { sessionId }); + return false; + } + + // Validate instance context + const validation = validateInstanceContext(instanceContext); + if (!validation.valid) { + logger.error('Invalid instance context in manual restoration', { + sessionId, + 
errors: validation.errors + }); + return false; + } + + // Create session (idempotent - returns existing if already exists) + this.createSession(instanceContext, sessionId); + + logger.info('Session manually restored', { + sessionId, + instanceId: instanceContext.instanceId + }); + + return true; + } catch (error) { + logger.error('Failed to manually restore session', { + sessionId, + error: error instanceof Error ? error.message : String(error) + }); + return false; + } + } + + /** + * Manually delete a session (Phase 2 - REQ-5) + * Removes the session and cleans up all resources + * + * @param sessionId - The session ID to delete + * @returns true if session was deleted, false if session didn't exist + * @since 2.19.0 + * + * @example + * ```typescript + * // Delete expired sessions + * const deleted = server.manuallyDeleteSession('session-123'); + * if (deleted) { + * console.log('Session deleted successfully'); + * } + * ``` + */ + manuallyDeleteSession(sessionId: string): boolean { + // Check if session exists + if (!this.transports[sessionId]) { + logger.debug('Session not found for manual deletion', { sessionId }); + return false; + } + + // CRITICAL: Delete session data synchronously for unit tests + // Close transport asynchronously in background, but remove from maps immediately + try { + // Close transport asynchronously (non-blocking) + if (this.transports[sessionId]) { + this.transports[sessionId].close().catch(error => { + logger.warn('Error closing transport during manual deletion', { + sessionId, + error: error instanceof Error ? error.message : String(error) + }); + }); + } + + // Remove session data immediately (synchronous) + delete this.transports[sessionId]; + delete this.servers[sessionId]; + delete this.sessionMetadata[sessionId]; + delete this.sessionContexts[sessionId]; + + logger.info('Session manually deleted', { sessionId }); + return true; + } catch (error) { + logger.error('Error during manual session deletion', { + sessionId, + error: error instanceof Error ? 
error.message : String(error) + }); + return false; + } + } } // Start if called directly diff --git a/src/index.ts b/src/index.ts index b5c1005..7ea6504 100644 --- a/src/index.ts +++ b/src/index.ts @@ -19,6 +19,13 @@ export { isInstanceContext } from './types/instance-context'; +// Session restoration types (v2.19.0) +export type { + SessionRestoreHook, + SessionRestorationOptions, + SessionState +} from './types/session-restoration'; + // Re-export MCP SDK types for convenience export type { Tool, diff --git a/src/mcp-engine.ts b/src/mcp-engine.ts index d1a6632..bb8b71b 100644 --- a/src/mcp-engine.ts +++ b/src/mcp-engine.ts @@ -9,6 +9,7 @@ import { Request, Response } from 'express'; import { SingleSessionHTTPServer } from './http-server-single-session'; import { logger } from './utils/logger'; import { InstanceContext } from './types/instance-context'; +import { SessionRestoreHook, SessionState } from './types/session-restoration'; export interface EngineHealth { status: 'healthy' | 'unhealthy'; @@ -25,6 +26,22 @@ export interface EngineHealth { export interface EngineOptions { sessionTimeout?: number; logLevel?: 'error' | 'warn' | 'info' | 'debug'; + + /** + * Session restoration hook for multi-tenant persistence + * Called when a client tries to use an unknown session ID + * Return instance context to restore the session, or null to reject + * + * @since 2.19.0 + */ + onSessionNotFound?: SessionRestoreHook; + + /** + * Maximum time to wait for session restoration (milliseconds) + * @default 5000 (5 seconds) + * @since 2.19.0 + */ + sessionRestorationTimeout?: number; } export class N8NMCPEngine { @@ -32,9 +49,9 @@ export class N8NMCPEngine { private startTime: Date; constructor(options: EngineOptions = {}) { - this.server = new SingleSessionHTTPServer(); + this.server = new SingleSessionHTTPServer(options); this.startTime = new Date(); - + if (options.logLevel) { process.env.LOG_LEVEL = options.logLevel; } @@ -97,7 +114,7 @@ export class N8NMCPEngine { total: Math.round(memoryUsage.heapTotal / 1024 / 1024), unit: 'MB' }, - version: '2.3.2' + version: '2.19.0' }; } catch (error) { logger.error('Health check failed:', error); @@ -106,7 +123,7 @@ export class N8NMCPEngine { uptime: 0, sessionActive: false, memoryUsage: { used: 0, total: 0, unit: 'MB' }, - version: '2.3.2' + version: '2.19.0' }; } } @@ -118,10 +135,118 @@ export class N8NMCPEngine { getSessionInfo(): { active: boolean; sessionId?: string; age?: number } { return this.server.getSessionInfo(); } - + + /** + * Get all active session IDs (Phase 2 - REQ-5) + * Returns array of currently active session IDs + * + * @returns Array of session IDs + * @since 2.19.0 + * + * @example + * ```typescript + * const engine = new N8NMCPEngine(); + * const sessionIds = engine.getActiveSessions(); + * console.log(`Active sessions: ${sessionIds.length}`); + * ``` + */ + getActiveSessions(): string[] { + return this.server.getActiveSessions(); + } + + /** + * Get session state for a specific session (Phase 2 - REQ-5) + * Returns session state or null if session doesn't exist + * + * @param sessionId - The session ID to get state for + * @returns SessionState object or null + * @since 2.19.0 + * + * @example + * ```typescript + * const state = engine.getSessionState('session-123'); + * if (state) { + * // Save to database + * await db.saveSession(state); + * } + * ``` + */ + getSessionState(sessionId: string): SessionState | null { + return this.server.getSessionState(sessionId); + } + + /** + * Get all session states (Phase 2 - REQ-5) + * 
Returns array of all active session states for bulk backup + * + * @returns Array of SessionState objects + * @since 2.19.0 + * + * @example + * ```typescript + * // Periodic backup every 5 minutes + * setInterval(async () => { + * const states = engine.getAllSessionStates(); + * for (const state of states) { + * await database.upsertSession(state); + * } + * }, 300000); + * ``` + */ + getAllSessionStates(): SessionState[] { + return this.server.getAllSessionStates(); + } + + /** + * Manually restore a session (Phase 2 - REQ-5) + * Creates a session with the given ID and instance context + * + * @param sessionId - The session ID to restore + * @param instanceContext - Instance configuration + * @returns true if session was restored successfully, false otherwise + * @since 2.19.0 + * + * @example + * ```typescript + * // Restore session from database + * const session = await db.loadSession('session-123'); + * if (session) { + * const restored = engine.restoreSession( + * session.sessionId, + * session.instanceContext + * ); + * console.log(`Restored: ${restored}`); + * } + * ``` + */ + restoreSession(sessionId: string, instanceContext: InstanceContext): boolean { + return this.server.manuallyRestoreSession(sessionId, instanceContext); + } + + /** + * Manually delete a session (Phase 2 - REQ-5) + * Removes the session and cleans up resources + * + * @param sessionId - The session ID to delete + * @returns true if session was deleted, false if not found + * @since 2.19.0 + * + * @example + * ```typescript + * // Delete expired session + * const deleted = engine.deleteSession('session-123'); + * if (deleted) { + * await db.deleteSession('session-123'); + * } + * ``` + */ + deleteSession(sessionId: string): boolean { + return this.server.manuallyDeleteSession(sessionId); + } + /** * Graceful shutdown for service lifecycle - * + * * @example * process.on('SIGTERM', async () => { * await engine.shutdown(); diff --git a/src/types/session-restoration.ts b/src/types/session-restoration.ts new file mode 100644 index 0000000..332d2a9 --- /dev/null +++ b/src/types/session-restoration.ts @@ -0,0 +1,111 @@ +/** + * Session Restoration Types + * + * Defines types for session persistence and restoration functionality. + * Enables multi-tenant backends to restore sessions after container restarts. + * + * @since 2.19.0 + */ + +import { InstanceContext } from './instance-context'; + +/** + * Session restoration hook callback + * + * Called when a client tries to use an unknown session ID. + * The backend can load session state from external storage (database, Redis, etc.) + * and return the instance context to recreate the session. 
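+ *
+ * Note: the hook runs under a bounded time budget (sessionRestorationTimeout,
+ * 5 seconds by default); if it does not settle in time the request fails with
+ * 408 Request Timeout, so keep storage lookups fast.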
+ *
+ * @param sessionId - The session ID that was not found in memory
+ * @returns Instance context to restore the session, or null if the session should not be restored
+ *
+ * @example
+ * ```typescript
+ * const engine = new N8NMCPEngine({
+ *   onSessionNotFound: async (sessionId) => {
+ *     // Load from database
+ *     const session = await db.loadSession(sessionId);
+ *     if (!session || session.expired) return null;
+ *     return session.instanceContext;
+ *   }
+ * });
+ * ```
+ */
+export type SessionRestoreHook = (sessionId: string) => Promise<InstanceContext | null>;
+
+/**
+ * Session restoration configuration options
+ *
+ * @since 2.19.0
+ */
+export interface SessionRestorationOptions {
+  /**
+   * Session timeout in milliseconds
+   * After this period of inactivity, sessions are expired and cleaned up
+   * @default 1800000 (30 minutes)
+   */
+  sessionTimeout?: number;
+
+  /**
+   * Maximum time to wait for the session restoration hook to complete
+   * If the hook takes longer than this, the request will fail with 408 Request Timeout
+   * @default 5000 (5 seconds)
+   */
+  sessionRestorationTimeout?: number;
+
+  /**
+   * Hook called when a client tries to use an unknown session ID
+   * Return instance context to restore the session, or null to reject
+   *
+   * @param sessionId - The session ID that was not found
+   * @returns Instance context for restoration, or null
+   *
+   * Error handling:
+   * - Hook throws exception → 500 Internal Server Error
+   * - Hook times out → 408 Request Timeout
+   * - Hook returns null → 400 Bad Request (session not found)
+   * - Hook returns invalid context → 400 Bad Request (invalid context)
+   */
+  onSessionNotFound?: SessionRestoreHook;
+}
+
+/**
+ * Session state for persistence
+ * Contains all information needed to restore a session after restart
+ *
+ * @since 2.19.0
+ */
+export interface SessionState {
+  /**
+   * Unique session identifier
+   */
+  sessionId: string;
+
+  /**
+   * Instance-specific configuration
+   * Contains n8n API credentials and instance ID
+   */
+  instanceContext: InstanceContext;
+
+  /**
+   * When the session was created
+   */
+  createdAt: Date;
+
+  /**
+   * Last time the session was accessed
+   * Used for TTL-based expiration
+   */
+  lastAccess: Date;
+
+  /**
+   * When the session will expire
+   * Calculated from lastAccess + sessionTimeout
+   */
+  expiresAt: Date;
+
+  /**
+   * Optional metadata for application-specific use
+   */
+  metadata?: Record<string, unknown>;
+}
diff --git a/supabase-telemetry-aggregation.sql b/supabase-telemetry-aggregation.sql
new file mode 100644
index 0000000..32237d9
--- /dev/null
+++ b/supabase-telemetry-aggregation.sql
@@ -0,0 +1,752 @@
+-- ============================================================================
+-- N8N-MCP Telemetry Aggregation & Automated Pruning System
+-- ============================================================================
+-- Purpose: Create aggregation tables and automated cleanup to maintain
+--          database under 500MB free tier limit while preserving insights
+--
+-- Strategy: Aggregate → Delete → Retain only recent raw events
+-- Expected savings: ~120 MB (from 265 MB → ~145 MB steady state)
+-- ============================================================================
+
+-- ============================================================================
+-- PART 1: AGGREGATION TABLES
+-- ============================================================================
+
+-- Daily tool usage summary (replaces 96 MB of tool_sequence raw data)
+CREATE TABLE IF NOT EXISTS telemetry_tool_usage_daily (
+  id UUID PRIMARY KEY DEFAULT 
gen_random_uuid(), + aggregation_date DATE NOT NULL, + user_id TEXT NOT NULL, + tool_name TEXT NOT NULL, + usage_count INTEGER NOT NULL DEFAULT 0, + success_count INTEGER NOT NULL DEFAULT 0, + error_count INTEGER NOT NULL DEFAULT 0, + avg_execution_time_ms NUMERIC, + total_execution_time_ms BIGINT, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + UNIQUE(aggregation_date, user_id, tool_name) +); + +CREATE INDEX idx_tool_usage_daily_date ON telemetry_tool_usage_daily(aggregation_date DESC); +CREATE INDEX idx_tool_usage_daily_tool ON telemetry_tool_usage_daily(tool_name); +CREATE INDEX idx_tool_usage_daily_user ON telemetry_tool_usage_daily(user_id); + +COMMENT ON TABLE telemetry_tool_usage_daily IS 'Daily aggregation of tool usage replacing raw tool_used and tool_sequence events. Saves ~95% storage.'; + +-- Tool sequence patterns (replaces individual sequences with pattern analysis) +CREATE TABLE IF NOT EXISTS telemetry_tool_patterns ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + aggregation_date DATE NOT NULL, + tool_sequence TEXT[] NOT NULL, -- Array of tool names in order + sequence_hash TEXT NOT NULL, -- Hash of the sequence for grouping + occurrence_count INTEGER NOT NULL DEFAULT 1, + avg_sequence_duration_ms NUMERIC, + success_rate NUMERIC, -- 0.0 to 1.0 + common_errors JSONB, -- {"error_type": count} + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + UNIQUE(aggregation_date, sequence_hash) +); + +CREATE INDEX idx_tool_patterns_date ON telemetry_tool_patterns(aggregation_date DESC); +CREATE INDEX idx_tool_patterns_hash ON telemetry_tool_patterns(sequence_hash); + +COMMENT ON TABLE telemetry_tool_patterns IS 'Common tool usage patterns aggregated daily. Identifies workflows and AI behavior patterns.'; + +-- Workflow insights (aggregates workflow_created events) +CREATE TABLE IF NOT EXISTS telemetry_workflow_insights ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + aggregation_date DATE NOT NULL, + complexity TEXT, -- simple/medium/complex + node_count_range TEXT, -- 1-5, 6-10, 11-20, 21+ + has_trigger BOOLEAN, + has_webhook BOOLEAN, + common_node_types TEXT[], -- Top node types used + workflow_count INTEGER NOT NULL DEFAULT 0, + avg_node_count NUMERIC, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + UNIQUE(aggregation_date, complexity, node_count_range, has_trigger, has_webhook) +); + +CREATE INDEX idx_workflow_insights_date ON telemetry_workflow_insights(aggregation_date DESC); +CREATE INDEX idx_workflow_insights_complexity ON telemetry_workflow_insights(complexity); + +COMMENT ON TABLE telemetry_workflow_insights IS 'Daily workflow creation patterns. 
Shows adoption trends without storing duplicate workflows.'; + +-- Error patterns (keeps error intelligence, deletes raw error events) +CREATE TABLE IF NOT EXISTS telemetry_error_patterns ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + aggregation_date DATE NOT NULL, + error_type TEXT NOT NULL, + error_context TEXT, -- e.g., 'validation', 'workflow_execution', 'node_operation' + occurrence_count INTEGER NOT NULL DEFAULT 1, + affected_users INTEGER NOT NULL DEFAULT 0, + first_seen TIMESTAMPTZ, + last_seen TIMESTAMPTZ, + sample_error_message TEXT, -- Keep one representative message + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + UNIQUE(aggregation_date, error_type, error_context) +); + +CREATE INDEX idx_error_patterns_date ON telemetry_error_patterns(aggregation_date DESC); +CREATE INDEX idx_error_patterns_type ON telemetry_error_patterns(error_type); + +COMMENT ON TABLE telemetry_error_patterns IS 'Error patterns over time. Preserves debugging insights while pruning raw error events.'; + +-- Validation insights (aggregates validation_details) +CREATE TABLE IF NOT EXISTS telemetry_validation_insights ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + aggregation_date DATE NOT NULL, + validation_type TEXT, -- 'node', 'workflow', 'expression' + profile TEXT, -- 'minimal', 'runtime', 'ai-friendly', 'strict' + success_count INTEGER NOT NULL DEFAULT 0, + failure_count INTEGER NOT NULL DEFAULT 0, + common_failure_reasons JSONB, -- {"reason": count} + avg_validation_time_ms NUMERIC, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + UNIQUE(aggregation_date, validation_type, profile) +); + +CREATE INDEX idx_validation_insights_date ON telemetry_validation_insights(aggregation_date DESC); +CREATE INDEX idx_validation_insights_type ON telemetry_validation_insights(validation_type); + +COMMENT ON TABLE telemetry_validation_insights IS 'Validation success/failure patterns. 
Shows where users struggle without storing every validation event.'; + +-- ============================================================================ +-- PART 2: AGGREGATION FUNCTIONS +-- ============================================================================ + +-- Function to aggregate tool usage data +CREATE OR REPLACE FUNCTION aggregate_tool_usage(cutoff_date TIMESTAMPTZ) +RETURNS INTEGER AS $$ +DECLARE + rows_aggregated INTEGER; +BEGIN + -- Aggregate tool_used events + INSERT INTO telemetry_tool_usage_daily ( + aggregation_date, + user_id, + tool_name, + usage_count, + success_count, + error_count, + avg_execution_time_ms, + total_execution_time_ms + ) + SELECT + DATE(created_at) as aggregation_date, + user_id, + properties->>'toolName' as tool_name, + COUNT(*) as usage_count, + COUNT(*) FILTER (WHERE (properties->>'success')::boolean = true) as success_count, + COUNT(*) FILTER (WHERE (properties->>'success')::boolean = false OR properties->>'error' IS NOT NULL) as error_count, + AVG((properties->>'executionTime')::numeric) as avg_execution_time_ms, + SUM((properties->>'executionTime')::numeric) as total_execution_time_ms + FROM telemetry_events + WHERE event = 'tool_used' + AND created_at < cutoff_date + AND properties->>'toolName' IS NOT NULL + GROUP BY DATE(created_at), user_id, properties->>'toolName' + ON CONFLICT (aggregation_date, user_id, tool_name) + DO UPDATE SET + usage_count = telemetry_tool_usage_daily.usage_count + EXCLUDED.usage_count, + success_count = telemetry_tool_usage_daily.success_count + EXCLUDED.success_count, + error_count = telemetry_tool_usage_daily.error_count + EXCLUDED.error_count, + total_execution_time_ms = telemetry_tool_usage_daily.total_execution_time_ms + EXCLUDED.total_execution_time_ms, + avg_execution_time_ms = (telemetry_tool_usage_daily.total_execution_time_ms + EXCLUDED.total_execution_time_ms) / + (telemetry_tool_usage_daily.usage_count + EXCLUDED.usage_count), + updated_at = NOW(); + + GET DIAGNOSTICS rows_aggregated = ROW_COUNT; + + RAISE NOTICE 'Aggregated % rows from tool_used events', rows_aggregated; + RETURN rows_aggregated; +END; +$$ LANGUAGE plpgsql; + +COMMENT ON FUNCTION aggregate_tool_usage IS 'Aggregates tool_used events into daily summaries before deletion'; + +-- Function to aggregate tool sequence patterns +CREATE OR REPLACE FUNCTION aggregate_tool_patterns(cutoff_date TIMESTAMPTZ) +RETURNS INTEGER AS $$ +DECLARE + rows_aggregated INTEGER; +BEGIN + INSERT INTO telemetry_tool_patterns ( + aggregation_date, + tool_sequence, + sequence_hash, + occurrence_count, + avg_sequence_duration_ms, + success_rate + ) + SELECT + DATE(created_at) as aggregation_date, + (properties->>'toolSequence')::text[] as tool_sequence, + md5(array_to_string((properties->>'toolSequence')::text[], ',')) as sequence_hash, + COUNT(*) as occurrence_count, + AVG((properties->>'duration')::numeric) as avg_sequence_duration_ms, + AVG(CASE WHEN (properties->>'success')::boolean THEN 1.0 ELSE 0.0 END) as success_rate + FROM telemetry_events + WHERE event = 'tool_sequence' + AND created_at < cutoff_date + AND properties->>'toolSequence' IS NOT NULL + GROUP BY DATE(created_at), (properties->>'toolSequence')::text[] + ON CONFLICT (aggregation_date, sequence_hash) + DO UPDATE SET + occurrence_count = telemetry_tool_patterns.occurrence_count + EXCLUDED.occurrence_count, + avg_sequence_duration_ms = ( + (telemetry_tool_patterns.avg_sequence_duration_ms * telemetry_tool_patterns.occurrence_count + + EXCLUDED.avg_sequence_duration_ms * EXCLUDED.occurrence_count) / 
+ (telemetry_tool_patterns.occurrence_count + EXCLUDED.occurrence_count) + ), + success_rate = ( + (telemetry_tool_patterns.success_rate * telemetry_tool_patterns.occurrence_count + + EXCLUDED.success_rate * EXCLUDED.occurrence_count) / + (telemetry_tool_patterns.occurrence_count + EXCLUDED.occurrence_count) + ), + updated_at = NOW(); + + GET DIAGNOSTICS rows_aggregated = ROW_COUNT; + + RAISE NOTICE 'Aggregated % rows from tool_sequence events', rows_aggregated; + RETURN rows_aggregated; +END; +$$ LANGUAGE plpgsql; + +COMMENT ON FUNCTION aggregate_tool_patterns IS 'Aggregates tool_sequence events into pattern analysis before deletion'; + +-- Function to aggregate workflow insights +CREATE OR REPLACE FUNCTION aggregate_workflow_insights(cutoff_date TIMESTAMPTZ) +RETURNS INTEGER AS $$ +DECLARE + rows_aggregated INTEGER; +BEGIN + INSERT INTO telemetry_workflow_insights ( + aggregation_date, + complexity, + node_count_range, + has_trigger, + has_webhook, + common_node_types, + workflow_count, + avg_node_count + ) + SELECT + DATE(created_at) as aggregation_date, + properties->>'complexity' as complexity, + CASE + WHEN (properties->>'nodeCount')::int BETWEEN 1 AND 5 THEN '1-5' + WHEN (properties->>'nodeCount')::int BETWEEN 6 AND 10 THEN '6-10' + WHEN (properties->>'nodeCount')::int BETWEEN 11 AND 20 THEN '11-20' + ELSE '21+' + END as node_count_range, + (properties->>'hasTrigger')::boolean as has_trigger, + (properties->>'hasWebhook')::boolean as has_webhook, + ARRAY[]::text[] as common_node_types, -- Will be populated separately if needed + COUNT(*) as workflow_count, + AVG((properties->>'nodeCount')::numeric) as avg_node_count + FROM telemetry_events + WHERE event = 'workflow_created' + AND created_at < cutoff_date + GROUP BY + DATE(created_at), + properties->>'complexity', + node_count_range, + (properties->>'hasTrigger')::boolean, + (properties->>'hasWebhook')::boolean + ON CONFLICT (aggregation_date, complexity, node_count_range, has_trigger, has_webhook) + DO UPDATE SET + workflow_count = telemetry_workflow_insights.workflow_count + EXCLUDED.workflow_count, + avg_node_count = ( + (telemetry_workflow_insights.avg_node_count * telemetry_workflow_insights.workflow_count + + EXCLUDED.avg_node_count * EXCLUDED.workflow_count) / + (telemetry_workflow_insights.workflow_count + EXCLUDED.workflow_count) + ), + updated_at = NOW(); + + GET DIAGNOSTICS rows_aggregated = ROW_COUNT; + + RAISE NOTICE 'Aggregated % rows from workflow_created events', rows_aggregated; + RETURN rows_aggregated; +END; +$$ LANGUAGE plpgsql; + +COMMENT ON FUNCTION aggregate_workflow_insights IS 'Aggregates workflow_created events into pattern insights before deletion'; + +-- Function to aggregate error patterns +CREATE OR REPLACE FUNCTION aggregate_error_patterns(cutoff_date TIMESTAMPTZ) +RETURNS INTEGER AS $$ +DECLARE + rows_aggregated INTEGER; +BEGIN + INSERT INTO telemetry_error_patterns ( + aggregation_date, + error_type, + error_context, + occurrence_count, + affected_users, + first_seen, + last_seen, + sample_error_message + ) + SELECT + DATE(created_at) as aggregation_date, + properties->>'errorType' as error_type, + properties->>'context' as error_context, + COUNT(*) as occurrence_count, + COUNT(DISTINCT user_id) as affected_users, + MIN(created_at) as first_seen, + MAX(created_at) as last_seen, + (ARRAY_AGG(properties->>'message' ORDER BY created_at DESC))[1] as sample_error_message + FROM telemetry_events + WHERE event = 'error_occurred' + AND created_at < cutoff_date + GROUP BY DATE(created_at), 
properties->>'errorType', properties->>'context'
+  ON CONFLICT (aggregation_date, error_type, error_context)
+  DO UPDATE SET
+    occurrence_count = telemetry_error_patterns.occurrence_count + EXCLUDED.occurrence_count,
+    affected_users = GREATEST(telemetry_error_patterns.affected_users, EXCLUDED.affected_users),
+    first_seen = LEAST(telemetry_error_patterns.first_seen, EXCLUDED.first_seen),
+    last_seen = GREATEST(telemetry_error_patterns.last_seen, EXCLUDED.last_seen),
+    updated_at = NOW();
+
+  GET DIAGNOSTICS rows_aggregated = ROW_COUNT;
+
+  RAISE NOTICE 'Aggregated % rows from error_occurred events', rows_aggregated;
+  RETURN rows_aggregated;
+END;
+$$ LANGUAGE plpgsql;
+
+COMMENT ON FUNCTION aggregate_error_patterns IS 'Aggregates error_occurred events into pattern analysis before deletion';
+
+-- Function to aggregate validation insights
+-- NOTE: failure reasons are pre-counted in a subquery; PostgreSQL rejects
+-- nested aggregates such as jsonb_object_agg(..., COUNT(*))
+CREATE OR REPLACE FUNCTION aggregate_validation_insights(cutoff_date TIMESTAMPTZ)
+RETURNS INTEGER AS $$
+DECLARE
+  rows_aggregated INTEGER;
+BEGIN
+  INSERT INTO telemetry_validation_insights (
+    aggregation_date,
+    validation_type,
+    profile,
+    success_count,
+    failure_count,
+    common_failure_reasons,
+    avg_validation_time_ms
+  )
+  SELECT
+    aggregation_date,
+    validation_type,
+    profile,
+    COALESCE(SUM(cnt) FILTER (WHERE success), 0)::INTEGER as success_count,
+    COALESCE(SUM(cnt) FILTER (WHERE NOT success), 0)::INTEGER as failure_count,
+    jsonb_object_agg(failure_reason, cnt) FILTER (WHERE NOT success) as common_failure_reasons,
+    SUM(total_validation_time) / NULLIF(SUM(timed_cnt), 0) as avg_validation_time_ms
+  FROM (
+    SELECT
+      DATE(created_at) as aggregation_date,
+      properties->>'validationType' as validation_type,
+      properties->>'profile' as profile,
+      (properties->>'success')::boolean as success,
+      COALESCE(properties->>'failureReason', 'unknown') as failure_reason,
+      COUNT(*) as cnt,
+      COUNT((properties->>'validationTime')::numeric) as timed_cnt,
+      SUM((properties->>'validationTime')::numeric) as total_validation_time
+    FROM telemetry_events
+    WHERE event = 'validation_details'
+      AND created_at < cutoff_date
+    GROUP BY 1, 2, 3, 4, 5
+  ) pre
+  GROUP BY aggregation_date, validation_type, profile
+  ON CONFLICT (aggregation_date, validation_type, profile)
+  DO UPDATE SET
+    success_count = telemetry_validation_insights.success_count + EXCLUDED.success_count,
+    failure_count = telemetry_validation_insights.failure_count + EXCLUDED.failure_count,
+    updated_at = NOW();
+
+  GET DIAGNOSTICS rows_aggregated = ROW_COUNT;
+
+  RAISE NOTICE 'Aggregated % rows from validation_details events', rows_aggregated;
+  RETURN rows_aggregated;
+END;
+$$ LANGUAGE plpgsql;
+
+COMMENT ON FUNCTION aggregate_validation_insights IS 'Aggregates validation_details events into insights before deletion';
+
+-- ============================================================================
+-- PART 3: MASTER AGGREGATION & CLEANUP FUNCTION
+-- ============================================================================
+
+CREATE OR REPLACE FUNCTION run_telemetry_aggregation_and_cleanup(
+  retention_days INTEGER DEFAULT 3
+)
+RETURNS TABLE(
+  event_type TEXT,
+  rows_aggregated INTEGER,
+  rows_deleted INTEGER,
+  space_freed_mb NUMERIC
+) AS $$
+DECLARE
+  cutoff_date TIMESTAMPTZ;
+  total_before BIGINT;
+  total_after BIGINT;
+  agg_count INTEGER;
+  del_count INTEGER;
+BEGIN
+  cutoff_date := NOW() - (retention_days || ' days')::INTERVAL;
+
+  RAISE NOTICE 'Starting aggregation and cleanup for data older than %', cutoff_date;
+
+  -- Get table size before cleanup
+  SELECT pg_total_relation_size('telemetry_events') INTO total_before;
+
+  -- ========================================================================
+  -- STEP 1: AGGREGATE DATA BEFORE DELETION
+  -- ========================================================================
+
+  -- Tool usage 
aggregation + SELECT aggregate_tool_usage(cutoff_date) INTO agg_count; + SELECT COUNT(*) INTO del_count FROM telemetry_events + WHERE event = 'tool_used' AND created_at < cutoff_date; + + event_type := 'tool_used'; + rows_aggregated := agg_count; + rows_deleted := del_count; + RETURN NEXT; + + -- Tool patterns aggregation + SELECT aggregate_tool_patterns(cutoff_date) INTO agg_count; + SELECT COUNT(*) INTO del_count FROM telemetry_events + WHERE event = 'tool_sequence' AND created_at < cutoff_date; + + event_type := 'tool_sequence'; + rows_aggregated := agg_count; + rows_deleted := del_count; + RETURN NEXT; + + -- Workflow insights aggregation + SELECT aggregate_workflow_insights(cutoff_date) INTO agg_count; + SELECT COUNT(*) INTO del_count FROM telemetry_events + WHERE event = 'workflow_created' AND created_at < cutoff_date; + + event_type := 'workflow_created'; + rows_aggregated := agg_count; + rows_deleted := del_count; + RETURN NEXT; + + -- Error patterns aggregation + SELECT aggregate_error_patterns(cutoff_date) INTO agg_count; + SELECT COUNT(*) INTO del_count FROM telemetry_events + WHERE event = 'error_occurred' AND created_at < cutoff_date; + + event_type := 'error_occurred'; + rows_aggregated := agg_count; + rows_deleted := del_count; + RETURN NEXT; + + -- Validation insights aggregation + SELECT aggregate_validation_insights(cutoff_date) INTO agg_count; + SELECT COUNT(*) INTO del_count FROM telemetry_events + WHERE event = 'validation_details' AND created_at < cutoff_date; + + event_type := 'validation_details'; + rows_aggregated := agg_count; + rows_deleted := del_count; + RETURN NEXT; + + -- ======================================================================== + -- STEP 2: DELETE OLD RAW EVENTS (now that they're aggregated) + -- ======================================================================== + + DELETE FROM telemetry_events + WHERE created_at < cutoff_date + AND event IN ( + 'tool_used', + 'tool_sequence', + 'workflow_created', + 'validation_details', + 'session_start', + 'search_query', + 'diagnostic_completed', + 'health_check_completed' + ); + + -- Keep error_occurred for 30 days (extended retention for debugging) + DELETE FROM telemetry_events + WHERE created_at < (NOW() - INTERVAL '30 days') + AND event = 'error_occurred'; + + -- ======================================================================== + -- STEP 3: CLEAN UP OLD WORKFLOWS (keep only unique patterns) + -- ======================================================================== + + -- Delete duplicate workflows older than retention period + WITH workflow_duplicates AS ( + SELECT id + FROM ( + SELECT id, + ROW_NUMBER() OVER ( + PARTITION BY workflow_hash + ORDER BY created_at DESC + ) as rn + FROM telemetry_workflows + WHERE created_at < cutoff_date + ) sub + WHERE rn > 1 + ) + DELETE FROM telemetry_workflows + WHERE id IN (SELECT id FROM workflow_duplicates); + + GET DIAGNOSTICS del_count = ROW_COUNT; + + event_type := 'duplicate_workflows'; + rows_aggregated := 0; + rows_deleted := del_count; + RETURN NEXT; + + -- ======================================================================== + -- STEP 4: VACUUM TO RECLAIM SPACE + -- ======================================================================== + + -- Note: VACUUM cannot be run inside a function, must be run separately + -- The cron job will handle this + + -- Get table size after cleanup + SELECT pg_total_relation_size('telemetry_events') INTO total_after; + + -- Summary row + event_type := 'TOTAL_SPACE_FREED'; + rows_aggregated := 0; + 
rows_deleted := 0;
+    space_freed_mb := ROUND((total_before - total_after)::NUMERIC / 1024 / 1024, 2);
+    RETURN NEXT;
+
+    RAISE NOTICE 'Cleanup complete. Space freed: % MB', space_freed_mb;
+END;
+$$ LANGUAGE plpgsql;
+
+COMMENT ON FUNCTION run_telemetry_aggregation_and_cleanup IS 'Master function to aggregate data and delete old events. Run daily via cron.';
+
+-- ============================================================================
+-- PART 4: SUPABASE CRON JOB SETUP
+-- ============================================================================
+
+-- Enable pg_cron extension (if not already enabled)
+CREATE EXTENSION IF NOT EXISTS pg_cron;
+
+-- Schedule daily cleanup at 2 AM UTC (low traffic time).
+-- This aggregates data older than 3 days and then deletes it.
+SELECT cron.schedule(
+    'telemetry-daily-cleanup',
+    '0 2 * * *',  -- Every day at 2 AM UTC
+    'SELECT run_telemetry_aggregation_and_cleanup(3)'
+);
+
+-- VACUUM is scheduled as separate jobs: pg_cron executes a multi-statement
+-- command inside a single implicit transaction, and VACUUM cannot run
+-- inside a transaction block.
+SELECT cron.schedule(
+    'telemetry-daily-vacuum-events',
+    '30 2 * * *',
+    'VACUUM ANALYZE telemetry_events'
+);
+
+SELECT cron.schedule(
+    'telemetry-daily-vacuum-workflows',
+    '45 2 * * *',
+    'VACUUM ANALYZE telemetry_workflows'
+);
+
+COMMENT ON EXTENSION pg_cron IS 'Cron job scheduler for automated telemetry cleanup';
+
+-- ============================================================================
+-- PART 5: MONITORING & ALERTING
+-- ============================================================================
+
+-- Function to check database size and alert if approaching limit
+CREATE OR REPLACE FUNCTION check_database_size()
+RETURNS TABLE(
+    total_size_mb NUMERIC,
+    events_size_mb NUMERIC,
+    workflows_size_mb NUMERIC,
+    aggregates_size_mb NUMERIC,
+    percent_of_limit NUMERIC,
+    days_until_full NUMERIC,
+    status TEXT
+) AS $$
+DECLARE
+    db_size BIGINT;
+    events_size BIGINT;
+    workflows_size BIGINT;
+    agg_size BIGINT;
+    limit_mb CONSTANT NUMERIC := 500;  -- Free tier limit
+    growth_rate_mb_per_day NUMERIC;
+BEGIN
+    -- Get current sizes
+    SELECT pg_database_size(current_database()) INTO db_size;
+    SELECT pg_total_relation_size('telemetry_events') INTO events_size;
+    SELECT pg_total_relation_size('telemetry_workflows') INTO workflows_size;
+
+    SELECT COALESCE(
+        pg_total_relation_size('telemetry_tool_usage_daily') +
+        pg_total_relation_size('telemetry_tool_patterns') +
+        pg_total_relation_size('telemetry_workflow_insights') +
+        pg_total_relation_size('telemetry_error_patterns') +
+        pg_total_relation_size('telemetry_validation_insights'),
+        0
+    ) INTO agg_size;
+
+    total_size_mb := ROUND(db_size::NUMERIC / 1024 / 1024, 2);
+    events_size_mb := ROUND(events_size::NUMERIC / 1024 / 1024, 2);
+    workflows_size_mb := ROUND(workflows_size::NUMERIC / 1024 / 1024, 2);
+    aggregates_size_mb := ROUND(agg_size::NUMERIC / 1024 / 1024, 2);
+    percent_of_limit := ROUND((total_size_mb / limit_mb) * 100, 1);
+
+    -- Estimate growth rate: 7-day event count times the width of one
+    -- sampled row (a rough approximation, not an exact measure)
+    SELECT ROUND(
+        (SELECT COUNT(*) FROM telemetry_events WHERE created_at > NOW() - INTERVAL '7 days')::NUMERIC
+        * (pg_column_size(telemetry_events.*))::NUMERIC
+        / 7 / 1024 / 1024, 2
+    ) INTO growth_rate_mb_per_day
+    FROM telemetry_events LIMIT 1;
+
+    IF growth_rate_mb_per_day > 0 THEN
+        days_until_full := ROUND((limit_mb - total_size_mb) / growth_rate_mb_per_day, 0);
+    ELSE
+        days_until_full := NULL;
+    END IF;
+
+    -- Determine status
+    IF percent_of_limit >= 90 THEN
+        status := 'CRITICAL - Immediate action required';
+    ELSIF percent_of_limit >= 75 THEN
+        status := 'WARNING - Monitor closely';
+    ELSIF percent_of_limit >= 50 THEN
+        status := 'CAUTION - Plan optimization';
+    ELSE
+        status := 'HEALTHY';
+    END IF;
+
+    RETURN NEXT;
+END;
+$$ LANGUAGE plpgsql;
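+
+-- Reading the monitor - a minimal sketch (values illustrative only):
+--   SELECT status, total_size_mb, percent_of_limit, days_until_full
+--   FROM check_database_size();
+--   => 'CAUTION - Plan optimization', 265.00, 53.0, 30
+
+COMMENT ON FUNCTION 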
check_database_size IS 'Monitor database size and growth. Run daily or on-demand.'; + +-- ============================================================================ +-- PART 6: EMERGENCY CLEANUP (ONE-TIME USE) +-- ============================================================================ + +-- Emergency function to immediately free up space (use if critical) +CREATE OR REPLACE FUNCTION emergency_cleanup() +RETURNS TABLE( + action TEXT, + rows_deleted INTEGER, + space_freed_mb NUMERIC +) AS $$ +DECLARE + size_before BIGINT; + size_after BIGINT; + del_count INTEGER; +BEGIN + SELECT pg_total_relation_size('telemetry_events') INTO size_before; + + -- Aggregate everything older than 7 days + PERFORM run_telemetry_aggregation_and_cleanup(7); + + -- Delete all non-critical events older than 7 days + DELETE FROM telemetry_events + WHERE created_at < NOW() - INTERVAL '7 days' + AND event NOT IN ('error_occurred', 'workflow_validation_failed'); + + GET DIAGNOSTICS del_count = ROW_COUNT; + + action := 'Deleted non-critical events > 7 days'; + rows_deleted := del_count; + RETURN NEXT; + + -- Delete error events older than 14 days + DELETE FROM telemetry_events + WHERE created_at < NOW() - INTERVAL '14 days' + AND event = 'error_occurred'; + + GET DIAGNOSTICS del_count = ROW_COUNT; + + action := 'Deleted error events > 14 days'; + rows_deleted := del_count; + RETURN NEXT; + + -- Delete duplicate workflows + WITH workflow_duplicates AS ( + SELECT id + FROM ( + SELECT id, + ROW_NUMBER() OVER ( + PARTITION BY workflow_hash + ORDER BY created_at DESC + ) as rn + FROM telemetry_workflows + ) sub + WHERE rn > 1 + ) + DELETE FROM telemetry_workflows + WHERE id IN (SELECT id FROM workflow_duplicates); + + GET DIAGNOSTICS del_count = ROW_COUNT; + + action := 'Deleted duplicate workflows'; + rows_deleted := del_count; + RETURN NEXT; + + -- VACUUM will be run separately + SELECT pg_total_relation_size('telemetry_events') INTO size_after; + + action := 'TOTAL (run VACUUM separately)'; + rows_deleted := 0; + space_freed_mb := ROUND((size_before - size_after)::NUMERIC / 1024 / 1024, 2); + RETURN NEXT; + + RAISE NOTICE 'Emergency cleanup complete. Run VACUUM FULL for maximum space recovery.'; +END; +$$ LANGUAGE plpgsql; + +COMMENT ON FUNCTION emergency_cleanup IS 'Emergency cleanup when database is near capacity. Run once, then VACUUM.'; + +-- ============================================================================ +-- USAGE INSTRUCTIONS +-- ============================================================================ + +/* + +SETUP (Run once): + 1. Execute this entire script in Supabase SQL Editor + 2. Verify cron job is scheduled: + SELECT * FROM cron.job; + 3. 
Run initial monitoring:
+     SELECT * FROM check_database_size();
+
+DAILY OPERATIONS (Automatic):
+  - Cleanup job runs daily at 2 AM UTC
+  - Aggregates data older than 3 days
+  - Deletes raw events after aggregation
+  - Separate cron jobs vacuum the tables to reclaim space
+
+MONITORING:
+  -- Check current database health
+  SELECT * FROM check_database_size();
+
+  -- View aggregated insights
+  SELECT * FROM telemetry_tool_usage_daily ORDER BY aggregation_date DESC LIMIT 100;
+  SELECT * FROM telemetry_tool_patterns ORDER BY occurrence_count DESC LIMIT 20;
+  SELECT * FROM telemetry_error_patterns ORDER BY occurrence_count DESC LIMIT 20;
+
+MANUAL CLEANUP (if needed):
+  -- Run cleanup manually (3-day retention)
+  SELECT * FROM run_telemetry_aggregation_and_cleanup(3);
+  VACUUM ANALYZE telemetry_events;
+
+  -- Emergency cleanup (7-day retention)
+  SELECT * FROM emergency_cleanup();
+  VACUUM FULL telemetry_events;
+  VACUUM FULL telemetry_workflows;
+
+TUNING:
+  -- Adjust retention period (e.g., 5 days instead of 3); re-scheduling
+  -- under the same job name replaces the old schedule. The separate
+  -- VACUUM jobs stay as they are.
+  SELECT cron.schedule(
+    'telemetry-daily-cleanup',
+    '0 2 * * *',
+    'SELECT run_telemetry_aggregation_and_cleanup(5)'
+  );
+
+EXPECTED RESULTS:
+  - Initial run: ~120 MB space freed (265 MB → ~145 MB)
+  - Steady state: ~90-120 MB total database size
+  - Growth rate: ~2-3 MB/day (down from 7.7 MB/day)
+  - Headroom: 70-80% of free tier limit available
+
+*/
diff --git a/telemetry-pruning-analysis.md b/telemetry-pruning-analysis.md
new file mode 100644
index 0000000..de093f4
--- /dev/null
+++ b/telemetry-pruning-analysis.md
@@ -0,0 +1,961 @@
+# n8n-MCP Telemetry Database Pruning Strategy
+
+**Analysis Date:** 2025-10-10
+**Current Database Size:** 265 MB (telemetry_events: 199 MB, telemetry_workflows: 66 MB)
+**Free Tier Limit:** 500 MB
+**Projected 4-Week Size:** 609 MB (exceeds limit by 109 MB)
+
+---
+
+## Executive Summary
+
+**Critical Finding:** At the current growth rate (56.75% of data from the last 7 days), we will exceed the 500 MB free tier limit in approximately 2 weeks. Implementing a 7-day retention policy can immediately save roughly 62 MB (about 23%) and prevent database overflow.
+
+**Key Insights:**
+- 641,487 event records consuming 199 MB
+- 17,247 workflow records consuming 66 MB
+- Daily growth rate: ~7-8 MB/day for events
+- 43.25% of data is older than 7 days but provides diminishing value
+
+**Immediate Action Required:** Implement automated pruning to keep the database under 500 MB.
+
+---
+
+## 1. 
Current State Assessment
+
+### Database Size and Distribution
+
+| Table | Rows | Current Size | Growth Rate | Bytes/Row |
+|-------|------|--------------|-------------|-----------|
+| telemetry_events | 641,487 | 199 MB | 56.66% from last 7d | 325 |
+| telemetry_workflows | 17,247 | 66 MB | 60.09% from last 7d | 4,013 |
+| **TOTAL** | **658,734** | **265 MB** | **56.75% from last 7d** | **403** |
+
+### Event Type Distribution
+
+| Event Type | Count | % of Total | Storage | Avg Props Size | Oldest Event |
+|------------|-------|-----------|---------|----------------|--------------|
+| tool_sequence | 362,170 | 56.4% | 67 MB | 194 bytes | 2025-09-26 |
+| tool_used | 191,659 | 29.9% | 14 MB | 77 bytes | 2025-09-26 |
+| validation_details | 36,266 | 5.7% | 11 MB | 329 bytes | 2025-09-26 |
+| workflow_created | 23,151 | 3.6% | 2.6 MB | 115 bytes | 2025-09-26 |
+| session_start | 12,575 | 2.0% | 1.2 MB | 101 bytes | 2025-09-26 |
+| workflow_validation_failed | 9,739 | 1.5% | 314 KB | 33 bytes | 2025-09-26 |
+| error_occurred | 4,935 | 0.8% | 626 KB | 130 bytes | 2025-09-26 |
+| search_query | 974 | 0.2% | 106 KB | 112 bytes | 2025-09-26 |
+| Other | 18 | <0.1% | 5 KB | Various | Recent |
+
+### Growth Pattern Analysis
+
+**Daily Data Accumulation (Last 15 Days):**
+
+| Date | Events/Day | Daily Size | Cumulative Size |
+|------|-----------|------------|-----------------|
+| 2025-10-10 | 28,457 | 4.3 MB | 97 MB |
+| 2025-10-09 | 54,717 | 8.2 MB | 93 MB |
+| 2025-10-08 | 52,901 | 7.9 MB | 85 MB |
+| 2025-10-07 | 52,538 | 8.1 MB | 77 MB |
+| 2025-10-06 | 51,401 | 7.8 MB | 69 MB |
+| 2025-10-05 | 50,528 | 7.9 MB | 61 MB |
+
+**Average Daily Growth:** ~7.7 MB/day for events, plus ~5.7 MB/day for workflows (~13.4 MB/day combined)
+**Weekly Growth:** ~54 MB/week for events alone
+**Projected to hit 500 MB limit:** ~17 days at the combined rate (late October 2025)
+
+### Workflow Data Distribution
+
+| Complexity | Count | % | Avg Nodes | Avg JSON Size | Estimated Size |
+|-----------|-------|---|-----------|---------------|----------------|
+| Simple | 12,923 | 74.9% | 5.48 | 2,122 bytes | 20 MB |
+| Medium | 3,708 | 21.5% | 13.93 | 4,458 bytes | 12 MB |
+| Complex | 616 | 3.6% | 26.62 | 7,909 bytes | 3.2 MB |
+
+**Key Finding:** No duplicate workflow hashes found - each workflow is unique (good data quality).
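+
+The figures above can be re-derived straight from the live tables; a sketch of the event-distribution query (approximate: `pg_column_size` on `properties` ignores row overhead and TOAST compression):
+
+```sql
+SELECT
+    event,
+    COUNT(*) AS total_records,
+    ROUND(100.0 * COUNT(*) / SUM(COUNT(*)) OVER (), 1) AS pct_of_total,
+    pg_size_pretty(SUM(pg_column_size(properties))::bigint) AS props_storage,
+    MIN(created_at) AS oldest_event
+FROM telemetry_events
+GROUP BY event
+ORDER BY total_records DESC;
+```
+
+---
+
+## 2. 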
Data Value Classification + +### TIER 1: Critical - Keep Indefinitely + +**Error Patterns (error_occurred)** +- **Why:** Essential for identifying systemic issues and regression detection +- **Volume:** 4,935 events (626 KB) +- **Recommendation:** Keep all errors with aggregated summaries for older data +- **Retention:** Detailed errors 30 days, aggregated stats indefinitely + +**Tool Usage Statistics (Aggregated)** +- **Why:** Product analytics and feature prioritization +- **Recommendation:** Aggregate daily/weekly summaries after 14 days +- **Keep:** Summary tables with tool usage counts, success rates, avg duration + +### TIER 2: High Value - Keep 30 Days + +**Validation Details (validation_details)** +- **Current:** 36,266 events, 11 MB, avg 329 bytes +- **Why:** Important for understanding validation issues during current development cycle +- **Value Period:** 30 days (covers current version development) +- **After 30d:** Aggregate to summary stats (validation success rate by node type) + +**Workflow Creation Patterns (workflow_created)** +- **Current:** 23,151 events, 2.6 MB +- **Why:** Track feature adoption and workflow patterns +- **Value Period:** 30 days for detailed analysis +- **After 30d:** Keep aggregated metrics only + +### TIER 3: Medium Value - Keep 14 Days + +**Session Data (session_start)** +- **Current:** 12,575 events, 1.2 MB +- **Why:** User engagement tracking +- **Value Period:** 14 days sufficient for engagement analysis +- **Pruning Impact:** 497 KB saved (40% reduction) + +**Workflow Validation Failures (workflow_validation_failed)** +- **Current:** 9,739 events, 314 KB +- **Why:** Tracks validation patterns but less detailed than validation_details +- **Value Period:** 14 days +- **Pruning Impact:** 170 KB saved (54% reduction) + +### TIER 4: Short-Term Value - Keep 7 Days + +**Tool Sequences (tool_sequence)** +- **Current:** 362,170 events, 67 MB (largest table!) +- **Why:** Tracks multi-tool workflows but extremely high volume +- **Value Period:** 7 days for recent pattern analysis +- **Pruning Impact:** 29 MB saved (43% reduction) - HIGHEST IMPACT +- **Rationale:** Tool usage patterns stabilize quickly; older sequences provide diminishing returns + +**Tool Usage Events (tool_used)** +- **Current:** 191,659 events, 14 MB +- **Why:** Individual tool executions - can be aggregated +- **Value Period:** 7 days detailed, then aggregate +- **Pruning Impact:** 6.2 MB saved (44% reduction) + +**Search Queries (search_query)** +- **Current:** 974 events, 106 KB +- **Why:** Low volume, useful for understanding search patterns +- **Value Period:** 7 days sufficient +- **Pruning Impact:** Minimal (~1 KB) + +### TIER 5: Ephemeral - Keep 3 Days + +**Diagnostic/Health Checks (diagnostic_completed, health_check_completed)** +- **Current:** 17 events, ~2.5 KB +- **Why:** Operational health checks, only current state matters +- **Value Period:** 3 days +- **Pruning Impact:** Negligible but good hygiene + +### Workflow Data Retention Strategy + +**telemetry_workflows Table (66 MB):** +- **Simple workflows (5-6 nodes):** Keep 7 days โ†’ Save 11 MB +- **Medium workflows (13-14 nodes):** Keep 14 days โ†’ Save 6.7 MB +- **Complex workflows (26+ nodes):** Keep 30 days โ†’ Save 1.9 MB +- **Total Workflow Savings:** 19.6 MB with tiered retention + +**Rationale:** Complex workflows are rarer and more valuable for understanding advanced use cases. + +--- + +## 3. 
Pruning Recommendations with Space Savings
+
+### Strategy A: Conservative 14-Day Retention (Recommended for Initial Implementation)
+
+| Action | Records Deleted | Space Saved | Risk Level |
+|--------|----------------|-------------|------------|
+| Delete tool_sequence > 14d | 0 | 0 MB | None - all recent |
+| Delete tool_used > 14d | 0 | 0 MB | None - all recent |
+| Delete validation_details > 14d | 4,259 | 1.2 MB | Low |
+| Delete session_start > 14d | 0 | 0 MB | None - all recent |
+| Delete workflows > 14d | 1 | <1 KB | None |
+| **TOTAL** | **4,260** | **1.2 MB** | **Low** |
+
+**Assessment:** Minimal immediate impact because the data is too recent. Not sufficient to prevent overflow.
+
+### Strategy B: Aggressive 7-Day Retention (RECOMMENDED)
+
+| Action | Records Deleted | Space Saved | Risk Level |
+|--------|----------------|-------------|------------|
+| Delete tool_sequence > 7d | 155,389 | 29 MB | Low - pattern data |
+| Delete tool_used > 7d | 82,827 | 6.2 MB | Low - usage metrics |
+| Delete validation_details > 7d | 17,465 | 5.4 MB | Medium - debugging data |
+| Delete workflow_created > 7d | 9,106 | 1.0 MB | Low - creation events |
+| Delete session_start > 7d | 5,664 | 497 KB | Low - session data |
+| Delete error_occurred > 7d | 2,321 | 206 KB | Medium - error history |
+| Delete workflow_validation_failed > 7d | 5,269 | 170 KB | Low - validation events |
+| Delete workflows > 7d (simple) | 5,146 | 11 MB | Low - simple workflows |
+| Delete workflows > 7d (medium) | 1,506 | 6.7 MB | Medium - medium workflows |
+| Delete workflows > 7d (complex) | 231 | 1.9 MB | High - complex workflows |
+| **TOTAL** | **284,924** | **62.1 MB** | **Medium** |
+
+**New Database Size:** 265 MB - 62.1 MB = **202.9 MB (40.6% of limit)**
+**Buffer:** 297 MB remaining (~38 days at current growth rate)
+
+### Strategy C: Hybrid Tiered Retention (OPTIMAL LONG-TERM)
+
+| Event Type | Retention Period | Records Deleted | Space Saved |
+|-----------|------------------|----------------|-------------|
+| tool_sequence | 7 days | 155,389 | 29 MB |
+| tool_used | 7 days | 82,827 | 6.2 MB |
+| validation_details | 14 days | 4,259 | 1.2 MB |
+| workflow_created | 14 days | 3 | <1 KB |
+| session_start | 7 days | 5,664 | 497 KB |
+| error_occurred | 30 days (keep all) | 0 | 0 MB |
+| workflow_validation_failed | 7 days | 5,269 | 170 KB |
+| search_query | 7 days | 10 | 1 KB |
+| Workflows (simple) | 7 days | 5,146 | 11 MB |
+| Workflows (medium) | 14 days | 0 | 0 MB |
+| Workflows (complex) | 30 days (keep all) | 0 | 0 MB |
+| **TOTAL** | **Various** | **258,567** | **48.1 MB** |
+
+**New Database Size:** 265 MB - 48.1 MB = **216.9 MB (43.4% of limit)**
+**Buffer:** 283 MB remaining (~36 days at current growth rate)
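+
+Before committing to a strategy, a dry-run preview of what each cutoff would delete can be taken straight from `telemetry_events`; a sketch (same size caveat as above):
+
+```sql
+SELECT
+    event,
+    COUNT(*) FILTER (WHERE created_at < NOW() - INTERVAL '7 days')  AS delete_at_7d,
+    COUNT(*) FILTER (WHERE created_at < NOW() - INTERVAL '14 days') AS delete_at_14d,
+    pg_size_pretty((SUM(pg_column_size(properties))
+        FILTER (WHERE created_at < NOW() - INTERVAL '7 days'))::bigint) AS approx_reclaim_7d
+FROM telemetry_events
+GROUP BY event
+ORDER BY delete_at_7d DESC;
+```
+
+---
+
+## 4. 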
Additional Optimization Opportunities + +### Optimization 1: Properties Field Compression + +**Finding:** validation_details events have bloated properties (avg 329 bytes, max 9 KB) + +```sql +-- Identify large validation_details records +SELECT id, user_id, created_at, pg_column_size(properties) as size_bytes +FROM telemetry_events +WHERE event = 'validation_details' + AND pg_column_size(properties) > 1000 +ORDER BY size_bytes DESC; +-- Result: 417 records > 1KB, 2 records > 5KB +``` + +**Recommendation:** Truncate verbose error messages in validation_details after 7 days +- Keep error types and counts +- Remove full stack traces and detailed messages +- Estimated savings: 2-3 MB + +### Optimization 2: Remove Redundant tool_sequence Data + +**Finding:** tool_sequence properties contain mostly null values + +```sql +-- Analysis shows all tool_sequence.properties->>'tools' are null +-- 362,170 records storing null in properties field +``` + +**Recommendation:** +1. Investigate why tool_sequence properties are empty +2. If by design, reduce properties field size or use a flag +3. Potential savings: 10-15 MB if properties field is eliminated + +### Optimization 3: Workflow Deduplication by Hash + +**Finding:** No duplicate workflow_hash values found (good!) + +**Recommendation:** Continue using workflow_hash for future deduplication if needed. No action required. + +### Optimization 4: Dead Row Cleanup + +**Finding:** telemetry_workflows has 1,591 dead rows (9.5% overhead) + +```sql +-- Run VACUUM to reclaim space +VACUUM FULL telemetry_workflows; +-- Expected savings: ~6-7 MB +``` + +**Recommendation:** Schedule weekly VACUUM operations + +### Optimization 5: Index Optimization + +**Current indexes consume space but improve query performance** + +```sql +-- Check index sizes +SELECT + schemaname, tablename, indexname, + pg_size_pretty(pg_relation_size(indexrelid)) as index_size +FROM pg_stat_user_indexes +WHERE schemaname = 'public' +ORDER BY pg_relation_size(indexrelid) DESC; +``` + +**Recommendation:** Review if all indexes are necessary after pruning strategy is implemented + +--- + +## 5. 
Implementation Strategy
+
+### Phase 1: Immediate Emergency Pruning (Day 1)
+
+**Goal:** Free up 60+ MB immediately to prevent overflow
+
+```sql
+-- EMERGENCY PRUNING: Delete data older than 7 days
+BEGIN;
+
+-- Backup count before deletion
+SELECT
+    event,
+    COUNT(*) FILTER (WHERE created_at < NOW() - INTERVAL '7 days') as to_delete
+FROM telemetry_events
+GROUP BY event;
+
+-- Delete old events
+DELETE FROM telemetry_events
+WHERE created_at < NOW() - INTERVAL '7 days';
+-- Expected: ~278,051 rows deleted, ~36.5 MB saved
+
+-- Delete old simple workflows
+DELETE FROM telemetry_workflows
+WHERE created_at < NOW() - INTERVAL '7 days'
+  AND complexity = 'simple';
+-- Expected: ~5,146 rows deleted, ~11 MB saved
+
+-- Verify new size
+SELECT
+    schemaname, relname,
+    pg_size_pretty(pg_total_relation_size(schemaname||'.'||relname)) AS size
+FROM pg_stat_user_tables
+WHERE schemaname = 'public';
+
+COMMIT;
+
+-- Clean up dead rows
+VACUUM FULL telemetry_events;
+VACUUM FULL telemetry_workflows;
+```
+
+**Expected Result:** Database size reduced to ~210-220 MB (55-60% buffer remaining)
+
+### Phase 2: Implement Automated Retention Policy (Week 1)
+
+**Create a scheduled Supabase Edge Function or pg_cron job**
+
+```sql
+-- Create retention policy function
+CREATE OR REPLACE FUNCTION apply_retention_policy()
+RETURNS void AS $$
+BEGIN
+    -- Tier 4: 7-day retention for high-volume events
+    DELETE FROM telemetry_events
+    WHERE created_at < NOW() - INTERVAL '7 days'
+      AND event IN ('tool_sequence', 'tool_used', 'session_start',
+                    'workflow_validation_failed', 'search_query');
+
+    -- Tier 3: 14-day retention for medium-value events
+    DELETE FROM telemetry_events
+    WHERE created_at < NOW() - INTERVAL '14 days'
+      AND event IN ('validation_details', 'workflow_created');
+
+    -- Tier 1: 30-day retention for errors (keep longer)
+    DELETE FROM telemetry_events
+    WHERE created_at < NOW() - INTERVAL '30 days'
+      AND event = 'error_occurred';
+
+    -- Workflow retention by complexity
+    DELETE FROM telemetry_workflows
+    WHERE created_at < NOW() - INTERVAL '7 days'
+      AND complexity = 'simple';
+
+    DELETE FROM telemetry_workflows
+    WHERE created_at < NOW() - INTERVAL '14 days'
+      AND complexity = 'medium';
+
+    DELETE FROM telemetry_workflows
+    WHERE created_at < NOW() - INTERVAL '30 days'
+      AND complexity = 'complex';
+
+    -- NOTE: VACUUM cannot be executed inside a plpgsql function;
+    -- it is scheduled as separate jobs below.
+END;
+$$ LANGUAGE plpgsql;
+
+-- Schedule daily execution (using pg_cron extension)
+SELECT cron.schedule('retention-policy', '0 2 * * *', 'SELECT apply_retention_policy()');
+
+-- VACUUM as its own jobs: pg_cron runs a command string in one implicit
+-- transaction, and VACUUM is not allowed inside a transaction block, so it
+-- cannot share the command above or live inside the function.
+SELECT cron.schedule('retention-vacuum-events', '30 2 * * *', 'VACUUM ANALYZE telemetry_events');
+SELECT cron.schedule('retention-vacuum-workflows', '45 2 * * *', 'VACUUM ANALYZE telemetry_workflows');
+```
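+
+Once scheduled, the job can be sanity-checked from SQL; a sketch against pg_cron's catalog tables (`cron.job`, and `cron.job_run_details` in recent pg_cron versions):
+
+```sql
+-- Is the job registered, and what has it been doing?
+SELECT jobid, jobname, schedule, command FROM cron.job;
+
+SELECT jobid, status, return_message, start_time
+FROM cron.job_run_details
+ORDER BY start_time DESC
+LIMIT 10;
+```
+
+### Phase 3: Create Aggregation Tables (Week 2)
+
+**Preserve insights while deleting raw data**
+
+```sql
+-- Daily tool usage summary
+CREATE TABLE IF NOT EXISTS telemetry_daily_tool_stats (
+    date DATE NOT NULL,
+    tool TEXT NOT NULL,
+    usage_count INTEGER NOT NULL,
+    unique_users INTEGER NOT NULL,
+    avg_duration_ms NUMERIC,
+    error_count INTEGER DEFAULT 0,
+    created_at TIMESTAMPTZ DEFAULT NOW(),
+    PRIMARY KEY (date, tool)
+);
+
+-- Daily validation summary
+CREATE TABLE IF NOT EXISTS telemetry_daily_validation_stats (
+    date DATE NOT NULL,
+    node_type TEXT,
+    total_validations INTEGER NOT NULL,
+    failed_validations INTEGER NOT NULL,
+    success_rate NUMERIC,
+    common_errors JSONB,
+    created_at TIMESTAMPTZ DEFAULT NOW(),
+    PRIMARY KEY (date, node_type)
+);
+
+-- Aggregate function to run before pruning
+CREATE OR REPLACE FUNCTION aggregate_before_pruning()
+RETURNS void AS $$
+BEGIN
+    -- Aggregate tool usage for data about to be deleted
+    INSERT INTO 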
telemetry_daily_tool_stats (date, tool, usage_count, unique_users, avg_duration_ms) + SELECT + DATE(created_at) as date, + properties->>'tool' as tool, + COUNT(*) as usage_count, + COUNT(DISTINCT user_id) as unique_users, + AVG((properties->>'duration')::numeric) as avg_duration_ms + FROM telemetry_events + WHERE event = 'tool_used' + AND created_at < NOW() - INTERVAL '7 days' + AND created_at >= NOW() - INTERVAL '8 days' + GROUP BY DATE(created_at), properties->>'tool' + ON CONFLICT (date, tool) DO NOTHING; + + -- Aggregate validation stats + INSERT INTO telemetry_daily_validation_stats (date, node_type, total_validations, failed_validations) + SELECT + DATE(created_at) as date, + properties->>'nodeType' as node_type, + COUNT(*) as total_validations, + COUNT(*) FILTER (WHERE properties->>'valid' = 'false') as failed_validations + FROM telemetry_events + WHERE event = 'validation_details' + AND created_at < NOW() - INTERVAL '14 days' + AND created_at >= NOW() - INTERVAL '15 days' + GROUP BY DATE(created_at), properties->>'nodeType' + ON CONFLICT (date, node_type) DO NOTHING; +END; +$$ LANGUAGE plpgsql; + +-- Update cron job to aggregate before pruning +SELECT cron.schedule('aggregate-then-prune', '0 2 * * *', + 'SELECT aggregate_before_pruning(); SELECT apply_retention_policy();'); +``` + +### Phase 4: Monitoring and Alerting (Week 2) + +**Create size monitoring function** + +```sql +CREATE OR REPLACE FUNCTION check_database_size() +RETURNS TABLE( + total_size_mb NUMERIC, + limit_mb NUMERIC, + percent_used NUMERIC, + days_until_full NUMERIC +) AS $$ +DECLARE + current_size_bytes BIGINT; + growth_rate_bytes_per_day NUMERIC; +BEGIN + -- Get current size + SELECT SUM(pg_total_relation_size(schemaname||'.'||relname)) + INTO current_size_bytes + FROM pg_stat_user_tables + WHERE schemaname = 'public'; + + -- Calculate 7-day growth rate + SELECT + (COUNT(*) FILTER (WHERE created_at >= NOW() - INTERVAL '7 days')) * + AVG(pg_column_size(properties)) * (1.0/7) + INTO growth_rate_bytes_per_day + FROM telemetry_events; + + RETURN QUERY + SELECT + ROUND((current_size_bytes / 1024.0 / 1024.0)::numeric, 2) as total_size_mb, + 500.0 as limit_mb, + ROUND((current_size_bytes / 1024.0 / 1024.0 / 500.0 * 100)::numeric, 2) as percent_used, + ROUND((((500.0 * 1024 * 1024) - current_size_bytes) / NULLIF(growth_rate_bytes_per_day, 0))::numeric, 1) as days_until_full; +END; +$$ LANGUAGE plpgsql; + +-- Alert function (integrate with external monitoring) +CREATE OR REPLACE FUNCTION alert_if_size_critical() +RETURNS void AS $$ +DECLARE + size_pct NUMERIC; +BEGIN + SELECT percent_used INTO size_pct FROM check_database_size(); + + IF size_pct > 90 THEN + -- Log critical alert + INSERT INTO telemetry_events (user_id, event, properties) + VALUES ('system', 'database_size_critical', + json_build_object('percent_used', size_pct, 'timestamp', NOW())::jsonb); + END IF; +END; +$$ LANGUAGE plpgsql; +``` + +--- + +## 6. Priority Order for Implementation + +### Priority 1: URGENT (Day 1) +1. **Execute Emergency Pruning** - Delete data older than 7 days + - Impact: 47.5 MB saved immediately + - Risk: Low - data already analyzed + - SQL: Provided in Phase 1 + +### Priority 2: HIGH (Week 1) +2. **Implement Automated Retention Policy** + - Impact: Prevents future overflow + - Risk: Low with proper testing + - Implementation: Phase 2 function + +3. 
**Run VACUUM FULL** + - Impact: 6-7 MB reclaimed from dead rows + - Risk: Low but locks tables briefly + - Command: `VACUUM FULL telemetry_workflows;` + +### Priority 3: MEDIUM (Week 2) +4. **Create Aggregation Tables** + - Impact: Preserves insights, enables longer-term pruning + - Risk: Low - additive only + - Implementation: Phase 3 tables and functions + +5. **Implement Monitoring** + - Impact: Prevents future surprises + - Risk: None + - Implementation: Phase 4 monitoring functions + +### Priority 4: LOW (Month 1) +6. **Optimize Properties Fields** + - Impact: 2-3 MB additional savings + - Risk: Medium - requires code changes + - Action: Truncate verbose error messages + +7. **Investigate tool_sequence null properties** + - Impact: 10-15 MB potential savings + - Risk: Medium - requires application changes + - Action: Code review and optimization + +--- + +## 7. Risk Assessment + +### Strategy B (7-Day Retention): Risks and Mitigations + +| Risk | Likelihood | Impact | Mitigation | +|------|-----------|---------|------------| +| Loss of debugging data for old issues | Medium | Medium | Keep error_occurred for 30 days; aggregate validation stats | +| Unable to analyze long-term trends | Low | Low | Implement aggregation tables before pruning | +| Accidental deletion of critical data | Low | High | Test on staging; implement backups; add rollback capability | +| Performance impact during deletion | Medium | Low | Run during off-peak hours (2 AM UTC) | +| VACUUM locks table briefly | Low | Low | Schedule during low-usage window | + +### Strategy C (Hybrid Tiered): Risks and Mitigations + +| Risk | Likelihood | Impact | Mitigation | +|------|-----------|---------|------------| +| Complex logic leads to bugs | Medium | Medium | Thorough testing; monitoring; gradual rollout | +| Different retention per event type confusing | Low | Low | Document clearly; add comments in code | +| Tiered approach still insufficient | Low | High | Monitor growth; adjust retention if needed | + +--- + +## 8. Monitoring Metrics + +### Key Metrics to Track Post-Implementation + +1. **Database Size Trend** + ```sql + SELECT * FROM check_database_size(); + ``` + - Target: Stay under 300 MB (60% of limit) + - Alert threshold: 90% (450 MB) + +2. **Daily Growth Rate** + ```sql + SELECT + DATE(created_at) as date, + COUNT(*) as events, + pg_size_pretty(SUM(pg_column_size(properties))::bigint) as daily_size + FROM telemetry_events + WHERE created_at >= NOW() - INTERVAL '7 days' + GROUP BY DATE(created_at) + ORDER BY date DESC; + ``` + - Target: < 8 MB/day average + - Alert threshold: > 12 MB/day sustained + +3. **Retention Policy Execution** + ```sql + -- Add logging to retention policy function + CREATE TABLE retention_policy_log ( + executed_at TIMESTAMPTZ DEFAULT NOW(), + events_deleted INTEGER, + workflows_deleted INTEGER, + space_reclaimed_mb NUMERIC + ); + ``` + - Monitor: Daily successful execution + - Alert: If job fails or deletes 0 rows unexpectedly + +4. **Data Availability Check** + ```sql + -- Ensure sufficient data for analysis + SELECT + event, + COUNT(*) as available_records, + MIN(created_at) as oldest_record, + MAX(created_at) as newest_record + FROM telemetry_events + GROUP BY event; + ``` + - Target: 7 days of data always available + - Alert: If oldest_record > 8 days ago (retention policy failing) + +--- + +## 9. 
Recommended Action Plan + +### Immediate Actions (Today) + +**Step 1:** Execute emergency pruning +```sql +-- Backup first (optional but recommended) +-- Create a copy of current stats +CREATE TABLE telemetry_events_stats_backup AS +SELECT event, COUNT(*), MIN(created_at), MAX(created_at) +FROM telemetry_events +GROUP BY event; + +-- Execute pruning +DELETE FROM telemetry_events WHERE created_at < NOW() - INTERVAL '7 days'; +DELETE FROM telemetry_workflows WHERE created_at < NOW() - INTERVAL '7 days' AND complexity = 'simple'; +VACUUM FULL telemetry_events; +VACUUM FULL telemetry_workflows; +``` + +**Step 2:** Verify results +```sql +SELECT * FROM check_database_size(); +``` + +**Expected outcome:** Database size ~210-220 MB (58-60% buffer remaining) + +### Week 1 Actions + +**Step 3:** Implement automated retention policy +- Create retention policy function (Phase 2 code) +- Test function on staging/development environment +- Schedule daily execution via pg_cron + +**Step 4:** Set up monitoring +- Create monitoring functions (Phase 4 code) +- Configure alerts for size thresholds +- Document escalation procedures + +### Week 2 Actions + +**Step 5:** Create aggregation tables +- Implement summary tables (Phase 3 code) +- Backfill historical aggregations if needed +- Update retention policy to aggregate before pruning + +**Step 6:** Optimize and tune +- Review query performance post-pruning +- Adjust retention periods if needed based on actual usage +- Document any issues or improvements + +### Monthly Maintenance + +**Step 7:** Regular review +- Monthly review of database growth trends +- Quarterly review of retention policy effectiveness +- Adjust retention periods based on product needs + +--- + +## 10. SQL Execution Scripts + +### Script 1: Emergency Pruning (Run First) + +```sql +-- ============================================ +-- EMERGENCY PRUNING SCRIPT +-- Expected savings: ~50 MB +-- Execution time: 2-5 minutes +-- ============================================ + +BEGIN; + +-- Create backup of current state +CREATE TABLE IF NOT EXISTS pruning_audit ( + executed_at TIMESTAMPTZ DEFAULT NOW(), + action TEXT, + records_affected INTEGER, + size_before_mb NUMERIC, + size_after_mb NUMERIC +); + +-- Record size before +INSERT INTO pruning_audit (action, size_before_mb) +SELECT 'before_pruning', + pg_total_relation_size('telemetry_events')::numeric / 1024 / 1024; + +-- Delete old events (keep last 7 days) +WITH deleted AS ( + DELETE FROM telemetry_events + WHERE created_at < NOW() - INTERVAL '7 days' + RETURNING * +) +INSERT INTO pruning_audit (action, records_affected) +SELECT 'delete_events_7d', COUNT(*) FROM deleted; + +-- Delete old simple workflows (keep last 7 days) +WITH deleted AS ( + DELETE FROM telemetry_workflows + WHERE created_at < NOW() - INTERVAL '7 days' + AND complexity = 'simple' + RETURNING * +) +INSERT INTO pruning_audit (action, records_affected) +SELECT 'delete_workflows_simple_7d', COUNT(*) FROM deleted; + +-- Record size after +UPDATE pruning_audit +SET size_after_mb = pg_total_relation_size('telemetry_events')::numeric / 1024 / 1024 +WHERE action = 'before_pruning'; + +COMMIT; + +-- Cleanup dead space +VACUUM FULL telemetry_events; +VACUUM FULL telemetry_workflows; + +-- Verify results +SELECT * FROM pruning_audit ORDER BY executed_at DESC LIMIT 5; +SELECT * FROM check_database_size(); +``` + +### Script 2: Create Retention Policy (Run After Testing) + +```sql +-- ============================================ +-- AUTOMATED RETENTION POLICY +-- Schedule: Daily at 2 AM 
UTC
+-- ============================================
+
+CREATE OR REPLACE FUNCTION apply_retention_policy()
+RETURNS TABLE(
+    action TEXT,
+    records_deleted INTEGER,
+    execution_time_ms INTEGER
+) AS $$
+DECLARE
+    start_time TIMESTAMPTZ;
+    end_time TIMESTAMPTZ;
+    deleted_count INTEGER;
+BEGIN
+    -- Tier 4: 7-day retention (high volume, low long-term value)
+    start_time := clock_timestamp();
+
+    DELETE FROM telemetry_events
+    WHERE created_at < NOW() - INTERVAL '7 days'
+      AND event IN ('tool_sequence', 'tool_used', 'session_start',
+                    'workflow_validation_failed', 'search_query');
+    GET DIAGNOSTICS deleted_count = ROW_COUNT;
+
+    end_time := clock_timestamp();
+    action := 'delete_tier4_7d';
+    records_deleted := deleted_count;
+    execution_time_ms := EXTRACT(MILLISECONDS FROM (end_time - start_time))::INTEGER;
+    RETURN NEXT;
+
+    -- Tier 3: 14-day retention (medium value)
+    start_time := clock_timestamp();
+
+    DELETE FROM telemetry_events
+    WHERE created_at < NOW() - INTERVAL '14 days'
+      AND event IN ('validation_details', 'workflow_created');
+    GET DIAGNOSTICS deleted_count = ROW_COUNT;
+
+    end_time := clock_timestamp();
+    action := 'delete_tier3_14d';
+    records_deleted := deleted_count;
+    execution_time_ms := EXTRACT(MILLISECONDS FROM (end_time - start_time))::INTEGER;
+    RETURN NEXT;
+
+    -- Tier 1: 30-day retention (errors - keep longer)
+    start_time := clock_timestamp();
+
+    DELETE FROM telemetry_events
+    WHERE created_at < NOW() - INTERVAL '30 days'
+      AND event = 'error_occurred';
+    GET DIAGNOSTICS deleted_count = ROW_COUNT;
+
+    end_time := clock_timestamp();
+    action := 'delete_errors_30d';
+    records_deleted := deleted_count;
+    execution_time_ms := EXTRACT(MILLISECONDS FROM (end_time - start_time))::INTEGER;
+    RETURN NEXT;
+
+    -- Workflow pruning by complexity
+    start_time := clock_timestamp();
+
+    DELETE FROM telemetry_workflows
+    WHERE created_at < NOW() - INTERVAL '7 days'
+      AND complexity = 'simple';
+    GET DIAGNOSTICS deleted_count = ROW_COUNT;
+
+    end_time := clock_timestamp();
+    action := 'delete_workflows_simple_7d';
+    records_deleted := deleted_count;
+    execution_time_ms := EXTRACT(MILLISECONDS FROM (end_time - start_time))::INTEGER;
+    RETURN NEXT;
+
+    start_time := clock_timestamp();
+
+    DELETE FROM telemetry_workflows
+    WHERE created_at < NOW() - INTERVAL '14 days'
+      AND complexity = 'medium';
+    GET DIAGNOSTICS deleted_count = ROW_COUNT;
+
+    end_time := clock_timestamp();
+    action := 'delete_workflows_medium_14d';
+    records_deleted := deleted_count;
+    execution_time_ms := EXTRACT(MILLISECONDS FROM (end_time - start_time))::INTEGER;
+    RETURN NEXT;
+
+    start_time := clock_timestamp();
+
+    DELETE FROM telemetry_workflows
+    WHERE created_at < NOW() - INTERVAL '30 days'
+      AND complexity = 'complex';
+    GET DIAGNOSTICS deleted_count = ROW_COUNT;
+
+    end_time := clock_timestamp();
+    action := 'delete_workflows_complex_30d';
+    records_deleted := deleted_count;
+    execution_time_ms := EXTRACT(MILLISECONDS FROM (end_time - start_time))::INTEGER;
+    RETURN NEXT;
+
+    -- NOTE: VACUUM cannot be executed from inside a plpgsql function.
+    -- Run VACUUM ANALYZE on both tables separately after this function
+    -- returns, or schedule it as its own pg_cron job.
+END;
+$$ LANGUAGE plpgsql;
+
+-- Test the function manually first (note: this is a live run that deletes
+-- rows; it is simply not scheduled yet)
+SELECT * FROM apply_retention_policy();
+
+-- After testing, schedule with pg_cron
+-- Requires pg_cron extension: CREATE EXTENSION IF NOT EXISTS 
pg_cron;
+-- SELECT cron.schedule('retention-policy', '0 2 * * *', 'SELECT apply_retention_policy()');
+```
+
+### Script 3: Create Monitoring Dashboard
+
+```sql
+-- ============================================
+-- MONITORING QUERIES
+-- Run these regularly to track database health
+-- ============================================
+
+-- Query 1: Current database size and projections
+SELECT
+    'Current Size' as metric,
+    pg_size_pretty(SUM(pg_total_relation_size(schemaname||'.'||relname))) as value
+FROM pg_stat_user_tables
+WHERE schemaname = 'public'
+UNION ALL
+SELECT
+    'Free Tier Limit' as metric,
+    '500 MB' as value
+UNION ALL
+SELECT
+    'Percent Used' as metric,
+    CONCAT(
+        ROUND(
+            (SUM(pg_total_relation_size(schemaname||'.'||relname))::numeric /
+             (500.0 * 1024 * 1024) * 100),
+            2
+        ),
+        '%'
+    ) as value
+FROM pg_stat_user_tables
+WHERE schemaname = 'public';
+
+-- Query 2: Data age distribution
+SELECT
+    event,
+    COUNT(*) as total_records,
+    MIN(created_at) as oldest_record,
+    MAX(created_at) as newest_record,
+    ROUND(EXTRACT(EPOCH FROM (MAX(created_at) - MIN(created_at))) / 86400, 2) as age_days
+FROM telemetry_events
+GROUP BY event
+ORDER BY total_records DESC;
+
+-- Query 3: Daily growth tracking (last 7 days)
+SELECT
+    DATE(created_at) as date,
+    COUNT(*) as daily_events,
+    pg_size_pretty(SUM(pg_column_size(properties))::bigint) as daily_data_size,
+    COUNT(DISTINCT user_id) as active_users
+FROM telemetry_events
+WHERE created_at >= NOW() - INTERVAL '7 days'
+GROUP BY DATE(created_at)
+ORDER BY date DESC;
+
+-- Query 4: Retention policy effectiveness
+-- NOTE: do not call apply_retention_policy() here - that is a live run
+-- that deletes data. Read the execution log instead (the
+-- retention_policy_log table from section 8; the retention function must
+-- insert into it for this query to return rows).
+SELECT
+    DATE(executed_at) as execution_date,
+    events_deleted,
+    workflows_deleted,
+    space_reclaimed_mb
+FROM retention_policy_log
+ORDER BY executed_at DESC;
+```
+
+---
+
+## Conclusion
+
+**Immediate Action Required:** Implement Strategy B (7-day retention) immediately to avoid database overflow within 2 weeks.
+
+**Long-Term Strategy:** Transition to Strategy C (Hybrid Tiered Retention) with automated aggregation to balance data preservation with storage constraints.
+
+**Expected Outcomes:**
+- Immediate: 60+ MB saved (~23% reduction)
+- Ongoing: Database stabilized at 200-220 MB (40-44% of limit)
+- Buffer: 30-40 days before limit with current growth rate
+- Risk: Low with proper testing and monitoring
+
+**Success Metrics:**
+1. Database size < 300 MB consistently
+2. 7+ days of detailed event data always available
+3. No impact on product analytics capabilities
+4. Automated retention policy runs daily without errors
+
+---
+
+**Analysis completed:** 2025-10-10
+**Next review date:** 2025-11-10 (monthly check)
+**Escalation:** If the database exceeds 400 MB, consider upgrading to a paid tier or implementing more aggressive pruning
diff --git a/tests/integration/session-persistence.test.ts b/tests/integration/session-persistence.test.ts
new file mode 100644
index 0000000..98b8a94
--- /dev/null
+++ b/tests/integration/session-persistence.test.ts
@@ -0,0 +1,600 @@
+/**
+ * Integration tests for session persistence (Phase 1)
+ *
+ * Tests the complete session restoration flow end-to-end,
+ * simulating real-world scenarios like container restarts and multi-tenant usage.
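+ *
+ * Note: MockSessionStore below is a stand-in for whatever persistence layer
+ * a backend provides (Postgres, Redis, etc.); the engine only relies on the
+ * async SessionRestoreHook contract - look up a session ID and return its
+ * InstanceContext, or null when unknown.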
+ */
+
+import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
+import { N8NMCPEngine } from '../../src/mcp-engine';
+import { SingleSessionHTTPServer } from '../../src/http-server-single-session';
+import { InstanceContext } from '../../src/types/instance-context';
+import { SessionRestoreHook, SessionState } from '../../src/types/session-restoration';
+import type { Request, Response } from 'express';
+
+// In-memory session storage for testing
+const sessionStorage: Map<string, SessionState> = new Map();
+
+/**
+ * Simulates a backend database for session persistence
+ */
+class MockSessionStore {
+  async saveSession(sessionState: SessionState): Promise<void> {
+    sessionStorage.set(sessionState.sessionId, {
+      ...sessionState,
+      // Default lastAccess and expiresAt when the caller does not provide them
+      lastAccess: sessionState.lastAccess || new Date(),
+      expiresAt: sessionState.expiresAt || new Date(Date.now() + 30 * 60 * 1000) // 30 minutes
+    });
+  }
+
+  async loadSession(sessionId: string): Promise<SessionState | null> {
+    const session = sessionStorage.get(sessionId);
+    if (!session) return null;
+
+    // Check if expired
+    if (session.expiresAt < new Date()) {
+      sessionStorage.delete(sessionId);
+      return null;
+    }
+
+    // Update last access
+    session.lastAccess = new Date();
+    session.expiresAt = new Date(Date.now() + 30 * 60 * 1000);
+    sessionStorage.set(sessionId, session);
+
+    return session;
+  }
+
+  async deleteSession(sessionId: string): Promise<void> {
+    sessionStorage.delete(sessionId);
+  }
+
+  async cleanExpired(): Promise<number> {
+    const now = new Date();
+    let count = 0;
+
+    for (const [sessionId, session] of sessionStorage.entries()) {
+      if (session.expiresAt < now) {
+        sessionStorage.delete(sessionId);
+        count++;
+      }
+    }
+
+    return count;
+  }
+
+  getAllSessions(): Map<string, SessionState> {
+    return new Map(sessionStorage);
+  }
+
+  clear(): void {
+    sessionStorage.clear();
+  }
+}
+
+describe('Session Persistence Integration Tests', () => {
+  const TEST_AUTH_TOKEN = 'integration-test-token-with-32-chars-min-length';
+  let mockStore: MockSessionStore;
+  let originalEnv: NodeJS.ProcessEnv;
+
+  beforeEach(() => {
+    // Save and set environment
+    originalEnv = { ...process.env };
+    process.env.AUTH_TOKEN = TEST_AUTH_TOKEN;
+    process.env.PORT = '0';
+    process.env.NODE_ENV = 'test';
+
+    // Clear session storage
+    mockStore = new MockSessionStore();
+    mockStore.clear();
+  });
+
+  afterEach(() => {
+    // Restore environment
+    process.env = originalEnv;
+    mockStore.clear();
+  });
+
+  // Helper to create properly mocked Request and Response objects
+  function createMockReqRes(sessionId?: string, body?: any) {
+    const req = {
+      method: 'POST',
+      path: '/mcp',
+      url: '/mcp',
+      originalUrl: '/mcp',
+      headers: {
+        'authorization': `Bearer ${TEST_AUTH_TOKEN}`,
+        ...(sessionId && { 'mcp-session-id': sessionId })
+      } as Record<string, string>,
+      body: body || {
+        jsonrpc: '2.0',
+        method: 'tools/list',
+        params: {},
+        id: 1
+      },
+      ip: '127.0.0.1',
+      readable: true,
+      readableEnded: false,
+      complete: true,
+      get: vi.fn((header: string) => req.headers[header.toLowerCase()]),
+      on: vi.fn((event: string, handler: Function) => {}),
+      removeListener: vi.fn((event: string, handler: Function) => {})
+    } as any as Request;
+
+    const res = {
+      status: vi.fn().mockReturnThis(),
+      json: vi.fn().mockReturnThis(),
+      setHeader: vi.fn(),
+      send: vi.fn().mockReturnThis(),
+      headersSent: false,
+      finished: false
+    } as any as Response;
+
+    return { req, res };
+  }
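+
+  // Example usage, mirroring the tests below:
+  //   const { req, res } = createMockReqRes(sessionId);
+  //   await engine.processRequest(req, res, context);
+
+  describe('Container Restart Simulation', () => {
+    it('should restore session after simulated container restart', async () => {
+      // PHASE 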
1: Initial session creation + const context: InstanceContext = { + n8nApiUrl: 'https://tenant1.n8n.cloud', + n8nApiKey: 'tenant1-api-key', + instanceId: 'tenant-1' + }; + + const sessionId = 'instance-tenant-1-abc-550e8400-e29b-41d4-a716-446655440000'; + + // Simulate session being persisted by the backend + await mockStore.saveSession({ + sessionId, + instanceContext: context, + createdAt: new Date(), + lastAccess: new Date(), + expiresAt: new Date(Date.now() + 30 * 60 * 1000) + }); + + // PHASE 2: Simulate container restart (create new engine) + const restorationHook: SessionRestoreHook = async (sid) => { + const session = await mockStore.loadSession(sid); + return session ? session.instanceContext : null; + }; + + const engine = new N8NMCPEngine({ + onSessionNotFound: restorationHook, + sessionRestorationTimeout: 5000 + }); + + // PHASE 3: Client tries to use old session ID + const { req: mockReq, res: mockRes } = createMockReqRes(sessionId); + + // Should successfully restore and process request + await engine.processRequest(mockReq, mockRes, context); + + // Session should be restored (not return 400 for unknown session) + expect(mockRes.status).not.toHaveBeenCalledWith(400); + expect(mockRes.status).not.toHaveBeenCalledWith(404); + + await engine.shutdown(); + }); + + it('should reject expired sessions after container restart', async () => { + const context: InstanceContext = { + n8nApiUrl: 'https://tenant1.n8n.cloud', + n8nApiKey: 'tenant1-api-key', + instanceId: 'tenant-1' + }; + + const sessionId = '550e8400-e29b-41d4-a716-446655440000'; + + // Save session with past expiration + await mockStore.saveSession({ + sessionId, + instanceContext: context, + createdAt: new Date(Date.now() - 60 * 60 * 1000), // 1 hour ago + lastAccess: new Date(Date.now() - 45 * 60 * 1000), // 45 minutes ago + expiresAt: new Date(Date.now() - 15 * 60 * 1000) // Expired 15 minutes ago + }); + + const restorationHook: SessionRestoreHook = async (sid) => { + const session = await mockStore.loadSession(sid); + return session ? 
session.instanceContext : null; + }; + + const engine = new N8NMCPEngine({ + onSessionNotFound: restorationHook, + sessionRestorationTimeout: 5000 + }); + + const { req: mockReq, res: mockRes } = createMockReqRes(sessionId); + + await engine.processRequest(mockReq, mockRes); + + // Should reject expired session + expect(mockRes.status).toHaveBeenCalledWith(400); + expect(mockRes.json).toHaveBeenCalledWith( + expect.objectContaining({ + error: expect.objectContaining({ + message: expect.stringMatching(/session|not found/i) + }) + }) + ); + + await engine.shutdown(); + }); + }); + + describe('Multi-Tenant Session Restoration', () => { + it('should restore correct instance context for each tenant', async () => { + // Create sessions for multiple tenants + const tenant1Context: InstanceContext = { + n8nApiUrl: 'https://tenant1.n8n.cloud', + n8nApiKey: 'tenant1-key', + instanceId: 'tenant-1' + }; + + const tenant2Context: InstanceContext = { + n8nApiUrl: 'https://tenant2.n8n.cloud', + n8nApiKey: 'tenant2-key', + instanceId: 'tenant-2' + }; + + const sessionId1 = 'instance-tenant-1-abc-550e8400-e29b-41d4-a716-446655440000'; + const sessionId2 = 'instance-tenant-2-xyz-f47ac10b-58cc-4372-a567-0e02b2c3d479'; + + await mockStore.saveSession({ + sessionId: sessionId1, + instanceContext: tenant1Context, + createdAt: new Date(), + lastAccess: new Date(), + expiresAt: new Date(Date.now() + 30 * 60 * 1000) + }); + + await mockStore.saveSession({ + sessionId: sessionId2, + instanceContext: tenant2Context, + createdAt: new Date(), + lastAccess: new Date(), + expiresAt: new Date(Date.now() + 30 * 60 * 1000) + }); + + const restorationHook: SessionRestoreHook = async (sid) => { + const session = await mockStore.loadSession(sid); + return session ? session.instanceContext : null; + }; + + const engine = new N8NMCPEngine({ + onSessionNotFound: restorationHook, + sessionRestorationTimeout: 5000 + }); + + // Verify each tenant gets their own context + const session1 = await mockStore.loadSession(sessionId1); + const session2 = await mockStore.loadSession(sessionId2); + + expect(session1?.instanceContext.instanceId).toBe('tenant-1'); + expect(session1?.instanceContext.n8nApiUrl).toBe('https://tenant1.n8n.cloud'); + + expect(session2?.instanceContext.instanceId).toBe('tenant-2'); + expect(session2?.instanceContext.n8nApiUrl).toBe('https://tenant2.n8n.cloud'); + + await engine.shutdown(); + }); + + it('should isolate sessions between tenants', async () => { + const tenant1Context: InstanceContext = { + n8nApiUrl: 'https://tenant1.n8n.cloud', + n8nApiKey: 'tenant1-key', + instanceId: 'tenant-1' + }; + + const sessionId = 'instance-tenant-1-abc-550e8400-e29b-41d4-a716-446655440000'; + + await mockStore.saveSession({ + sessionId, + instanceContext: tenant1Context, + createdAt: new Date(), + lastAccess: new Date(), + expiresAt: new Date(Date.now() + 30 * 60 * 1000) + }); + + const restorationHook: SessionRestoreHook = async (sid) => { + const session = await mockStore.loadSession(sid); + return session ? 
session.instanceContext : null; + }; + + const engine = new N8NMCPEngine({ + onSessionNotFound: restorationHook + }); + + // Tenant 2 tries to use tenant 1's session ID + const wrongSessionId = sessionId; // Tenant 1's ID + const { req: tenant2Request, res: mockRes } = createMockReqRes(wrongSessionId); + + // The restoration will succeed (session exists), but the backend + // should implement authorization checks to prevent cross-tenant access + await engine.processRequest(tenant2Request, mockRes); + + // Restoration should work (this test verifies the session CAN be restored) + // Authorization is the backend's responsibility + expect(mockRes.status).not.toHaveBeenCalledWith(404); + + await engine.shutdown(); + }); + }); + + describe('Concurrent Restoration Requests', () => { + it('should handle multiple concurrent restoration requests for same session', async () => { + const context: InstanceContext = { + n8nApiUrl: 'https://test.n8n.cloud', + n8nApiKey: 'test-key', + instanceId: 'test-instance' + }; + + const sessionId = '550e8400-e29b-41d4-a716-446655440000'; + + await mockStore.saveSession({ + sessionId, + instanceContext: context, + createdAt: new Date(), + lastAccess: new Date(), + expiresAt: new Date(Date.now() + 30 * 60 * 1000) + }); + + let hookCallCount = 0; + const restorationHook: SessionRestoreHook = async (sid) => { + hookCallCount++; + // Simulate slow database query + await new Promise(resolve => setTimeout(resolve, 50)); + const session = await mockStore.loadSession(sid); + return session ? session.instanceContext : null; + }; + + const engine = new N8NMCPEngine({ + onSessionNotFound: restorationHook, + sessionRestorationTimeout: 5000 + }); + + // Simulate 5 concurrent requests with same unknown session ID + const requests = Array.from({ length: 5 }, (_, i) => { + const { req: mockReq, res: mockRes } = createMockReqRes(sessionId, { + jsonrpc: '2.0', + method: 'tools/list', + params: {}, + id: i + 1 + }); + + return engine.processRequest(mockReq, mockRes, context); + }); + + // All should complete without error + await Promise.all(requests); + + // Hook should be called multiple times (no built-in deduplication) + // This is expected - the idempotent session creation prevents duplicates + expect(hookCallCount).toBeGreaterThan(0); + + await engine.shutdown(); + }); + }); + + describe('Database Failure Scenarios', () => { + it('should handle database connection failures gracefully', async () => { + const failingHook: SessionRestoreHook = async () => { + throw new Error('Database connection failed'); + }; + + const engine = new N8NMCPEngine({ + onSessionNotFound: failingHook, + sessionRestorationTimeout: 5000 + }); + + const { req: mockReq, res: mockRes } = createMockReqRes('550e8400-e29b-41d4-a716-446655440000'); + + await engine.processRequest(mockReq, mockRes); + + // Should return 500 for database errors + expect(mockRes.status).toHaveBeenCalledWith(500); + expect(mockRes.json).toHaveBeenCalledWith( + expect.objectContaining({ + error: expect.objectContaining({ + message: expect.stringMatching(/restoration failed|error/i) + }) + }) + ); + + await engine.shutdown(); + }); + + it('should timeout on slow database queries', async () => { + const slowHook: SessionRestoreHook = async () => { + // Simulate very slow database query + await new Promise(resolve => setTimeout(resolve, 10000)); + return { + n8nApiUrl: 'https://test.n8n.cloud', + n8nApiKey: 'test-key', + instanceId: 'test' + }; + }; + + const engine = new N8NMCPEngine({ + onSessionNotFound: slowHook, + 
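// Timeout deliberately far below the 10 s simulated query above
+        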
sessionRestorationTimeout: 100 // 100ms timeout + }); + + const { req: mockReq, res: mockRes } = createMockReqRes('550e8400-e29b-41d4-a716-446655440000'); + + await engine.processRequest(mockReq, mockRes); + + // Should return 408 for timeout + expect(mockRes.status).toHaveBeenCalledWith(408); + expect(mockRes.json).toHaveBeenCalledWith( + expect.objectContaining({ + error: expect.objectContaining({ + message: expect.stringMatching(/timeout|timed out/i) + }) + }) + ); + + await engine.shutdown(); + }); + }); + + describe('Session Metadata Tracking', () => { + it('should track session metadata correctly', async () => { + const context: InstanceContext = { + n8nApiUrl: 'https://test.n8n.cloud', + n8nApiKey: 'test-key', + instanceId: 'test-instance', + metadata: { + userId: 'user-123', + plan: 'premium' + } + }; + + const sessionId = '550e8400-e29b-41d4-a716-446655440000'; + + await mockStore.saveSession({ + sessionId, + instanceContext: context, + createdAt: new Date(), + lastAccess: new Date(), + expiresAt: new Date(Date.now() + 30 * 60 * 1000), + metadata: { + userAgent: 'test-client/1.0', + ip: '192.168.1.1' + } + }); + + const session = await mockStore.loadSession(sessionId); + + expect(session).toBeDefined(); + expect(session?.instanceContext.metadata).toEqual({ + userId: 'user-123', + plan: 'premium' + }); + expect(session?.metadata).toEqual({ + userAgent: 'test-client/1.0', + ip: '192.168.1.1' + }); + }); + + it('should update last access time on restoration', async () => { + const context: InstanceContext = { + n8nApiUrl: 'https://test.n8n.cloud', + n8nApiKey: 'test-key', + instanceId: 'test-instance' + }; + + const sessionId = '550e8400-e29b-41d4-a716-446655440000'; + const originalLastAccess = new Date(Date.now() - 10 * 60 * 1000); // 10 minutes ago + + await mockStore.saveSession({ + sessionId, + instanceContext: context, + createdAt: new Date(Date.now() - 20 * 60 * 1000), + lastAccess: originalLastAccess, + expiresAt: new Date(Date.now() + 20 * 60 * 1000) + }); + + // Wait a bit + await new Promise(resolve => setTimeout(resolve, 100)); + + // Load session (simulates restoration) + const session = await mockStore.loadSession(sessionId); + + expect(session).toBeDefined(); + expect(session!.lastAccess.getTime()).toBeGreaterThan(originalLastAccess.getTime()); + }); + }); + + describe('Session Cleanup', () => { + it('should clean up expired sessions', async () => { + // Add multiple sessions with different expiration times + await mockStore.saveSession({ + sessionId: 'session-1', + instanceContext: { + n8nApiUrl: 'https://test.n8n.cloud', + n8nApiKey: 'key1', + instanceId: 'instance-1' + }, + createdAt: new Date(Date.now() - 60 * 60 * 1000), + lastAccess: new Date(Date.now() - 45 * 60 * 1000), + expiresAt: new Date(Date.now() - 15 * 60 * 1000) // Expired + }); + + await mockStore.saveSession({ + sessionId: 'session-2', + instanceContext: { + n8nApiUrl: 'https://test.n8n.cloud', + n8nApiKey: 'key2', + instanceId: 'instance-2' + }, + createdAt: new Date(), + lastAccess: new Date(), + expiresAt: new Date(Date.now() + 30 * 60 * 1000) // Valid + }); + + const cleanedCount = await mockStore.cleanExpired(); + + expect(cleanedCount).toBe(1); + expect(mockStore.getAllSessions().size).toBe(1); + expect(mockStore.getAllSessions().has('session-2')).toBe(true); + expect(mockStore.getAllSessions().has('session-1')).toBe(false); + }); + }); + + describe('Backwards Compatibility', () => { + it('should work without restoration hook (legacy behavior)', async () => { + // Engine without restoration hook 
should work normally + const engine = new N8NMCPEngine(); + + const sessionInfo = engine.getSessionInfo(); + + expect(sessionInfo).toBeDefined(); + expect(sessionInfo.active).toBeDefined(); + + await engine.shutdown(); + }); + + it('should not break existing session creation flow', async () => { + const engine = new N8NMCPEngine({ + onSessionNotFound: async () => null + }); + + // Creating sessions should work normally + const sessionInfo = engine.getSessionInfo(); + + expect(sessionInfo).toBeDefined(); + + await engine.shutdown(); + }); + }); + + describe('Security Validation', () => { + it('should validate restored context before using it', async () => { + const invalidHook: SessionRestoreHook = async () => { + // Return context with malformed URL (truly invalid) + return { + n8nApiUrl: 'not-a-valid-url', + n8nApiKey: 'test-key', + instanceId: 'test' + } as any; + }; + + const engine = new N8NMCPEngine({ + onSessionNotFound: invalidHook, + sessionRestorationTimeout: 5000 + }); + + const { req: mockReq, res: mockRes } = createMockReqRes('550e8400-e29b-41d4-a716-446655440000'); + + await engine.processRequest(mockReq, mockRes); + + // Should reject invalid context + expect(mockRes.status).toHaveBeenCalledWith(400); + + await engine.shutdown(); + }); + }); +}); diff --git a/tests/unit/session-management-api.test.ts b/tests/unit/session-management-api.test.ts new file mode 100644 index 0000000..25d8406 --- /dev/null +++ b/tests/unit/session-management-api.test.ts @@ -0,0 +1,333 @@ +/** + * Unit tests for Session Management API (Phase 2 - REQ-5) + * Tests the public API methods for session management in v2.19.0 + */ +import { describe, it, expect, beforeEach } from 'vitest'; +import { N8NMCPEngine } from '../../src/mcp-engine'; +import { InstanceContext } from '../../src/types/instance-context'; + +describe('Session Management API (Phase 2 - REQ-5)', () => { + let engine: N8NMCPEngine; + const testContext: InstanceContext = { + n8nApiUrl: 'https://test.n8n.cloud', + n8nApiKey: 'test-api-key', + instanceId: 'test-instance' + }; + + beforeEach(() => { + // Set required AUTH_TOKEN environment variable for testing + process.env.AUTH_TOKEN = 'test-token-for-session-management-testing-32chars'; + + // Create engine with session restoration disabled for these tests + engine = new N8NMCPEngine({ + sessionTimeout: 30 * 60 * 1000 // 30 minutes + }); + }); + + describe('getActiveSessions()', () => { + it('should return empty array when no sessions exist', () => { + const sessionIds = engine.getActiveSessions(); + expect(sessionIds).toEqual([]); + }); + + it('should return session IDs after session creation via restoreSession', () => { + // Create session using direct API (not through HTTP request) + const sessionId = 'instance-test-abc123-uuid-session-test-1'; + engine.restoreSession(sessionId, testContext); + + const sessionIds = engine.getActiveSessions(); + expect(sessionIds.length).toBe(1); + expect(sessionIds).toContain(sessionId); + }); + + it('should return multiple session IDs when multiple sessions exist', () => { + // Create multiple sessions using direct API + const sessions = [ + { id: 'instance-test1-abc123-uuid-session-1', context: { ...testContext, instanceId: 'instance-1' } }, + { id: 'instance-test2-abc123-uuid-session-2', context: { ...testContext, instanceId: 'instance-2' } } + ]; + + sessions.forEach(({ id, context }) => { + engine.restoreSession(id, context); + }); + + const sessionIds = engine.getActiveSessions(); + expect(sessionIds.length).toBe(2); + 
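// getActiveSessions() makes no ordering guarantee, so assert membership
+      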
+      expect(sessionIds).toContain(sessions[0].id);
+      expect(sessionIds).toContain(sessions[1].id);
+    });
+  });
+
+  describe('getSessionState()', () => {
+    it('should return null for non-existent session', () => {
+      const state = engine.getSessionState('non-existent-session-id');
+      expect(state).toBeNull();
+    });
+
+    it('should return session state for existing session', () => {
+      // Create a session using direct API
+      const sessionId = 'instance-test-abc123-uuid-session-state-test';
+      engine.restoreSession(sessionId, testContext);
+
+      const state = engine.getSessionState(sessionId);
+      expect(state).not.toBeNull();
+      expect(state).toMatchObject({
+        sessionId: sessionId,
+        instanceContext: expect.objectContaining({
+          n8nApiUrl: testContext.n8nApiUrl,
+          n8nApiKey: testContext.n8nApiKey,
+          instanceId: testContext.instanceId
+        }),
+        createdAt: expect.any(Date),
+        lastAccess: expect.any(Date),
+        expiresAt: expect.any(Date)
+      });
+    });
+
+    it('should include metadata in session state if available', () => {
+      const contextWithMetadata: InstanceContext = {
+        ...testContext,
+        metadata: { userId: 'user-123', tier: 'premium' }
+      };
+
+      const sessionId = 'instance-test-abc123-uuid-metadata-test';
+      engine.restoreSession(sessionId, contextWithMetadata);
+
+      const state = engine.getSessionState(sessionId);
+
+      expect(state?.metadata).toEqual({ userId: 'user-123', tier: 'premium' });
+    });
+
+    it('should calculate correct expiration time', () => {
+      const sessionId = 'instance-test-abc123-uuid-expiry-test';
+      engine.restoreSession(sessionId, testContext);
+
+      const state = engine.getSessionState(sessionId);
+
+      expect(state).not.toBeNull();
+      if (state) {
+        const expectedExpiry = new Date(state.lastAccess.getTime() + 30 * 60 * 1000);
+        const actualExpiry = state.expiresAt;
+
+        // Allow 1 second difference for test timing
+        expect(Math.abs(actualExpiry.getTime() - expectedExpiry.getTime())).toBeLessThan(1000);
+      }
+    });
+  });
+
+  describe('getAllSessionStates()', () => {
+    it('should return empty array when no sessions exist', () => {
+      const states = engine.getAllSessionStates();
+      expect(states).toEqual([]);
+    });
+
+    it('should return all session states', () => {
+      // Create two sessions using direct API
+      const session1Id = 'instance-test1-abc123-uuid-all-states-1';
+      const session2Id = 'instance-test2-abc123-uuid-all-states-2';
+
+      engine.restoreSession(session1Id, {
+        ...testContext,
+        instanceId: 'instance-1'
+      });
+
+      engine.restoreSession(session2Id, {
+        ...testContext,
+        instanceId: 'instance-2'
+      });
+
+      const states = engine.getAllSessionStates();
+      expect(states.length).toBe(2);
+      expect(states[0]).toMatchObject({
+        sessionId: expect.any(String),
+        instanceContext: expect.objectContaining({
+          n8nApiUrl: testContext.n8nApiUrl
+        }),
+        createdAt: expect.any(Date),
+        lastAccess: expect.any(Date),
+        expiresAt: expect.any(Date)
+      });
+    });
+
+    it('should filter out sessions without state', () => {
+      // Create session using direct API
+      const sessionId = 'instance-test-abc123-uuid-filter-test';
+      engine.restoreSession(sessionId, testContext);
+
+      // Get states
+      const states = engine.getAllSessionStates();
+      expect(states.length).toBe(1);
+
+      // All returned states should be non-null
+      states.forEach(state => {
+        expect(state).not.toBeNull();
+      });
+    });
+  });
+
+  describe('restoreSession()', () => {
+    it('should create a new session with provided ID and context', () => {
+      const sessionId = 'instance-test-abc123-uuid-test-session-id';
+      const result = engine.restoreSession(sessionId, testContext);
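+      // restoreSession reports success via its boolean return value rather
+      // than by throwing; the invalid-ID and invalid-context cases later in
+      // this file exercise the false branch of the same contract.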
+      expect(result).toBe(true);
+      expect(engine.getActiveSessions()).toContain(sessionId);
+    });
+
+    it('should be idempotent - return true for existing session', () => {
+      const sessionId = 'instance-test-abc123-uuid-test-session-id2';
+
+      // First restoration
+      const result1 = engine.restoreSession(sessionId, testContext);
+      expect(result1).toBe(true);
+
+      // Second restoration with same ID
+      const result2 = engine.restoreSession(sessionId, testContext);
+      expect(result2).toBe(true);
+
+      // Should still only have one session
+      const sessionIds = engine.getActiveSessions();
+      expect(sessionIds.filter(id => id === sessionId).length).toBe(1);
+    });
+
+    it('should return false for invalid session ID format', () => {
+      const invalidSessionIds = [
+        'short', // Too short (5 chars)
+        'a'.repeat(101), // Too long (101 chars)
+        "'; DROP TABLE sessions--", // SQL injection attempt (invalid characters)
+        '../../../etc/passwd', // Path traversal attempt (invalid characters)
+        'only-nineteen-chars' // Too short (19 chars, need 20+)
+      ];
+
+      invalidSessionIds.forEach(sessionId => {
+        const result = engine.restoreSession(sessionId, testContext);
+        expect(result).toBe(false);
+      });
+    });
+
+    it('should return false for invalid instance context', () => {
+      const sessionId = 'instance-test-abc123-uuid-test-session-id3';
+      const invalidContext = {
+        n8nApiUrl: 'not-a-valid-url', // Invalid URL
+        n8nApiKey: 'test-key',
+        instanceId: 'test'
+      } as any;
+
+      const result = engine.restoreSession(sessionId, invalidContext);
+      expect(result).toBe(false);
+    });
+
+    it('should create session that can be retrieved with getSessionState', () => {
+      const sessionId = 'instance-test-abc123-uuid-test-session-id4';
+      engine.restoreSession(sessionId, testContext);
+
+      const state = engine.getSessionState(sessionId);
+      expect(state).not.toBeNull();
+      expect(state?.sessionId).toBe(sessionId);
+      expect(state?.instanceContext).toEqual(testContext);
+    });
+  });
+
+  describe('deleteSession()', () => {
+    it('should return false for non-existent session', () => {
+      const result = engine.deleteSession('non-existent-session-id');
+      expect(result).toBe(false);
+    });
+
+    it('should delete existing session and return true', () => {
+      // Create a session using direct API
+      const sessionId = 'instance-test-abc123-uuid-delete-test';
+      engine.restoreSession(sessionId, testContext);
+
+      // Delete the session
+      const result = engine.deleteSession(sessionId);
+      expect(result).toBe(true);
+
+      // Session should no longer exist
+      expect(engine.getActiveSessions()).not.toContain(sessionId);
+      expect(engine.getSessionState(sessionId)).toBeNull();
+    });
+
+    it('should return false when trying to delete already deleted session', () => {
+      // Create and delete session using direct API
+      const sessionId = 'instance-test-abc123-uuid-double-delete-test';
+      engine.restoreSession(sessionId, testContext);
+
+      engine.deleteSession(sessionId);
+
+      // Try to delete again
+      const result = engine.deleteSession(sessionId);
+      expect(result).toBe(false);
+    });
+  });
+
+  describe('Integration workflows', () => {
+    it('should support periodic backup workflow', () => {
+      // Create multiple sessions using direct API
+      for (let i = 0; i < 3; i++) {
+        const sessionId = `instance-test${i}-abc123-uuid-backup-${i}`;
+        engine.restoreSession(sessionId, {
+          ...testContext,
+          instanceId: `instance-${i}`
+        });
+      }
+
+      // Simulate periodic backup
+      const states = engine.getAllSessionStates();
+      expect(states.length).toBe(3);
+
+      // Each state should be serializable
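+      // Note: JSON.stringify converts the Date fields (createdAt, lastAccess,
+      // expiresAt) to ISO-8601 strings; a real backup consumer would need to
+      // revive them (e.g. new Date(value)) before reusing the state.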
+      states.forEach(state => {
+        const serialized = JSON.stringify(state);
+        expect(serialized).toBeTruthy();
+
+        const deserialized = JSON.parse(serialized);
+        expect(deserialized.sessionId).toBe(state.sessionId);
+      });
+    });
+
+    it('should support bulk restore workflow', () => {
+      const sessionData = [
+        { sessionId: 'instance-test1-abc123-uuid-bulk-session-1', context: { ...testContext, instanceId: 'user-1' } },
+        { sessionId: 'instance-test2-abc123-uuid-bulk-session-2', context: { ...testContext, instanceId: 'user-2' } },
+        { sessionId: 'instance-test3-abc123-uuid-bulk-session-3', context: { ...testContext, instanceId: 'user-3' } }
+      ];
+
+      // Restore all sessions
+      for (const { sessionId, context } of sessionData) {
+        const restored = engine.restoreSession(sessionId, context);
+        expect(restored).toBe(true);
+      }
+
+      // Verify all sessions exist
+      const sessionIds = engine.getActiveSessions();
+      expect(sessionIds.length).toBe(3);
+
+      sessionData.forEach(({ sessionId }) => {
+        expect(sessionIds).toContain(sessionId);
+      });
+    });
+
+    it('should support session lifecycle workflow (create → get → delete)', () => {
+      // 1. Create session using direct API
+      const sessionId = 'instance-test-abc123-uuid-lifecycle-test';
+      engine.restoreSession(sessionId, testContext);
+
+      // 2. Get session state
+      const state = engine.getSessionState(sessionId);
+      expect(state).not.toBeNull();
+
+      // 3. Simulate saving to database (serialization test)
+      const serialized = JSON.stringify(state);
+      expect(serialized).toBeTruthy();
+
+      // 4. Delete session
+      const deleted = engine.deleteSession(sessionId);
+      expect(deleted).toBe(true);
+
+      // 5. Verify deletion
+      expect(engine.getSessionState(sessionId)).toBeNull();
+      expect(engine.getActiveSessions()).not.toContain(sessionId);
+    });
+  });
+});
diff --git a/tests/unit/session-restoration.test.ts b/tests/unit/session-restoration.test.ts
new file mode 100644
index 0000000..5334289
--- /dev/null
+++ b/tests/unit/session-restoration.test.ts
@@ -0,0 +1,545 @@
+import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
+import { SingleSessionHTTPServer } from '../../src/http-server-single-session';
+import { InstanceContext } from '../../src/types/instance-context';
+import { SessionRestoreHook } from '../../src/types/session-restoration';
+
+// Mock dependencies
+vi.mock('../../src/utils/logger', () => ({
+  logger: {
+    info: vi.fn(),
+    error: vi.fn(),
+    warn: vi.fn(),
+    debug: vi.fn()
+  }
+}));
+
+vi.mock('dotenv');
+
+// Mock UUID generation to make tests predictable
+vi.mock('uuid', () => ({
+  v4: vi.fn(() => 'test-session-id-1234-5678-9012-345678901234')
+}));
+
+// Mock transport
+vi.mock('@modelcontextprotocol/sdk/server/streamableHttp.js', () => ({
+  StreamableHTTPServerTransport: vi.fn().mockImplementation((options: any) => {
+    const mockTransport = {
+      handleRequest: vi.fn().mockImplementation(async (req: any, res: any, body?: any) => {
+        if (body && body.method === 'initialize') {
+          res.setHeader('Mcp-Session-Id', mockTransport.sessionId || 'test-session-id');
+        }
+        res.status(200).json({
+          jsonrpc: '2.0',
+          result: { success: true },
+          id: body?.id || 1
+        });
+      }),
+      close: vi.fn().mockResolvedValue(undefined),
+      sessionId: null as string | null,
+      onclose: null as (() => void) | null
+    };
+
+    if (options?.sessionIdGenerator) {
+      const sessionId = options.sessionIdGenerator();
+      mockTransport.sessionId = sessionId;
+
+      if (options.onsessioninitialized) {
+        setTimeout(() => {
+          options.onsessioninitialized(sessionId);
+        }, 0);
+      }
+    }
+
+    return mockTransport;
+  })
+}));
+
+vi.mock('@modelcontextprotocol/sdk/server/sse.js', () => ({
+  SSEServerTransport: vi.fn().mockImplementation(() => ({
+    close: vi.fn().mockResolvedValue(undefined)
+  }))
+}));
+
+vi.mock('../../src/mcp/server', () => ({
+  N8NDocumentationMCPServer: vi.fn().mockImplementation(() => ({
+    connect: vi.fn().mockResolvedValue(undefined)
+  }))
+}));
+
+const mockConsoleManager = {
+  wrapOperation: vi.fn().mockImplementation(async (fn: () => Promise<any>) => {
+    return await fn();
+  })
+};
+
+vi.mock('../../src/utils/console-manager', () => ({
+  ConsoleManager: vi.fn(() => mockConsoleManager)
+}));
+
+vi.mock('../../src/utils/url-detector', () => ({
+  getStartupBaseUrl: vi.fn((host: string, port: number) => `http://localhost:${port || 3000}`),
+  formatEndpointUrls: vi.fn((baseUrl: string) => ({
+    health: `${baseUrl}/health`,
+    mcp: `${baseUrl}/mcp`
+  })),
+  detectBaseUrl: vi.fn((req: any, host: string, port: number) => `http://localhost:${port || 3000}`)
+}));
+
+vi.mock('../../src/utils/version', () => ({
+  PROJECT_VERSION: '2.19.0'
+}));
+
+vi.mock('@modelcontextprotocol/sdk/types.js', () => ({
+  isInitializeRequest: vi.fn((request: any) => {
+    return request && request.method === 'initialize';
+  })
+}));
+
+// Create handlers storage for Express mock
+const mockHandlers: { [key: string]: any[] } = {
+  get: [],
+  post: [],
+  delete: [],
+  use: []
+};
+
+// Mock Express
+vi.mock('express', () => {
+  const mockExpressApp = {
+    get: vi.fn((path: string, ...handlers: any[]) => {
+      mockHandlers.get.push({ path, handlers });
+      return mockExpressApp;
+    }),
+    post: vi.fn((path: string, ...handlers: any[]) => {
+      mockHandlers.post.push({ path, handlers });
+      return mockExpressApp;
+    }),
+    delete: vi.fn((path: string, ...handlers: any[]) => {
+      mockHandlers.delete.push({ path, handlers });
+      return mockExpressApp;
+    }),
+    use: vi.fn((handler: any) => {
+      mockHandlers.use.push(handler);
+      return mockExpressApp;
+    }),
+    set: vi.fn(),
+    listen: vi.fn((port: number, host: string, callback?: () => void) => {
+      if (callback) callback();
+      return {
+        on: vi.fn(),
+        close: vi.fn((cb: () => void) => cb()),
+        address: () => ({ port: 3000 })
+      };
+    })
+  };
+
+  interface ExpressMock {
+    (): typeof mockExpressApp;
+    json(): (req: any, res: any, next: any) => void;
+  }
+
+  const expressMock = vi.fn(() => mockExpressApp) as unknown as ExpressMock;
+  expressMock.json = vi.fn(() => (req: any, res: any, next: any) => {
+    req.body = req.body || {};
+    next();
+  });
+
+  return {
+    default: expressMock,
+    Request: {},
+    Response: {},
+    NextFunction: {}
+  };
+});
+
+describe('Session Restoration (Phase 1 - REQ-1, REQ-2, REQ-8)', () => {
+  const originalEnv = process.env;
+  const TEST_AUTH_TOKEN = 'test-auth-token-with-more-than-32-characters';
+  let server: SingleSessionHTTPServer;
+  let consoleLogSpy: any;
+  let consoleWarnSpy: any;
+  let consoleErrorSpy: any;
+
+  beforeEach(() => {
+    // Reset environment
+    process.env = { ...originalEnv };
+    process.env.AUTH_TOKEN = TEST_AUTH_TOKEN;
+    process.env.PORT = '0';
+    process.env.NODE_ENV = 'test';
+
+    // Mock console methods
+    consoleLogSpy = vi.spyOn(console, 'log').mockImplementation(() => {});
+    consoleWarnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {});
+    consoleErrorSpy = vi.spyOn(console, 'error').mockImplementation(() => {});
+
+    // Clear all mocks and handlers
+    vi.clearAllMocks();
+    mockHandlers.get = [];
+    mockHandlers.post = [];
+    mockHandlers.delete = [];
+    mockHandlers.use = [];
+  });
+
+  afterEach(async () => {
+    // Restore environment
+    process.env = originalEnv;
+
+    // Restore console methods
+    consoleLogSpy.mockRestore();
+    consoleWarnSpy.mockRestore();
+    consoleErrorSpy.mockRestore();
+
+    // Shutdown server if running
+    if (server) {
+      await server.shutdown();
+      server = null as any;
+    }
+  });
+
+  // Helper functions
+  function findHandler(method: 'get' | 'post' | 'delete', path: string) {
+    const routes = mockHandlers[method];
+    const route = routes.find(r => r.path === path);
+    return route ? route.handlers[route.handlers.length - 1] : null;
+  }
+
+  function createMockReqRes() {
+    const headers: { [key: string]: string } = {};
+    const res = {
+      status: vi.fn().mockReturnThis(),
+      json: vi.fn().mockReturnThis(),
+      send: vi.fn().mockReturnThis(),
+      setHeader: vi.fn((key: string, value: string) => {
+        headers[key.toLowerCase()] = value;
+      }),
+      sendStatus: vi.fn().mockReturnThis(),
+      headersSent: false,
+      finished: false,
+      statusCode: 200,
+      getHeader: (key: string) => headers[key.toLowerCase()],
+      headers
+    };
+
+    const req = {
+      method: 'POST',
+      path: '/mcp',
+      url: '/mcp',
+      originalUrl: '/mcp',
+      headers: {} as Record<string, string>,
+      body: {},
+      ip: '127.0.0.1',
+      readable: true,
+      readableEnded: false,
+      complete: true,
+      get: vi.fn((header: string) => (req.headers as Record<string, string>)[header.toLowerCase()])
+    };
+
+    return { req, res };
+  }
+
+  describe('REQ-8: Security-Hardened Session ID Validation', () => {
+    it('should accept valid UUIDv4 session IDs', () => {
+      server = new SingleSessionHTTPServer();
+
+      const validUUIDs = [
+        '550e8400-e29b-41d4-a716-446655440000',
+        'f47ac10b-58cc-4372-a567-0e02b2c3d479',
+        'a1b2c3d4-e5f6-4789-abcd-1234567890ab'
+      ];
+
+      for (const sessionId of validUUIDs) {
+        expect((server as any).isValidSessionId(sessionId)).toBe(true);
+      }
+    });
+
+    it('should accept multi-tenant instance session IDs', () => {
+      server = new SingleSessionHTTPServer();
+
+      const multiTenantIds = [
+        'instance-user123-abc-550e8400-e29b-41d4-a716-446655440000',
+        'instance-tenant456-xyz-f47ac10b-58cc-4372-a567-0e02b2c3d479'
+      ];
+
+      for (const sessionId of multiTenantIds) {
+        expect((server as any).isValidSessionId(sessionId)).toBe(true);
+      }
+    });
+
+    it('should reject session IDs with SQL injection patterns', () => {
+      server = new SingleSessionHTTPServer();
+
+      const sqlInjectionIds = [
+        "'; DROP TABLE sessions; --",
+        "1' OR '1'='1",
+        "admin'--",
+        "1'; DELETE FROM sessions WHERE '1'='1"
+      ];
+
+      for (const sessionId of sqlInjectionIds) {
+        expect((server as any).isValidSessionId(sessionId)).toBe(false);
+      }
+    });
+
+    it('should reject session IDs with NoSQL injection patterns', () => {
+      server = new SingleSessionHTTPServer();
+
+      const nosqlInjectionIds = [
+        '{"$ne": null}',
+        '{"$gt": ""}',
+        '{$where: "1==1"}',
+        '[$regex]'
+      ];
+
+      for (const sessionId of nosqlInjectionIds) {
+        expect((server as any).isValidSessionId(sessionId)).toBe(false);
+      }
+    });
+
+    it('should reject session IDs with path traversal attempts', () => {
+      server = new SingleSessionHTTPServer();
+
+      const pathTraversalIds = [
+        '../../../etc/passwd',
+        '..\\..\\..\\windows\\system32',
+        'session/../admin',
+        'session/./../../config'
+      ];
+
+      for (const sessionId of pathTraversalIds) {
+        expect((server as any).isValidSessionId(sessionId)).toBe(false);
+      }
+    });
+
+    it('should reject session IDs that are too short (DoS protection)', () => {
+      server = new SingleSessionHTTPServer();
+
+      const tooShortIds = [
+        'a',
+        'ab',
+        '123',
+        '12345678901234567' // 17 chars (minimum is 20)
+      ];
+
+      for (const sessionId of tooShortIds) {
+        expect((server as any).isValidSessionId(sessionId)).toBe(false);
+      }
+    });
+
+    it('should reject session IDs that are too long (DoS protection)', () => {
+      server = new SingleSessionHTTPServer();
+
+      const tooLongId = 'a'.repeat(101); // Maximum is 100 chars
+      expect((server as any).isValidSessionId(tooLongId)).toBe(false);
+    });
+
+    it('should reject empty or null session IDs', () => {
+      server = new SingleSessionHTTPServer();
+
+      expect((server as any).isValidSessionId('')).toBe(false);
+      expect((server as any).isValidSessionId(null)).toBe(false);
+      expect((server as any).isValidSessionId(undefined)).toBe(false);
+    });
+
+    it('should reject session IDs with special characters', () => {
+      server = new SingleSessionHTTPServer();
+
+      const specialCharIds = [
+        'session<script>',
+        'session!@#$%^&*()',
+        'session\x00null-byte',
+        'session\r\nnewline'
+      ];
+
+      for (const sessionId of specialCharIds) {
+        expect((server as any).isValidSessionId(sessionId)).toBe(false);
+      }
+    });
+  });
+
+  describe('REQ-2: Idempotent Session Creation', () => {
+    it('should return same session ID for multiple concurrent createSession calls', async () => {
+      const mockContext: InstanceContext = {
+        n8nApiUrl: 'https://test.n8n.cloud',
+        n8nApiKey: 'test-api-key',
+        instanceId: 'tenant-123'
+      };
+
+      server = new SingleSessionHTTPServer();
+
+      const sessionId = 'instance-tenant123-abc-550e8400-e29b-41d4-a716-446655440000';
+
+      // Call createSession multiple times with same session ID
+      const id1 = (server as any).createSession(mockContext, sessionId);
+      const id2 = (server as any).createSession(mockContext, sessionId);
+      const id3 = (server as any).createSession(mockContext, sessionId);
+
+      // All calls should return the same session ID (idempotent)
+      expect(id1).toBe(sessionId);
+      expect(id2).toBe(sessionId);
+      expect(id3).toBe(sessionId);
+
+      // NOTE: Transport creation is async via callback - tested in integration tests
+    });
+
+    it('should skip session creation if session already exists', async () => {
+      const mockContext: InstanceContext = {
+        n8nApiUrl: 'https://test.n8n.cloud',
+        n8nApiKey: 'test-api-key',
+        instanceId: 'tenant-123'
+      };
+
+      server = new SingleSessionHTTPServer();
+
+      const sessionId = '550e8400-e29b-41d4-a716-446655440000';
+
+      // Create session first time
+      (server as any).createSession(mockContext, sessionId);
+      const transport1 = (server as any).transports[sessionId];
+
+      // Try to create again
+      (server as any).createSession(mockContext, sessionId);
+      const transport2 = (server as any).transports[sessionId];
+
+      // Should be the same transport instance
+      expect(transport1).toBe(transport2);
+    });
+
+    it('should validate session ID format when provided externally', async () => {
+      const mockContext: InstanceContext = {
+        n8nApiUrl: 'https://test.n8n.cloud',
+        n8nApiKey: 'test-api-key',
+        instanceId: 'tenant-123'
+      };
+
+      server = new SingleSessionHTTPServer();
+
+      const invalidSessionId = "'; DROP TABLE sessions; --";
+
+      expect(() => {
+        (server as any).createSession(mockContext, invalidSessionId);
+      }).toThrow('Invalid session ID format');
+    });
+  });
+
+  describe('REQ-1: Session Restoration Hook Configuration', () => {
+    it('should store restoration hook when provided', () => {
+      const mockHook: SessionRestoreHook = vi.fn().mockResolvedValue({
+        n8nApiUrl: 'https://test.n8n.cloud',
+        n8nApiKey: 'test-api-key',
+        instanceId: 'tenant-123'
+      });
+
+      server = new SingleSessionHTTPServer({
+        onSessionNotFound: mockHook,
+        sessionRestorationTimeout: 5000
+      });
+
+      // Verify hook is stored
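+      // White-box check: the hook and timeout appear to be private fields, so
+      // the assertions reach them through an `as any` cast rather than a
+      // public accessor.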
+      expect((server as any).onSessionNotFound).toBe(mockHook);
+      expect((server as any).sessionRestorationTimeout).toBe(5000);
+    });
+
+    it('should work without restoration hook (backward compatible)', () => {
+      server = new SingleSessionHTTPServer();
+
+      // Verify hook is not configured
+      expect((server as any).onSessionNotFound).toBeUndefined();
+    });
+
+    // NOTE: Full restoration flow tests (success, failure, timeout, validation)
+    // are in tests/integration/session-persistence.test.ts which tests the complete
+    // end-to-end flow with real HTTP requests
+  });
+
+  describe('Backwards Compatibility', () => {
+    it('should use default timeout when not specified', () => {
+      server = new SingleSessionHTTPServer({
+        onSessionNotFound: vi.fn()
+      });
+
+      expect((server as any).sessionRestorationTimeout).toBe(5000);
+    });
+
+    it('should use custom timeout when specified', () => {
+      server = new SingleSessionHTTPServer({
+        onSessionNotFound: vi.fn(),
+        sessionRestorationTimeout: 10000
+      });
+
+      expect((server as any).sessionRestorationTimeout).toBe(10000);
+    });
+
+    it('should work without any restoration options', () => {
+      server = new SingleSessionHTTPServer();
+
+      expect((server as any).onSessionNotFound).toBeUndefined();
+      expect((server as any).sessionRestorationTimeout).toBe(5000);
+    });
+  });
+
+  describe('Timeout Utility Method', () => {
+    it('should reject after specified timeout', async () => {
+      server = new SingleSessionHTTPServer();
+
+      const timeoutPromise = (server as any).timeout(100);
+
+      await expect(timeoutPromise).rejects.toThrow('Operation timed out after 100ms');
+    });
+
+    it('should create TimeoutError', async () => {
+      server = new SingleSessionHTTPServer();
+
+      try {
+        await (server as any).timeout(50);
+        expect.fail('Should have thrown TimeoutError');
+      } catch (error: any) {
+        expect(error.name).toBe('TimeoutError');
+        expect(error.message).toContain('timed out');
+      }
+    });
+  });
+
+  describe('Session ID Generation', () => {
+    it('should generate valid session IDs', () => {
+      // Set environment for multi-tenant mode
+      process.env.ENABLE_MULTI_TENANT = 'true';
+      process.env.MULTI_TENANT_SESSION_STRATEGY = 'instance';
+
+      server = new SingleSessionHTTPServer();
+
+      const context: InstanceContext = {
+        n8nApiUrl: 'https://test.n8n.cloud',
+        n8nApiKey: 'test-api-key',
+        instanceId: 'tenant-123'
+      };
+
+      const sessionId = (server as any).generateSessionId(context);
+
+      // Should generate instance-prefixed ID in multi-tenant mode
+      expect(sessionId).toContain('instance-');
+      expect((server as any).isValidSessionId(sessionId)).toBe(true);
+
+      // Clean up env
+      delete process.env.ENABLE_MULTI_TENANT;
+      delete process.env.MULTI_TENANT_SESSION_STRATEGY;
+    });
+
+    it('should generate standard UUIDs when not in multi-tenant mode', () => {
+      // Ensure multi-tenant mode is disabled
+      delete process.env.ENABLE_MULTI_TENANT;
+
+      server = new SingleSessionHTTPServer();
+
+      const sessionId = (server as any).generateSessionId();
+
+      // Should be a UUID format (mocked in tests but should be non-empty string with hyphens)
+      expect(sessionId).toBeTruthy();
+      expect(typeof sessionId).toBe('string');
+      expect(sessionId.length).toBeGreaterThan(20); // Should exceed the minimum session ID length
+      expect(sessionId).toContain('-');
+
+      // NOTE: In tests, UUID is mocked so it may not pass strict validation
+      // In production, generateSessionId uses real uuid.v4() which generates valid UUIDs
+    });
+  });
+});