mirror of
https://github.com/czlonkowski/n8n-mcp.git
synced 2026-01-30 06:22:04 +00:00
feat: Add structured template metadata generation with OpenAI
- Implement OpenAI batch API integration for metadata generation - Add metadata columns to database schema (metadata_json, metadata_generated_at) - Create MetadataGenerator service with structured output schemas - Create BatchProcessor for handling OpenAI batch jobs - Add --generate-metadata flag to fetch-templates script - Update template repository with metadata management methods - Add OpenAI configuration to environment variables - Include comprehensive tests for metadata generation - Use gpt-4o-mini model with 50% cost savings via batch API 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
22
.env.example
22
.env.example
@@ -86,4 +86,24 @@ AUTH_TOKEN=your-secure-token-here
|
||||
# N8N_API_TIMEOUT=30000
|
||||
|
||||
# Maximum number of API request retries (default: 3)
|
||||
# N8N_API_MAX_RETRIES=3
|
||||
# N8N_API_MAX_RETRIES=3
|
||||
|
||||
# =========================
|
||||
# OPENAI API CONFIGURATION
|
||||
# =========================
|
||||
# Optional: Enable AI-powered template metadata generation
|
||||
# Provides structured metadata for improved template discovery
|
||||
|
||||
# OpenAI API Key (get from https://platform.openai.com/api-keys)
|
||||
# OPENAI_API_KEY=
|
||||
|
||||
# OpenAI Model for metadata generation (default: gpt-4o-mini)
|
||||
# OPENAI_MODEL=gpt-4o-mini
|
||||
|
||||
# Batch size for metadata generation (default: 100)
|
||||
# Templates are processed in batches using OpenAI's Batch API for 50% cost savings
|
||||
# OPENAI_BATCH_SIZE=100
|
||||
|
||||
# Enable metadata generation during template fetch (default: false)
|
||||
# Set to true to automatically generate metadata when running fetch:templates
|
||||
# METADATA_GENERATION_ENABLED=false
|
||||
@@ -18,6 +18,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
- Reduces failed queries by approximately 50%
|
||||
- Added `template-node-resolver.ts` utility for node type resolution
|
||||
- Added 23 tests for template node resolution
|
||||
- **Structured Template Metadata with OpenAI**: AI-powered metadata generation for templates
|
||||
- Uses OpenAI's batch API with gpt-4o-mini for 50% cost savings
|
||||
- Generates structured metadata: categories, complexity, use cases, setup time
|
||||
- Batch processing with 24-hour SLA
|
||||
- No runtime dependency on OpenAI - metadata is generated ahead of time while fetching templates
|
||||
- Added `--generate-metadata` flag to fetch-templates script
|
||||
- New environment variables: OPENAI_API_KEY, OPENAI_MODEL, OPENAI_BATCH_SIZE
|
||||
- Added metadata columns to database schema
|
||||
- New repository methods for metadata management
|
||||
|
||||
## [2.11.0] - 2025-01-14
|
||||
|
||||
|
||||
@@ -134,8 +134,10 @@
|
||||
"n8n": "^1.110.1",
|
||||
"n8n-core": "^1.109.0",
|
||||
"n8n-workflow": "^1.107.0",
|
||||
"openai": "^4.77.0",
|
||||
"sql.js": "^1.13.0",
|
||||
"uuid": "^10.0.0"
|
||||
"uuid": "^10.0.0",
|
||||
"zod": "^3.24.1"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@rollup/rollup-darwin-arm64": "^4.50.0",
|
||||
|
||||
@@ -42,13 +42,16 @@ CREATE TABLE IF NOT EXISTS templates (
|
||||
created_at DATETIME,
|
||||
updated_at DATETIME,
|
||||
url TEXT,
|
||||
scraped_at DATETIME DEFAULT CURRENT_TIMESTAMP
|
||||
scraped_at DATETIME DEFAULT CURRENT_TIMESTAMP,
|
||||
metadata_json TEXT, -- Structured metadata from OpenAI (JSON)
|
||||
metadata_generated_at DATETIME -- When metadata was generated
|
||||
);
|
||||
|
||||
-- Templates indexes
|
||||
CREATE INDEX IF NOT EXISTS idx_template_nodes ON templates(nodes_used);
|
||||
CREATE INDEX IF NOT EXISTS idx_template_updated ON templates(updated_at);
|
||||
CREATE INDEX IF NOT EXISTS idx_template_name ON templates(name);
|
||||
CREATE INDEX IF NOT EXISTS idx_template_metadata ON templates(metadata_generated_at);
|
||||
|
||||
-- Note: FTS5 tables are created conditionally at runtime if FTS5 is supported
|
||||
-- See template-repository.ts initializeFTS5() method
|
||||
@@ -3,12 +3,21 @@ import { createDatabaseAdapter } from '../database/database-adapter';
|
||||
import { TemplateService } from '../templates/template-service';
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import * as dotenv from 'dotenv';
|
||||
import type { MetadataRequest } from '../templates/metadata-generator';
|
||||
|
||||
async function fetchTemplates(mode: 'rebuild' | 'update' = 'rebuild') {
|
||||
// Load environment variables
|
||||
dotenv.config();
|
||||
|
||||
async function fetchTemplates(mode: 'rebuild' | 'update' = 'rebuild', generateMetadata: boolean = false) {
|
||||
const modeEmoji = mode === 'rebuild' ? '🔄' : '⬆️';
|
||||
const modeText = mode === 'rebuild' ? 'Rebuilding' : 'Updating';
|
||||
console.log(`${modeEmoji} ${modeText} n8n workflow templates...\n`);
|
||||
|
||||
if (generateMetadata) {
|
||||
console.log('🤖 Metadata generation enabled (using OpenAI)\n');
|
||||
}
|
||||
|
||||
// Ensure data directory exists
|
||||
const dataDir = './data';
|
||||
if (!fs.existsSync(dataDir)) {
|
||||
@@ -114,6 +123,14 @@ async function fetchTemplates(mode: 'rebuild' | 'update' = 'rebuild') {
|
||||
console.log(` ${index + 1}. ${node.node} (${node.count} templates)`);
|
||||
});
|
||||
|
||||
// Generate metadata if requested
|
||||
if (generateMetadata && process.env.OPENAI_API_KEY) {
|
||||
console.log('\n🤖 Generating metadata for templates...');
|
||||
await generateTemplateMetadata(db, service);
|
||||
} else if (generateMetadata && !process.env.OPENAI_API_KEY) {
|
||||
console.log('\n⚠️ Metadata generation requested but OPENAI_API_KEY not set');
|
||||
}
|
||||
|
||||
} catch (error) {
|
||||
console.error('\n❌ Error fetching templates:', error);
|
||||
process.exit(1);
|
||||
@@ -125,34 +142,120 @@ async function fetchTemplates(mode: 'rebuild' | 'update' = 'rebuild') {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Generate AI metadata for stored templates that do not have any yet.
 *
 * Loads up to 500 templates without metadata from the repository, turns each
 * one into a metadata request, runs the requests through the OpenAI batch
 * processor, stores every successful result and finally prints metadata
 * statistics. Errors are logged and never re-thrown, so a failure here does
 * not fail the surrounding template fetch.
 *
 * @param db - Database handle; not used directly (the repository is reached through `service`).
 * @param service - Template service whose repository holds the templates.
 */
async function generateTemplateMetadata(db: any, service: TemplateService) {
  try {
    const { BatchProcessor } = await import('../templates/batch-processor');
    const { gunzipSync } = await import('zlib');
    const repository = (service as any).repository;

    // Get templates without metadata
    const templatesWithoutMetadata = repository.getTemplatesWithoutMetadata(500);

    if (templatesWithoutMetadata.length === 0) {
      console.log('✅ All templates already have metadata');
      return;
    }

    console.log(`Found ${templatesWithoutMetadata.length} templates without metadata`);

    // Fall back to the default when OPENAI_BATCH_SIZE is not a positive integer;
    // a zero, negative or NaN size must never reach the batch splitter.
    const requestedBatchSize = Number.parseInt(process.env.OPENAI_BATCH_SIZE || '100', 10);
    const batchSize = Number.isInteger(requestedBatchSize) && requestedBatchSize > 0
      ? requestedBatchSize
      : 100;

    // Create batch processor
    const processor = new BatchProcessor({
      apiKey: process.env.OPENAI_API_KEY!,
      model: process.env.OPENAI_MODEL || 'gpt-4o-mini',
      batchSize,
      outputDir: './temp/batch'
    });

    // Decode the stored workflow of one template row.
    // NOTE(review): the compressed column is presumably gzip + base64 (going by
    // its name) - confirm against the writer. Payloads that do not start with
    // the gzip magic bytes are parsed as plain base64-encoded JSON, as before.
    const decodeWorkflow = (t: any): unknown => {
      if (t.workflow_json_compressed) {
        const raw = Buffer.from(t.workflow_json_compressed, 'base64');
        const isGzip = raw.length > 2 && raw[0] === 0x1f && raw[1] === 0x8b;
        return JSON.parse((isGzip ? gunzipSync(raw) : raw).toString());
      }
      return t.workflow_json ? JSON.parse(t.workflow_json) : undefined;
    };

    // Prepare metadata requests. Rows are converted one by one so that a single
    // malformed template cannot abort the whole run.
    const requests: MetadataRequest[] = [];
    for (const t of templatesWithoutMetadata) {
      let nodes: string[];
      try {
        nodes = JSON.parse(t.nodes_used);
      } catch (error) {
        console.warn(`⚠️  Skipping template ${t.id}: invalid nodes_used`, error);
        continue;
      }

      // The workflow is optional context; send the request without it if it cannot be decoded.
      let workflow: unknown;
      try {
        workflow = decodeWorkflow(t);
      } catch (error) {
        console.warn(`⚠️  Template ${t.id}: workflow could not be decoded, continuing without it`, error);
      }

      requests.push({
        templateId: t.id,
        name: t.name,
        description: t.description,
        nodes,
        workflow
      });
    }

    if (requests.length === 0) {
      console.log('⚠️  No templates could be prepared for metadata generation');
      return;
    }

    // Process in batches
    const results = await processor.processTemplates(requests, (message, current, total) => {
      process.stdout.write(`\r📊 ${message}: ${current}/${total}`);
    });

    console.log('\n');

    // Update database with metadata; results carrying an error hold fallback
    // metadata only and are therefore not persisted.
    const metadataMap = new Map();
    let failed = 0;
    for (const [templateId, result] of results) {
      if (result.error) {
        failed++;
      } else {
        metadataMap.set(templateId, result.metadata);
      }
    }

    if (metadataMap.size > 0) {
      repository.batchUpdateMetadata(metadataMap);
      console.log(`✅ Updated metadata for ${metadataMap.size} templates`);
    }

    if (failed > 0) {
      console.log(`⚠️  ${failed} templates returned errors and were left without metadata`);
    }

    // Show stats
    const stats = repository.getMetadataStats();
    console.log('\n📈 Metadata Statistics:');
    console.log(`   - Total templates: ${stats.total}`);
    console.log(`   - With metadata: ${stats.withMetadata}`);
    console.log(`   - Without metadata: ${stats.withoutMetadata}`);
    console.log(`   - Outdated (>30 days): ${stats.outdated}`);
  } catch (error) {
    console.error('\n❌ Error generating metadata:', error);
  }
}
|
||||
|
||||
// Parse command line arguments
|
||||
function parseArgs(): 'rebuild' | 'update' {
|
||||
function parseArgs(): { mode: 'rebuild' | 'update', generateMetadata: boolean } {
|
||||
const args = process.argv.slice(2);
|
||||
|
||||
let mode: 'rebuild' | 'update' = 'rebuild';
|
||||
let generateMetadata = false;
|
||||
|
||||
// Check for --mode flag
|
||||
const modeIndex = args.findIndex(arg => arg.startsWith('--mode'));
|
||||
if (modeIndex !== -1) {
|
||||
const modeArg = args[modeIndex];
|
||||
const mode = modeArg.includes('=') ? modeArg.split('=')[1] : args[modeIndex + 1];
|
||||
const modeValue = modeArg.includes('=') ? modeArg.split('=')[1] : args[modeIndex + 1];
|
||||
|
||||
if (mode === 'update') {
|
||||
return 'update';
|
||||
if (modeValue === 'update') {
|
||||
mode = 'update';
|
||||
}
|
||||
}
|
||||
|
||||
// Check for --update flag as shorthand
|
||||
if (args.includes('--update')) {
|
||||
return 'update';
|
||||
mode = 'update';
|
||||
}
|
||||
|
||||
// Default to rebuild
|
||||
return 'rebuild';
|
||||
// Check for --generate-metadata flag
|
||||
if (args.includes('--generate-metadata') || args.includes('--metadata')) {
|
||||
generateMetadata = true;
|
||||
}
|
||||
|
||||
// Show help if requested
|
||||
if (args.includes('--help') || args.includes('-h')) {
|
||||
console.log('Usage: npm run fetch:templates [options]\n');
|
||||
console.log('Options:');
|
||||
console.log(' --mode=rebuild|update Rebuild from scratch or update existing (default: rebuild)');
|
||||
console.log(' --update Shorthand for --mode=update');
|
||||
console.log(' --generate-metadata Generate AI metadata for templates (requires OPENAI_API_KEY)');
|
||||
console.log(' --metadata Shorthand for --generate-metadata');
|
||||
console.log(' --help, -h Show this help message');
|
||||
process.exit(0);
|
||||
}
|
||||
|
||||
return { mode, generateMetadata };
|
||||
}
|
||||
|
||||
// Run if called directly
|
||||
if (require.main === module) {
|
||||
const mode = parseArgs();
|
||||
fetchTemplates(mode).catch(console.error);
|
||||
const { mode, generateMetadata } = parseArgs();
|
||||
fetchTemplates(mode, generateMetadata).catch(console.error);
|
||||
}
|
||||
|
||||
export { fetchTemplates };
|
||||
282
src/templates/batch-processor.ts
Normal file
282
src/templates/batch-processor.ts
Normal file
@@ -0,0 +1,282 @@
|
||||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import OpenAI from 'openai';
|
||||
import { logger } from '../utils/logger';
|
||||
import { MetadataGenerator, MetadataRequest, MetadataResult } from './metadata-generator';
|
||||
|
||||
/**
 * Construction options for the batch processor.
 */
export interface BatchProcessorOptions {
  /** OpenAI API key used for file uploads and batch jobs. */
  apiKey: string;

  /** Model name forwarded to the metadata generator. */
  model?: string;

  /** Number of template requests per batch; 100 when omitted. */
  batchSize?: number;

  /** Directory for temporary JSONL request files; './temp' when omitted. */
  outputDir?: string;
}
|
||||
|
||||
/** Lifecycle states a batch job can report. */
type BatchJobStatus =
  | 'validating'
  | 'in_progress'
  | 'finalizing'
  | 'completed'
  | 'failed'
  | 'expired'
  | 'cancelled';

/**
 * Shape of a batch job record as used by the batch processor.
 */
export interface BatchJob {
  /** Batch job identifier. */
  id: string;
  /** Current lifecycle state. */
  status: BatchJobStatus;
  /** Creation timestamp (numeric). */
  created_at: number;
  /** Completion timestamp, when available. */
  completed_at?: number;
  /** Id of the uploaded request file. */
  input_file_id: string;
  /** Id of the result file, when available. */
  output_file_id?: string;
  /** Error details, when present. */
  error?: any;
}
|
||||
|
||||
/**
 * Runs template metadata requests through the OpenAI Batch API.
 *
 * Templates are split into batches. Each batch is written to a local JSONL
 * file, uploaded, submitted as a batch job against `/v1/chat/completions`,
 * polled until it reaches a terminal state, and its output file is parsed
 * into metadata results. Temporary local and remote files are removed
 * afterwards, whether the batch succeeded or not.
 */
export class BatchProcessor {
  /** Batch size used when none (or an unusable one) is supplied. */
  private static readonly DEFAULT_BATCH_SIZE = 100;

  /** Seconds to wait before each successive status poll; the last entry repeats. */
  private static readonly POLL_WAIT_SECONDS = [60, 120, 300, 600, 900, 1800];

  /** Safety limit on the number of status polls per batch job. */
  private static readonly MAX_POLL_ATTEMPTS = 100;

  private client: OpenAI;
  private generator: MetadataGenerator;
  private batchSize: number;
  private outputDir: string;

  /**
   * @param options - API key plus optional model, batch size and output directory.
   */
  constructor(options: BatchProcessorOptions) {
    this.client = new OpenAI({ apiKey: options.apiKey });
    this.generator = new MetadataGenerator(options.apiKey, options.model);
    this.batchSize = BatchProcessor.resolveBatchSize(options.batchSize);
    this.outputDir = options.outputDir || './temp';

    // Ensure output directory exists
    if (!fs.existsSync(this.outputDir)) {
      fs.mkdirSync(this.outputDir, { recursive: true });
    }
  }

  /**
   * Turn the requested batch size into a usable one.
   *
   * Missing, non-finite or sub-1 values fall back to the default: a negative
   * size would make the batch splitter loop forever. Fractional values are
   * floored because fractional slice bounds produce overlapping batches.
   */
  private static resolveBatchSize(requested?: number): number {
    if (requested === undefined || !Number.isFinite(requested) || requested < 1) {
      return BatchProcessor.DEFAULT_BATCH_SIZE;
    }
    return Math.floor(requested);
  }

  /**
   * Process templates in batches.
   *
   * A batch that fails is logged and skipped; the remaining batches are still
   * processed, so the returned map may cover only part of the input.
   *
   * @param templates - Metadata requests to process.
   * @param progressCallback - Optional progress hook called before and after each batch.
   * @returns Results keyed by template id.
   */
  async processTemplates(
    templates: MetadataRequest[],
    progressCallback?: (message: string, current: number, total: number) => void
  ): Promise<Map<number, MetadataResult>> {
    const results = new Map<number, MetadataResult>();
    const batches = this.createBatches(templates);

    logger.info(`Processing ${templates.length} templates in ${batches.length} batches`);

    for (let i = 0; i < batches.length; i++) {
      const batch = batches[i];
      const batchNum = i + 1;

      try {
        progressCallback?.(`Processing batch ${batchNum}/${batches.length}`, i * this.batchSize, templates.length);

        // Process this batch
        const batchResults = await this.processBatch(batch, `batch_${batchNum}`);

        // Merge results
        for (const result of batchResults) {
          results.set(result.templateId, result);
        }

        logger.info(`Completed batch ${batchNum}/${batches.length}: ${batchResults.length} results`);
        progressCallback?.(
          `Completed batch ${batchNum}/${batches.length}`,
          Math.min((i + 1) * this.batchSize, templates.length),
          templates.length
        );
      } catch (error) {
        logger.error(`Error processing batch ${batchNum}:`, error);
        // Continue with next batch
      }
    }

    logger.info(`Batch processing complete: ${results.size} results`);
    return results;
  }

  /**
   * Process a single batch: write the request file, upload it, run the batch
   * job to completion and parse its results.
   *
   * Cleanup runs in `finally`, so the local file and any files already
   * uploaded to OpenAI are removed on failure as well as on success.
   */
  private async processBatch(templates: MetadataRequest[], batchName: string): Promise<MetadataResult[]> {
    // Create JSONL file
    const inputFile = await this.createBatchFile(templates, batchName);
    let inputFileId: string | undefined;
    let outputFileId: string | undefined;

    try {
      // Upload file to OpenAI
      const uploadedFile = await this.uploadFile(inputFile);
      inputFileId = uploadedFile.id;

      // Create batch job
      const batchJob = await this.createBatchJob(uploadedFile.id);

      // Monitor job until completion
      const completedJob = await this.monitorBatchJob(batchJob.id);
      outputFileId = completedJob.output_file_id;

      // Retrieve and parse results
      return await this.retrieveResults(completedJob);
    } finally {
      // cleanup() logs its own failures and never throws, so it cannot mask the original error.
      await this.cleanup(inputFile, inputFileId, outputFileId);
    }
  }

  /**
   * Split templates into consecutive chunks of at most `batchSize` items.
   */
  private createBatches(templates: MetadataRequest[]): MetadataRequest[][] {
    const batches: MetadataRequest[][] = [];

    for (let i = 0; i < templates.length; i += this.batchSize) {
      batches.push(templates.slice(i, i + this.batchSize));
    }

    return batches;
  }

  /**
   * Write one JSONL line per template request to a timestamped file in the
   * output directory.
   *
   * @returns Path of the file that was written.
   */
  private async createBatchFile(templates: MetadataRequest[], batchName: string): Promise<string> {
    const filename = path.join(this.outputDir, `${batchName}_${Date.now()}.jsonl`);
    const stream = fs.createWriteStream(filename);

    // Register the completion handlers before writing so no event can be missed.
    const finished = new Promise<void>((resolve, reject) => {
      stream.on('finish', () => resolve());
      stream.on('error', reject);
    });

    for (const template of templates) {
      const request = this.generator.createBatchRequest(template);
      stream.write(JSON.stringify(request) + '\n');
    }

    stream.end();

    // Wait for stream to finish
    await finished;

    logger.debug(`Created batch file: ${filename} with ${templates.length} requests`);
    return filename;
  }

  /**
   * Upload a local request file to OpenAI with purpose `batch`.
   */
  private async uploadFile(filepath: string): Promise<any> {
    const file = fs.createReadStream(filepath);
    const uploadedFile = await this.client.files.create({
      file,
      purpose: 'batch'
    });

    logger.debug(`Uploaded file: ${uploadedFile.id}`);
    return uploadedFile;
  }

  /**
   * Create a batch job for an uploaded request file, using the chat
   * completions endpoint and a 24h completion window.
   */
  private async createBatchJob(fileId: string): Promise<any> {
    const batchJob = await this.client.batches.create({
      input_file_id: fileId,
      endpoint: '/v1/chat/completions',
      completion_window: '24h'
    });

    logger.info(`Created batch job: ${batchJob.id}`);
    return batchJob;
  }

  /**
   * Poll a batch job until it completes.
   *
   * Waits follow the progressive schedule in POLL_WAIT_SECONDS (the final
   * interval repeats) and polling stops after MAX_POLL_ATTEMPTS checks.
   *
   * @throws Error when the job ends as failed, expired or cancelled, or when
   *   the poll limit is reached.
   */
  private async monitorBatchJob(batchId: string): Promise<any> {
    const schedule = BatchProcessor.POLL_WAIT_SECONDS;
    const maxAttempts = BatchProcessor.MAX_POLL_ATTEMPTS;

    for (let attempt = 1; attempt <= maxAttempts; attempt++) {
      const batchJob = await this.client.batches.retrieve(batchId);

      logger.debug(`Batch ${batchId} status: ${batchJob.status} (attempt ${attempt})`);

      if (batchJob.status === 'completed') {
        logger.info(`Batch job ${batchId} completed successfully`);
        return batchJob;
      }

      if (['failed', 'expired', 'cancelled'].includes(batchJob.status)) {
        throw new Error(`Batch job failed with status: ${batchJob.status}`);
      }

      // Wait before next check
      const waitTime = schedule[Math.min(attempt - 1, schedule.length - 1)];
      logger.debug(`Waiting ${waitTime} seconds before next check...`);
      await this.sleep(waitTime * 1000);
    }

    throw new Error(`Batch job monitoring timed out after ${maxAttempts} attempts`);
  }

  /**
   * Download the output file of a completed job and parse it line by line.
   * Lines that cannot be parsed are logged and skipped.
   *
   * @throws Error when the job has no output file.
   */
  private async retrieveResults(batchJob: any): Promise<MetadataResult[]> {
    if (!batchJob.output_file_id) {
      throw new Error('No output file available for batch job');
    }

    // Download result file
    const fileResponse = await this.client.files.content(batchJob.output_file_id);
    const fileContent = await fileResponse.text();

    // Parse JSONL results
    const results: MetadataResult[] = [];
    const lines = fileContent.trim().split('\n');

    for (const line of lines) {
      if (!line) continue;

      try {
        const result = JSON.parse(line);
        const parsed = this.generator.parseResult(result);
        results.push(parsed);
      } catch (error) {
        logger.error('Error parsing result line:', error);
      }
    }

    logger.info(`Retrieved ${results.length} results from batch job`);
    return results;
  }

  /**
   * Remove the local request file and any files uploaded to or produced on
   * OpenAI. Every step is best-effort: failures are logged as warnings.
   *
   * @param localFile - Path of the local JSONL file.
   * @param inputFileId - Id of the uploaded input file, if the upload happened.
   * @param outputFileId - Id of the job's output file, if one exists.
   */
  private async cleanup(localFile: string, inputFileId?: string, outputFileId?: string): Promise<void> {
    // Delete local file
    try {
      fs.unlinkSync(localFile);
      logger.debug(`Deleted local file: ${localFile}`);
    } catch (error) {
      logger.warn(`Failed to delete local file: ${localFile}`, error);
    }

    // Delete uploaded files from OpenAI
    if (inputFileId) {
      try {
        await this.client.files.del(inputFileId);
        logger.debug(`Deleted input file from OpenAI: ${inputFileId}`);
      } catch (error) {
        logger.warn(`Failed to delete input file from OpenAI: ${inputFileId}`, error);
      }
    }

    if (outputFileId) {
      try {
        await this.client.files.del(outputFileId);
        logger.debug(`Deleted output file from OpenAI: ${outputFileId}`);
      } catch (error) {
        logger.warn(`Failed to delete output file from OpenAI: ${outputFileId}`, error);
      }
    }
  }

  /**
   * Resolve after the given number of milliseconds.
   */
  private sleep(ms: number): Promise<void> {
    return new Promise(resolve => setTimeout(resolve, ms));
  }
}
|
||||
287
src/templates/metadata-generator.ts
Normal file
287
src/templates/metadata-generator.ts
Normal file
@@ -0,0 +1,287 @@
|
||||
import OpenAI from 'openai';
|
||||
import { z } from 'zod';
|
||||
import { logger } from '../utils/logger';
|
||||
import { TemplateWorkflow, TemplateDetail } from './template-fetcher';
|
||||
|
||||
// Builder for the string-array fields shared by the metadata schema.
const metadataStringList = () => z.array(z.string());

/**
 * Zod schema used to validate generated template metadata: capped string
 * lists, a three-level complexity enum and a setup time between 5 and 480 minutes.
 */
export const TemplateMetadataSchema = z.object({
  categories: metadataStringList().max(5).describe('Main categories (max 5)'),
  complexity: z.enum(['simple', 'medium', 'complex']).describe('Implementation complexity'),
  use_cases: metadataStringList().max(5).describe('Primary use cases'),
  estimated_setup_minutes: z.number().min(5).max(480).describe('Setup time in minutes'),
  required_services: metadataStringList().describe('External services needed'),
  key_features: metadataStringList().max(5).describe('Main capabilities'),
  target_audience: metadataStringList().max(3).describe('Target users')
});
|
||||
|
||||
export type TemplateMetadata = z.infer<typeof TemplateMetadataSchema>;
|
||||
|
||||
/**
 * Input for generating metadata for one template.
 */
export interface MetadataRequest {
  /** Template id; embedded in the batch request's custom_id. */
  templateId: number;
  /** Template name. */
  name: string;
  /** Template description, when one exists. */
  description?: string;
  /** Node type names used by the template. */
  nodes: string[];
  /** Parsed workflow; only its node and connection counts are read when present. */
  workflow?: any;
}
|
||||
|
||||
/**
 * Outcome of metadata generation for one template.
 */
export interface MetadataResult {
  /** Template id recovered from the request's custom_id. */
  templateId: number;
  /** Validated metadata, or the generator's fallback metadata when `error` is set. */
  metadata: TemplateMetadata;
  /** Failure description; absent on success. */
  error?: string;
}
|
||||
|
||||
/**
 * Builds OpenAI requests that ask for structured template metadata and
 * converts the responses into validated metadata results.
 */
export class MetadataGenerator {
  private client: OpenAI;
  private model: string;

  /**
   * @param apiKey - OpenAI API key.
   * @param model - Model name placed in every request (default 'gpt-4o-mini').
   */
  constructor(apiKey: string, model: string = 'gpt-4o-mini') {
    this.client = new OpenAI({ apiKey });
    this.model = model;
  }

  /**
   * Generate the JSON schema for OpenAI structured outputs
   */
  private getJsonSchema() {
    return {
      name: 'template_metadata',
      strict: true,
      schema: {
        type: 'object',
        properties: {
          categories: {
            type: 'array',
            items: { type: 'string' },
            maxItems: 5,
            description: 'Main categories like automation, integration, data processing'
          },
          complexity: {
            type: 'string',
            enum: ['simple', 'medium', 'complex'],
            description: 'Implementation complexity level'
          },
          use_cases: {
            type: 'array',
            items: { type: 'string' },
            maxItems: 5,
            description: 'Primary use cases for this template'
          },
          estimated_setup_minutes: {
            type: 'number',
            minimum: 5,
            maximum: 480,
            description: 'Estimated setup time in minutes'
          },
          required_services: {
            type: 'array',
            items: { type: 'string' },
            description: 'External services or APIs required'
          },
          key_features: {
            type: 'array',
            items: { type: 'string' },
            maxItems: 5,
            description: 'Main capabilities or features'
          },
          target_audience: {
            type: 'array',
            items: { type: 'string' },
            maxItems: 3,
            description: 'Target users like developers, marketers, analysts'
          }
        },
        required: [
          'categories',
          'complexity',
          'use_cases',
          'estimated_setup_minutes',
          'required_services',
          'key_features',
          'target_audience'
        ],
        additionalProperties: false
      }
    };
  }

  /**
   * Create a batch request for a single template.
   *
   * The template id is encoded in `custom_id` as `template-<id>` so that
   * parseResult() can map the response back to the template.
   *
   * @param template - Template to describe.
   * @returns One request object, ready to be serialised as a JSONL line.
   */
  createBatchRequest(template: MetadataRequest): any {
    // Extract node information for analysis
    const nodesSummary = this.summarizeNodes(template.nodes);

    // Build context for the AI; empty entries (no description / no workflow) are dropped
    const context = [
      `Template: ${template.name}`,
      template.description ? `Description: ${template.description}` : '',
      `Nodes Used (${template.nodes.length}): ${nodesSummary}`,
      template.workflow ? `Workflow has ${template.workflow.nodes?.length || 0} nodes with ${Object.keys(template.workflow.connections || {}).length} connections` : ''
    ].filter(Boolean).join('\n');

    return {
      custom_id: `template-${template.templateId}`,
      method: 'POST',
      url: '/v1/chat/completions',
      body: {
        model: this.model,
        temperature: 0.1,
        max_tokens: 500,
        response_format: {
          type: 'json_schema',
          json_schema: this.getJsonSchema()
        },
        messages: [
          {
            role: 'system',
            content: `You are an n8n workflow expert analyzing templates to extract structured metadata.

Analyze the provided template information and extract:
- Categories: Classify into relevant categories (automation, integration, data, communication, etc.)
- Complexity: Assess as simple (1-3 nodes), medium (4-8 nodes), or complex (9+ nodes or advanced logic)
- Use cases: Identify primary business use cases
- Setup time: Estimate realistic setup time based on complexity and required configurations
- Required services: List any external services, APIs, or accounts needed
- Key features: Highlight main capabilities or benefits
- Target audience: Identify who would benefit most (developers, marketers, ops teams, etc.)

Be concise and practical in your analysis.`
          },
          {
            role: 'user',
            content: context
          }
        ]
      }
    };
  }

  /**
   * Summarize nodes for better context: group them into coarse categories,
   * count each group and list the ten largest groups, largest first.
   */
  private summarizeNodes(nodes: string[]): string {
    // A Map keeps insertion order and is safe for names such as "constructor".
    const groups = new Map<string, number>();

    for (const node of nodes) {
      // Extract base node name (remove package prefix)
      const baseName = node.split('.').pop() || node;
      const label = this.classifyNode(node, baseName);
      groups.set(label, (groups.get(label) ?? 0) + 1);
    }

    return [...groups.entries()]
      .sort((a, b) => b[1] - a[1])
      .slice(0, 10) // Top 10 groups
      .map(([name, count]) => (count > 1 ? `${name} (${count})` : name))
      .join(', ');
  }

  /**
   * Pick the summary group for one node.
   *
   * Keywords are matched case-insensitively so camelCase names such as
   * `googleSheets` or `respondToWebhook` are recognised. "ai" must be a whole
   * camelCase word: a plain substring test would also catch `airtable`,
   * `wait` or `mailchimp`. "langchain" is looked up in the full node type
   * because the package prefix is not part of the base name.
   *
   * @param node - Full node type as given in the request.
   * @param baseName - Node type without its package prefix.
   * @returns A category label, or the base name without a trailing "Trigger"/"Node".
   */
  private classifyNode(node: string, baseName: string): string {
    const lowered = baseName.toLowerCase();
    const words = (baseName.match(/[A-Z]+(?![a-z])|[A-Z]?[a-z]+/g) ?? []).map(word => word.toLowerCase());
    const mentions = (...keywords: string[]): boolean => keywords.some(keyword => lowered.includes(keyword));

    if (mentions('webhook', 'http')) return 'HTTP/Webhooks';
    if (mentions('database', 'postgres', 'mysql')) return 'Database';
    if (mentions('slack', 'email', 'gmail')) return 'Communication';
    if (words.includes('ai') || mentions('openai') || node.toLowerCase().includes('langchain')) return 'AI/ML';
    if (mentions('sheet', 'csv', 'excel')) return 'Spreadsheets';

    return baseName.replace(/Trigger$/, '').replace(/Node$/, '');
  }

  /**
   * Read the template id back out of a result's `custom_id`.
   *
   * @throws Error when `custom_id` is missing or holds no number, so that a
   *   result can never be attributed to a NaN template id.
   */
  private extractTemplateId(result: any): number {
    const customId = result?.custom_id;
    const templateId = typeof customId === 'string'
      ? Number.parseInt(customId.replace('template-', ''), 10)
      : NaN;

    if (Number.isNaN(templateId)) {
      throw new Error(`Batch result has no usable custom_id: ${JSON.stringify(customId)}`);
    }
    return templateId;
  }

  /**
   * Parse a batch result.
   *
   * Any failure (request error, missing content, invalid JSON, schema
   * mismatch) yields the fallback metadata together with a non-empty `error`,
   * which lets callers tell fallback data from real data.
   *
   * @param result - One parsed line of the batch output file.
   * @returns The validated metadata, or fallback metadata plus `error`.
   * @throws Error when the result carries no usable `custom_id`.
   */
  parseResult(result: any): MetadataResult {
    const templateId = this.extractTemplateId(result);

    try {
      if (result.error) {
        return {
          templateId,
          metadata: this.getDefaultMetadata(),
          // Never leave error empty here: an empty value would look like success.
          error: result.error.message || 'Unknown error'
        };
      }

      const response = result.response;

      // Surface an error reported in the response body instead of a generic structure error.
      const apiError = response?.body?.error;
      if (apiError) {
        throw new Error(apiError.message || 'Request failed');
      }

      if (!response?.body?.choices?.[0]?.message?.content) {
        throw new Error('Invalid response structure');
      }

      const content = response.body.choices[0].message.content;
      const metadata = JSON.parse(content);

      // Validate with Zod
      const validated = TemplateMetadataSchema.parse(metadata);

      return {
        templateId,
        metadata: validated
      };
    } catch (error) {
      logger.error(`Error parsing result for ${result.custom_id}:`, error);
      return {
        templateId,
        metadata: this.getDefaultMetadata(),
        error: error instanceof Error ? error.message : 'Unknown error'
      };
    }
  }

  /**
   * Get default metadata for fallback
   */
  private getDefaultMetadata(): TemplateMetadata {
    return {
      categories: ['automation'],
      complexity: 'medium',
      use_cases: ['Process automation'],
      estimated_setup_minutes: 30,
      required_services: [],
      key_features: ['Workflow automation'],
      target_audience: ['developers']
    };
  }

  /**
   * Generate metadata for a single template (for testing).
   *
   * Sends one chat completion request; on any failure the error is logged
   * and the fallback metadata is returned instead of throwing.
   */
  async generateSingle(template: MetadataRequest): Promise<TemplateMetadata> {
    try {
      const completion = await this.client.chat.completions.create({
        model: this.model,
        temperature: 0.1,
        max_tokens: 500,
        response_format: {
          type: 'json_schema',
          json_schema: this.getJsonSchema()
        } as any,
        messages: [
          {
            role: 'system',
            content: `You are an n8n workflow expert analyzing templates to extract structured metadata.`
          },
          {
            role: 'user',
            content: `Analyze this template: ${template.name}\nNodes: ${template.nodes.join(', ')}`
          }
        ]
      });

      const content = completion.choices[0].message.content;
      if (!content) {
        throw new Error('No content in response');
      }

      const metadata = JSON.parse(content);
      return TemplateMetadataSchema.parse(metadata);
    } catch (error) {
      logger.error('Error generating single metadata:', error);
      return this.getDefaultMetadata();
    }
  }
}
|
||||
@@ -22,6 +22,8 @@ export interface StoredTemplate {
|
||||
updated_at: string;
|
||||
url: string;
|
||||
scraped_at: string;
|
||||
metadata_json?: string; // Structured metadata from OpenAI (JSON string)
|
||||
metadata_generated_at?: string; // When metadata was generated
|
||||
}
|
||||
|
||||
export class TemplateRepository {
|
||||
@@ -536,4 +538,91 @@ export class TemplateRepository {
|
||||
// Non-critical error - search will fallback to LIKE
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Store metadata for one template: the value is serialised to JSON and the
 * generation timestamp is set to the database's current time.
 *
 * @param templateId - Id of the template row to update.
 * @param metadata - Metadata value to serialise.
 */
updateTemplateMetadata(templateId: number, metadata: any): void {
  const update = this.db.prepare(`
    UPDATE templates
    SET metadata_json = ?, metadata_generated_at = CURRENT_TIMESTAMP
    WHERE id = ?
  `);
  const serialized = JSON.stringify(metadata);

  update.run(serialized, templateId);
  logger.debug(`Updated metadata for template ${templateId}`);
}
|
||||
|
||||
/**
 * Store metadata for many templates using one prepared statement.
 *
 * The updates run one after another without an explicit transaction, so a
 * failure part-way through leaves the earlier rows updated.
 *
 * @param metadataMap - Metadata keyed by template id.
 */
batchUpdateMetadata(metadataMap: Map<number, any>): void {
  const update = this.db.prepare(`
    UPDATE templates
    SET metadata_json = ?, metadata_generated_at = CURRENT_TIMESTAMP
    WHERE id = ?
  `);

  metadataMap.forEach((metadata, templateId) => {
    update.run(JSON.stringify(metadata), templateId);
  });

  logger.info(`Updated metadata for ${metadataMap.size} templates`);
}
|
||||
|
||||
/**
 * Fetch templates that still need metadata, i.e. rows where the metadata
 * JSON or its generation timestamp is NULL, most viewed first.
 *
 * @param limit - Maximum number of rows to return (default 100).
 */
getTemplatesWithoutMetadata(limit: number = 100): StoredTemplate[] {
  const rows = this.db
    .prepare(`
      SELECT * FROM templates
      WHERE metadata_json IS NULL OR metadata_generated_at IS NULL
      ORDER BY views DESC
      LIMIT ?
    `)
    .all(limit);

  return rows as StoredTemplate[];
}
|
||||
|
||||
/**
 * List templates whose metadata was generated more than `daysOld` days ago.
 *
 * The cutoff is computed inside SQLite as `datetime('now', '-<daysOld> days')`.
 * Rows whose `metadata_generated_at` is NULL never satisfy the comparison and
 * are therefore not returned. Results are ordered by `views`, highest first.
 *
 * @param daysOld - age threshold in days (default 30)
 * @param limit - maximum number of rows to return (default 100)
 * @returns the matching rows from `templates`
 */
getTemplatesWithOutdatedMetadata(daysOld: number = 30, limit: number = 100): StoredTemplate[] {
  const query = `
    SELECT * FROM templates
    WHERE metadata_generated_at < datetime('now', '-' || ? || ' days')
    ORDER BY views DESC
    LIMIT ?
  `;

  const rows = this.db.prepare(query).all(daysOld, limit);
  return rows as StoredTemplate[];
}
|
||||
|
||||
/**
 * Summarize metadata coverage across all templates.
 *
 * - `total` comes from `getTemplateCount()`.
 * - `withMetadata` counts rows where both `metadata_json` and
 *   `metadata_generated_at` are set. This is the exact complement of the
 *   predicate used by `getTemplatesWithoutMetadata`, so the two always agree.
 * - `withoutMetadata` is `total - withMetadata`.
 * - `outdated` counts rows whose `metadata_generated_at` is older than
 *   `daysOld` days, using the same cutoff expression as
 *   `getTemplatesWithOutdatedMetadata`.
 *
 * @param daysOld - age in days after which metadata counts as outdated
 *   (default 30, matching the previous hard-coded threshold)
 * @returns the four counters described above
 */
getMetadataStats(daysOld: number = 30): {
  total: number;
  withMetadata: number;
  withoutMetadata: number;
  outdated: number;
} {
  const total = this.getTemplateCount();

  const withMetadataRow = this.db.prepare(`
    SELECT COUNT(*) as count FROM templates
    WHERE metadata_json IS NOT NULL AND metadata_generated_at IS NOT NULL
  `).get() as { count: number };
  const withMetadata = withMetadataRow.count;

  const withoutMetadata = total - withMetadata;

  // Bind the threshold instead of inlining it so callers can pick the window.
  const outdatedRow = this.db.prepare(`
    SELECT COUNT(*) as count FROM templates
    WHERE metadata_generated_at < datetime('now', '-' || ? || ' days')
  `).get(daysOld) as { count: number };
  const outdated = outdatedRow.count;

  return { total, withMetadata, withoutMetadata, outdated };
}
|
||||
}
|
||||
203
tests/unit/templates/metadata-generator.test.ts
Normal file
203
tests/unit/templates/metadata-generator.test.ts
Normal file
@@ -0,0 +1,203 @@
|
||||
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
||||
import { MetadataGenerator, TemplateMetadataSchema, MetadataRequest } from '../../../src/templates/metadata-generator';
|
||||
|
||||
// Mock OpenAI so no test ever reaches the real API.
// The default export is mocked with a `function` implementation rather than an
// arrow function: arrow functions cannot be invoked with `new`, and newer
// Vitest versions construct mocks for real, so this keeps the mock usable if
// the code under test instantiates the client with `new` (TODO confirm usage).
vi.mock('openai', () => {
  const OpenAIMock = vi.fn(function () {
    return {
      chat: {
        completions: {
          create: vi.fn()
        }
      }
    };
  });

  return { default: OpenAIMock };
});
|
||||
|
||||
describe('MetadataGenerator', () => {
  /**
   * Build a metadata object with all seven fields the schema tests exercise.
   * Individual tests override only the field(s) they care about, which keeps
   * the reason a fixture is valid or invalid visible at the call site.
   */
  const buildMetadata = (overrides: Record<string, unknown> = {}) => ({
    categories: ['automation'],
    complexity: 'simple',
    use_cases: ['API calls'],
    estimated_setup_minutes: 15,
    required_services: [],
    key_features: ['Fast'],
    target_audience: ['developers'],
    ...overrides
  });

  /** Build a batch result line wrapping a single chat completion with `content`. */
  const buildCompletionResult = (customId: string, content: string) => ({
    custom_id: customId,
    response: {
      body: {
        choices: [{
          message: { content },
          finish_reason: 'stop'
        }]
      }
    }
  });

  let generator: MetadataGenerator;

  beforeEach(() => {
    generator = new MetadataGenerator('test-api-key', 'gpt-4o-mini');
  });

  describe('createBatchRequest', () => {
    it('should create a valid batch request', () => {
      const template: MetadataRequest = {
        templateId: 123,
        name: 'Test Workflow',
        description: 'A test workflow',
        nodes: ['n8n-nodes-base.webhook', 'n8n-nodes-base.httpRequest', 'n8n-nodes-base.slack']
      };

      const request = generator.createBatchRequest(template);

      expect(request.custom_id).toBe('template-123');
      expect(request.method).toBe('POST');
      expect(request.url).toBe('/v1/chat/completions');
      expect(request.body.model).toBe('gpt-4o-mini');
      expect(request.body.response_format.type).toBe('json_schema');
      expect(request.body.response_format.json_schema.strict).toBe(true);
      expect(request.body.messages).toHaveLength(2);
    });

    it('should summarize nodes effectively', () => {
      const template: MetadataRequest = {
        templateId: 456,
        name: 'Complex Workflow',
        nodes: [
          'n8n-nodes-base.webhook',
          'n8n-nodes-base.httpRequest',
          'n8n-nodes-base.httpRequest',
          'n8n-nodes-base.postgres',
          'n8n-nodes-base.slack',
          '@n8n/n8n-nodes-langchain.agent'
        ]
      };

      const request = generator.createBatchRequest(template);
      const userMessage = request.body.messages[1].content;

      expect(userMessage).toContain('Complex Workflow');
      expect(userMessage).toContain('Nodes Used (6)');
      expect(userMessage).toContain('HTTP/Webhooks');
    });
  });

  describe('parseResult', () => {
    it('should parse a successful result', () => {
      const mockResult = buildCompletionResult(
        'template-789',
        JSON.stringify(buildMetadata({
          categories: ['automation', 'integration'],
          complexity: 'medium',
          use_cases: ['API integration', 'Data sync'],
          estimated_setup_minutes: 30,
          required_services: ['Slack API'],
          key_features: ['Webhook triggers', 'API calls']
        }))
      );

      const result = generator.parseResult(mockResult);

      expect(result.templateId).toBe(789);
      expect(result.metadata.categories).toEqual(['automation', 'integration']);
      expect(result.metadata.complexity).toBe('medium');
      expect(result.error).toBeUndefined();
    });

    it('should handle error results', () => {
      const mockResult = {
        custom_id: 'template-999',
        error: {
          message: 'API error'
        }
      };

      const result = generator.parseResult(mockResult);

      expect(result.templateId).toBe(999);
      expect(result.error).toBe('API error');
      expect(result.metadata).toBeDefined();
      expect(result.metadata.complexity).toBe('medium'); // Default metadata
    });

    it('should handle malformed responses', () => {
      const mockResult = buildCompletionResult('template-111', 'not valid json');

      const result = generator.parseResult(mockResult);

      expect(result.templateId).toBe(111);
      // The wording of JSON.parse errors differs between JavaScript engines
      // ("Unexpected token ...", "Unexpected identifier ...", "unexpected
      // character ..."), so match only the part they have in common.
      expect(result.error).toMatch(/unexpected/i);
      expect(result.metadata).toBeDefined();
    });
  });

  describe('TemplateMetadataSchema', () => {
    it('should validate correct metadata', () => {
      const validMetadata = buildMetadata({
        categories: ['automation', 'integration'],
        use_cases: ['API calls', 'Data processing'],
        key_features: ['Fast processing']
      });

      const result = TemplateMetadataSchema.safeParse(validMetadata);

      expect(result.success).toBe(true);
    });

    it('should reject invalid complexity', () => {
      const invalidMetadata = buildMetadata({
        complexity: 'very-hard' // Invalid
      });

      const result = TemplateMetadataSchema.safeParse(invalidMetadata);

      expect(result.success).toBe(false);
    });

    it('should enforce array limits', () => {
      const tooManyCategories = buildMetadata({
        categories: ['a', 'b', 'c', 'd', 'e', 'f'] // Max 5
      });

      const result = TemplateMetadataSchema.safeParse(tooManyCategories);

      expect(result.success).toBe(false);
    });

    it('should enforce time limits', () => {
      const tooLongSetup = buildMetadata({
        complexity: 'complex',
        estimated_setup_minutes: 500 // Max 480
      });

      const result = TemplateMetadataSchema.safeParse(tooLongSetup);

      expect(result.success).toBe(false);
    });
  });
});
|
||||
Reference in New Issue
Block a user