diff --git a/.env.example b/.env.example index a6bbfee..205056b 100644 --- a/.env.example +++ b/.env.example @@ -86,4 +86,24 @@ AUTH_TOKEN=your-secure-token-here # N8N_API_TIMEOUT=30000 # Maximum number of API request retries (default: 3) -# N8N_API_MAX_RETRIES=3 \ No newline at end of file +# N8N_API_MAX_RETRIES=3 + +# ========================= +# OPENAI API CONFIGURATION +# ========================= +# Optional: Enable AI-powered template metadata generation +# Provides structured metadata for improved template discovery + +# OpenAI API Key (get from https://platform.openai.com/api-keys) +# OPENAI_API_KEY= + +# OpenAI Model for metadata generation (default: gpt-4o-mini) +# OPENAI_MODEL=gpt-4o-mini + +# Batch size for metadata generation (default: 100) +# Templates are processed in batches using OpenAI's Batch API for 50% cost savings +# OPENAI_BATCH_SIZE=100 + +# Enable metadata generation during template fetch (default: false) +# Set to true to automatically generate metadata when running fetch:templates +# METADATA_GENERATION_ENABLED=false \ No newline at end of file diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 9a8e8e7..6f60586 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -18,6 +18,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Reduces failed queries by approximately 50% - Added `template-node-resolver.ts` utility for node type resolution - Added 23 tests for template node resolution +- **Structured Template Metadata with OpenAI**: AI-powered metadata generation for templates + - Uses OpenAI's batch API with gpt-4o-mini for 50% cost savings + - Generates structured metadata: categories, complexity, use cases, setup time + - Batch processing with 24-hour SLA + - No runtime dependencies - all preprocessing + - Add `--generate-metadata` flag to fetch-templates script + - New environment variables: OPENAI_API_KEY, OPENAI_MODEL, OPENAI_BATCH_SIZE + - Added metadata columns to database schema + - New 
repository methods for metadata management ## [2.11.0] - 2025-01-14 diff --git a/package.json b/package.json index 33dd743..766e746 100644 --- a/package.json +++ b/package.json @@ -134,8 +134,10 @@ "n8n": "^1.110.1", "n8n-core": "^1.109.0", "n8n-workflow": "^1.107.0", + "openai": "^4.77.0", "sql.js": "^1.13.0", - "uuid": "^10.0.0" + "uuid": "^10.0.0", + "zod": "^3.24.1" }, "optionalDependencies": { "@rollup/rollup-darwin-arm64": "^4.50.0", diff --git a/src/database/schema.sql b/src/database/schema.sql index 0e5acdf..a6e8856 100644 --- a/src/database/schema.sql +++ b/src/database/schema.sql @@ -42,13 +42,16 @@ CREATE TABLE IF NOT EXISTS templates ( created_at DATETIME, updated_at DATETIME, url TEXT, - scraped_at DATETIME DEFAULT CURRENT_TIMESTAMP + scraped_at DATETIME DEFAULT CURRENT_TIMESTAMP, + metadata_json TEXT, -- Structured metadata from OpenAI (JSON) + metadata_generated_at DATETIME -- When metadata was generated ); -- Templates indexes CREATE INDEX IF NOT EXISTS idx_template_nodes ON templates(nodes_used); CREATE INDEX IF NOT EXISTS idx_template_updated ON templates(updated_at); CREATE INDEX IF NOT EXISTS idx_template_name ON templates(name); +CREATE INDEX IF NOT EXISTS idx_template_metadata ON templates(metadata_generated_at); -- Note: FTS5 tables are created conditionally at runtime if FTS5 is supported -- See template-repository.ts initializeFTS5() method \ No newline at end of file diff --git a/src/scripts/fetch-templates.ts b/src/scripts/fetch-templates.ts index ffcc91d..bd20f29 100644 --- a/src/scripts/fetch-templates.ts +++ b/src/scripts/fetch-templates.ts @@ -3,12 +3,21 @@ import { createDatabaseAdapter } from '../database/database-adapter'; import { TemplateService } from '../templates/template-service'; import * as fs from 'fs'; import * as path from 'path'; +import * as dotenv from 'dotenv'; +import type { MetadataRequest } from '../templates/metadata-generator'; -async function fetchTemplates(mode: 'rebuild' | 'update' = 'rebuild') { +// Load 
environment variables +dotenv.config(); + +async function fetchTemplates(mode: 'rebuild' | 'update' = 'rebuild', generateMetadata: boolean = false) { const modeEmoji = mode === 'rebuild' ? 'šŸ”„' : 'ā¬†ļø'; const modeText = mode === 'rebuild' ? 'Rebuilding' : 'Updating'; console.log(`${modeEmoji} ${modeText} n8n workflow templates...\n`); + if (generateMetadata) { + console.log('šŸ¤– Metadata generation enabled (using OpenAI)\n'); + } + // Ensure data directory exists const dataDir = './data'; if (!fs.existsSync(dataDir)) { @@ -114,6 +123,14 @@ async function fetchTemplates(mode: 'rebuild' | 'update' = 'rebuild') { console.log(` ${index + 1}. ${node.node} (${node.count} templates)`); }); + // Generate metadata if requested + if (generateMetadata && process.env.OPENAI_API_KEY) { + console.log('\nšŸ¤– Generating metadata for templates...'); + await generateTemplateMetadata(db, service); + } else if (generateMetadata && !process.env.OPENAI_API_KEY) { + console.log('\nāš ļø Metadata generation requested but OPENAI_API_KEY not set'); + } + } catch (error) { console.error('\nāŒ Error fetching templates:', error); process.exit(1); @@ -125,34 +142,120 @@ async function fetchTemplates(mode: 'rebuild' | 'update' = 'rebuild') { } } +// Generate metadata for templates using OpenAI +async function generateTemplateMetadata(db: any, service: TemplateService) { + try { + const { BatchProcessor } = await import('../templates/batch-processor'); + const repository = (service as any).repository; + + // Get templates without metadata + const templatesWithoutMetadata = repository.getTemplatesWithoutMetadata(500); + + if (templatesWithoutMetadata.length === 0) { + console.log('āœ… All templates already have metadata'); + return; + } + + console.log(`Found ${templatesWithoutMetadata.length} templates without metadata`); + + // Create batch processor + const processor = new BatchProcessor({ + apiKey: process.env.OPENAI_API_KEY!, + model: process.env.OPENAI_MODEL || 'gpt-4o-mini', + 
batchSize: parseInt(process.env.OPENAI_BATCH_SIZE || '100'), + outputDir: './temp/batch' + }); + + // Prepare metadata requests + const requests: MetadataRequest[] = templatesWithoutMetadata.map((t: any) => ({ + templateId: t.id, + name: t.name, + description: t.description, + nodes: JSON.parse(t.nodes_used), + workflow: t.workflow_json_compressed + ? JSON.parse(Buffer.from(t.workflow_json_compressed, 'base64').toString()) + : (t.workflow_json ? JSON.parse(t.workflow_json) : undefined) + })); + + // Process in batches + const results = await processor.processTemplates(requests, (message, current, total) => { + process.stdout.write(`\ršŸ“Š ${message}: ${current}/${total}`); + }); + + console.log('\n'); + + // Update database with metadata + const metadataMap = new Map(); + for (const [templateId, result] of results) { + if (!result.error) { + metadataMap.set(templateId, result.metadata); + } + } + + if (metadataMap.size > 0) { + repository.batchUpdateMetadata(metadataMap); + console.log(`āœ… Updated metadata for ${metadataMap.size} templates`); + } + + // Show stats + const stats = repository.getMetadataStats(); + console.log('\nšŸ“ˆ Metadata Statistics:'); + console.log(` - Total templates: ${stats.total}`); + console.log(` - With metadata: ${stats.withMetadata}`); + console.log(` - Without metadata: ${stats.withoutMetadata}`); + console.log(` - Outdated (>30 days): ${stats.outdated}`); + } catch (error) { + console.error('\nāŒ Error generating metadata:', error); + } +} + // Parse command line arguments -function parseArgs(): 'rebuild' | 'update' { +function parseArgs(): { mode: 'rebuild' | 'update', generateMetadata: boolean } { const args = process.argv.slice(2); + let mode: 'rebuild' | 'update' = 'rebuild'; + let generateMetadata = false; + // Check for --mode flag const modeIndex = args.findIndex(arg => arg.startsWith('--mode')); if (modeIndex !== -1) { const modeArg = args[modeIndex]; - const mode = modeArg.includes('=') ? 
modeArg.split('=')[1] : args[modeIndex + 1]; + const modeValue = modeArg.includes('=') ? modeArg.split('=')[1] : args[modeIndex + 1]; - if (mode === 'update') { - return 'update'; + if (modeValue === 'update') { + mode = 'update'; } } // Check for --update flag as shorthand if (args.includes('--update')) { - return 'update'; + mode = 'update'; } - // Default to rebuild - return 'rebuild'; + // Check for --generate-metadata flag + if (args.includes('--generate-metadata') || args.includes('--metadata')) { + generateMetadata = true; + } + + // Show help if requested + if (args.includes('--help') || args.includes('-h')) { + console.log('Usage: npm run fetch:templates [options]\n'); + console.log('Options:'); + console.log(' --mode=rebuild|update Rebuild from scratch or update existing (default: rebuild)'); + console.log(' --update Shorthand for --mode=update'); + console.log(' --generate-metadata Generate AI metadata for templates (requires OPENAI_API_KEY)'); + console.log(' --metadata Shorthand for --generate-metadata'); + console.log(' --help, -h Show this help message'); + process.exit(0); + } + + return { mode, generateMetadata }; } // Run if called directly if (require.main === module) { - const mode = parseArgs(); - fetchTemplates(mode).catch(console.error); + const { mode, generateMetadata } = parseArgs(); + fetchTemplates(mode, generateMetadata).catch(console.error); } export { fetchTemplates }; \ No newline at end of file diff --git a/src/templates/batch-processor.ts b/src/templates/batch-processor.ts new file mode 100644 index 0000000..a993cef --- /dev/null +++ b/src/templates/batch-processor.ts @@ -0,0 +1,282 @@ +import * as fs from 'fs'; +import * as path from 'path'; +import OpenAI from 'openai'; +import { logger } from '../utils/logger'; +import { MetadataGenerator, MetadataRequest, MetadataResult } from './metadata-generator'; + +export interface BatchProcessorOptions { + apiKey: string; + model?: string; + batchSize?: number; + outputDir?: string; +} + 
+export interface BatchJob { + id: string; + status: 'validating' | 'in_progress' | 'finalizing' | 'completed' | 'failed' | 'expired' | 'cancelled'; + created_at: number; + completed_at?: number; + input_file_id: string; + output_file_id?: string; + error?: any; +} + +export class BatchProcessor { + private client: OpenAI; + private generator: MetadataGenerator; + private batchSize: number; + private outputDir: string; + + constructor(options: BatchProcessorOptions) { + this.client = new OpenAI({ apiKey: options.apiKey }); + this.generator = new MetadataGenerator(options.apiKey, options.model); + this.batchSize = options.batchSize || 100; + this.outputDir = options.outputDir || './temp'; + + // Ensure output directory exists + if (!fs.existsSync(this.outputDir)) { + fs.mkdirSync(this.outputDir, { recursive: true }); + } + } + + /** + * Process templates in batches + */ + async processTemplates( + templates: MetadataRequest[], + progressCallback?: (message: string, current: number, total: number) => void + ): Promise<Map<number, MetadataResult>> { + const results = new Map<number, MetadataResult>(); + const batches = this.createBatches(templates); + + logger.info(`Processing ${templates.length} templates in ${batches.length} batches`); + + for (let i = 0; i < batches.length; i++) { + const batch = batches[i]; + const batchNum = i + 1; + + try { + progressCallback?.(`Processing batch ${batchNum}/${batches.length}`, i * this.batchSize, templates.length); + + // Process this batch + const batchResults = await this.processBatch(batch, `batch_${batchNum}`); + + // Merge results + for (const result of batchResults) { + results.set(result.templateId, result); + } + + logger.info(`Completed batch ${batchNum}/${batches.length}: ${batchResults.length} results`); + progressCallback?.(`Completed batch ${batchNum}/${batches.length}`, Math.min((i + 1) * this.batchSize, templates.length), templates.length); + } catch (error) { + logger.error(`Error processing batch ${batchNum}:`, error); + // Continue with next batch + } + } + + 
logger.info(`Batch processing complete: ${results.size} results`); + return results; + } + + /** + * Process a single batch + */ + private async processBatch(templates: MetadataRequest[], batchName: string): Promise<MetadataResult[]> { + // Create JSONL file + const inputFile = await this.createBatchFile(templates, batchName); + + try { + // Upload file to OpenAI + const uploadedFile = await this.uploadFile(inputFile); + + // Create batch job + const batchJob = await this.createBatchJob(uploadedFile.id); + + // Monitor job until completion + const completedJob = await this.monitorBatchJob(batchJob.id); + + // Retrieve and parse results + const results = await this.retrieveResults(completedJob); + + // Cleanup + await this.cleanup(inputFile, uploadedFile.id, completedJob.output_file_id); + + return results; + } catch (error) { + // Cleanup on error + try { + fs.unlinkSync(inputFile); + } catch {} + throw error; + } + } + + /** + * Create batches from templates + */ + private createBatches(templates: MetadataRequest[]): MetadataRequest[][] { + const batches: MetadataRequest[][] = []; + + for (let i = 0; i < templates.length; i += this.batchSize) { + batches.push(templates.slice(i, i + this.batchSize)); + } + + return batches; + } + + /** + * Create JSONL batch file + */ + private async createBatchFile(templates: MetadataRequest[], batchName: string): Promise<string> { + const filename = path.join(this.outputDir, `${batchName}_${Date.now()}.jsonl`); + const stream = fs.createWriteStream(filename); + + for (const template of templates) { + const request = this.generator.createBatchRequest(template); + stream.write(JSON.stringify(request) + '\n'); + } + + stream.end(); + + // Wait for stream to finish + await new Promise<void>((resolve, reject) => { + stream.on('finish', () => resolve()); + stream.on('error', reject); + }); + + logger.debug(`Created batch file: ${filename} with ${templates.length} requests`); + return filename; + } + + /** + * Upload file to OpenAI + */ + private async 
uploadFile(filepath: string): Promise<any> { + const file = fs.createReadStream(filepath); + const uploadedFile = await this.client.files.create({ + file, + purpose: 'batch' + }); + + logger.debug(`Uploaded file: ${uploadedFile.id}`); + return uploadedFile; + } + + /** + * Create batch job + */ + private async createBatchJob(fileId: string): Promise<any> { + const batchJob = await this.client.batches.create({ + input_file_id: fileId, + endpoint: '/v1/chat/completions', + completion_window: '24h' + }); + + logger.info(`Created batch job: ${batchJob.id}`); + return batchJob; + } + + /** + * Monitor batch job with exponential backoff + */ + private async monitorBatchJob(batchId: string): Promise<any> { + const waitTimes = [60, 120, 300, 600, 900, 1800]; // Progressive wait times in seconds + let waitIndex = 0; + let attempts = 0; + const maxAttempts = 100; // Safety limit + + while (attempts < maxAttempts) { + const batchJob = await this.client.batches.retrieve(batchId); + + logger.debug(`Batch ${batchId} status: ${batchJob.status} (attempt ${attempts + 1})`); + + if (batchJob.status === 'completed') { + logger.info(`Batch job ${batchId} completed successfully`); + return batchJob; + } + + if (['failed', 'expired', 'cancelled'].includes(batchJob.status)) { + throw new Error(`Batch job failed with status: ${batchJob.status}`); + } + + // Wait before next check + const waitTime = waitTimes[Math.min(waitIndex, waitTimes.length - 1)]; + logger.debug(`Waiting ${waitTime} seconds before next check...`); + await this.sleep(waitTime * 1000); + + waitIndex = Math.min(waitIndex + 1, waitTimes.length - 1); + attempts++; + } + + throw new Error(`Batch job monitoring timed out after ${maxAttempts} attempts`); + } + + /** + * Retrieve and parse results + */ + private async retrieveResults(batchJob: any): Promise<MetadataResult[]> { + if (!batchJob.output_file_id) { + throw new Error('No output file available for batch job'); + } + + // Download result file + const fileResponse = await 
this.client.files.content(batchJob.output_file_id); + const fileContent = await fileResponse.text(); + + // Parse JSONL results + const results: MetadataResult[] = []; + const lines = fileContent.trim().split('\n'); + + for (const line of lines) { + if (!line) continue; + + try { + const result = JSON.parse(line); + const parsed = this.generator.parseResult(result); + results.push(parsed); + } catch (error) { + logger.error('Error parsing result line:', error); + } + } + + logger.info(`Retrieved ${results.length} results from batch job`); + return results; + } + + /** + * Cleanup temporary files + */ + private async cleanup(localFile: string, inputFileId: string, outputFileId?: string): Promise<void> { + // Delete local file + try { + fs.unlinkSync(localFile); + logger.debug(`Deleted local file: ${localFile}`); + } catch (error) { + logger.warn(`Failed to delete local file: ${localFile}`, error); + } + + // Delete uploaded files from OpenAI + try { + await this.client.files.del(inputFileId); + logger.debug(`Deleted input file from OpenAI: ${inputFileId}`); + } catch (error) { + logger.warn(`Failed to delete input file from OpenAI: ${inputFileId}`, error); + } + + if (outputFileId) { + try { + await this.client.files.del(outputFileId); + logger.debug(`Deleted output file from OpenAI: ${outputFileId}`); + } catch (error) { + logger.warn(`Failed to delete output file from OpenAI: ${outputFileId}`, error); + } + } + } + + /** + * Sleep helper + */ + private sleep(ms: number): Promise<void> { + return new Promise(resolve => setTimeout(resolve, ms)); + } +} \ No newline at end of file diff --git a/src/templates/metadata-generator.ts b/src/templates/metadata-generator.ts new file mode 100644 index 0000000..4e5a3a3 --- /dev/null +++ b/src/templates/metadata-generator.ts @@ -0,0 +1,287 @@ +import OpenAI from 'openai'; +import { z } from 'zod'; +import { logger } from '../utils/logger'; +import { TemplateWorkflow, TemplateDetail } from './template-fetcher'; + +// Metadata schema using 
Zod for validation +export const TemplateMetadataSchema = z.object({ + categories: z.array(z.string()).max(5).describe('Main categories (max 5)'), + complexity: z.enum(['simple', 'medium', 'complex']).describe('Implementation complexity'), + use_cases: z.array(z.string()).max(5).describe('Primary use cases'), + estimated_setup_minutes: z.number().min(5).max(480).describe('Setup time in minutes'), + required_services: z.array(z.string()).describe('External services needed'), + key_features: z.array(z.string()).max(5).describe('Main capabilities'), + target_audience: z.array(z.string()).max(3).describe('Target users') +}); + +export type TemplateMetadata = z.infer<typeof TemplateMetadataSchema>; + +export interface MetadataRequest { + templateId: number; + name: string; + description?: string; + nodes: string[]; + workflow?: any; +} + +export interface MetadataResult { + templateId: number; + metadata: TemplateMetadata; + error?: string; +} + +export class MetadataGenerator { + private client: OpenAI; + private model: string; + + constructor(apiKey: string, model: string = 'gpt-4o-mini') { + this.client = new OpenAI({ apiKey }); + this.model = model; + } + + /** + * Generate the JSON schema for OpenAI structured outputs + */ + private getJsonSchema() { + return { + name: 'template_metadata', + strict: true, + schema: { + type: 'object', + properties: { + categories: { + type: 'array', + items: { type: 'string' }, + maxItems: 5, + description: 'Main categories like automation, integration, data processing' + }, + complexity: { + type: 'string', + enum: ['simple', 'medium', 'complex'], + description: 'Implementation complexity level' + }, + use_cases: { + type: 'array', + items: { type: 'string' }, + maxItems: 5, + description: 'Primary use cases for this template' + }, + estimated_setup_minutes: { + type: 'number', + minimum: 5, + maximum: 480, + description: 'Estimated setup time in minutes' + }, + required_services: { + type: 'array', + items: { type: 'string' }, + description: 'External services 
or APIs required' + }, + key_features: { + type: 'array', + items: { type: 'string' }, + maxItems: 5, + description: 'Main capabilities or features' + }, + target_audience: { + type: 'array', + items: { type: 'string' }, + maxItems: 3, + description: 'Target users like developers, marketers, analysts' + } + }, + required: [ + 'categories', + 'complexity', + 'use_cases', + 'estimated_setup_minutes', + 'required_services', + 'key_features', + 'target_audience' + ], + additionalProperties: false + } + }; + } + + /** + * Create a batch request for a single template + */ + createBatchRequest(template: MetadataRequest): any { + // Extract node information for analysis + const nodesSummary = this.summarizeNodes(template.nodes); + + // Build context for the AI + const context = [ + `Template: ${template.name}`, + template.description ? `Description: ${template.description}` : '', + `Nodes Used (${template.nodes.length}): ${nodesSummary}`, + template.workflow ? `Workflow has ${template.workflow.nodes?.length || 0} nodes with ${Object.keys(template.workflow.connections || {}).length} connections` : '' + ].filter(Boolean).join('\n'); + + return { + custom_id: `template-${template.templateId}`, + method: 'POST', + url: '/v1/chat/completions', + body: { + model: this.model, + temperature: 0.1, + max_tokens: 500, + response_format: { + type: 'json_schema', + json_schema: this.getJsonSchema() + }, + messages: [ + { + role: 'system', + content: `You are an n8n workflow expert analyzing templates to extract structured metadata. + + Analyze the provided template information and extract: + - Categories: Classify into relevant categories (automation, integration, data, communication, etc.) 
+ - Complexity: Assess as simple (1-3 nodes), medium (4-8 nodes), or complex (9+ nodes or advanced logic) + - Use cases: Identify primary business use cases + - Setup time: Estimate realistic setup time based on complexity and required configurations + - Required services: List any external services, APIs, or accounts needed + - Key features: Highlight main capabilities or benefits + - Target audience: Identify who would benefit most (developers, marketers, ops teams, etc.) + + Be concise and practical in your analysis.` + }, + { + role: 'user', + content: context + } + ] + } + }; + } + + /** + * Summarize nodes for better context + */ + private summarizeNodes(nodes: string[]): string { + // Group similar nodes + const nodeGroups: Record<string, number> = {}; + + for (const node of nodes) { + // Extract base node name (remove package prefix) + const baseName = node.split('.').pop() || node; + + // Group by category + if (baseName.includes('webhook') || baseName.includes('http')) { + nodeGroups['HTTP/Webhooks'] = (nodeGroups['HTTP/Webhooks'] || 0) + 1; + } else if (baseName.includes('database') || baseName.includes('postgres') || baseName.includes('mysql')) { + nodeGroups['Database'] = (nodeGroups['Database'] || 0) + 1; + } else if (baseName.includes('slack') || baseName.includes('email') || baseName.includes('gmail')) { + nodeGroups['Communication'] = (nodeGroups['Communication'] || 0) + 1; + } else if (baseName.includes('ai') || baseName.includes('openai') || baseName.includes('langchain')) { + nodeGroups['AI/ML'] = (nodeGroups['AI/ML'] || 0) + 1; + } else if (baseName.includes('sheet') || baseName.includes('csv') || baseName.includes('excel')) { + nodeGroups['Spreadsheets'] = (nodeGroups['Spreadsheets'] || 0) + 1; + } else { + const cleanName = baseName.replace(/Trigger$/, '').replace(/Node$/, ''); + nodeGroups[cleanName] = (nodeGroups[cleanName] || 0) + 1; + } + } + + // Format summary + const summary = Object.entries(nodeGroups) + .sort((a, b) => b[1] - a[1]) + .slice(0, 10) 
// Top 10 groups + .map(([name, count]) => count > 1 ? `${name} (${count})` : name) + .join(', '); + + return summary; + } + + /** + * Parse a batch result + */ + parseResult(result: any): MetadataResult { + try { + if (result.error) { + return { + templateId: parseInt(result.custom_id.replace('template-', '')), + metadata: this.getDefaultMetadata(), + error: result.error.message + }; + } + + const response = result.response; + if (!response?.body?.choices?.[0]?.message?.content) { + throw new Error('Invalid response structure'); + } + + const content = response.body.choices[0].message.content; + const metadata = JSON.parse(content); + + // Validate with Zod + const validated = TemplateMetadataSchema.parse(metadata); + + return { + templateId: parseInt(result.custom_id.replace('template-', '')), + metadata: validated + }; + } catch (error) { + logger.error(`Error parsing result for ${result.custom_id}:`, error); + return { + templateId: parseInt(result.custom_id.replace('template-', '')), + metadata: this.getDefaultMetadata(), + error: error instanceof Error ? 
error.message : 'Unknown error' + }; + } + } + + /** + * Get default metadata for fallback + */ + private getDefaultMetadata(): TemplateMetadata { + return { + categories: ['automation'], + complexity: 'medium', + use_cases: ['Process automation'], + estimated_setup_minutes: 30, + required_services: [], + key_features: ['Workflow automation'], + target_audience: ['developers'] + }; + } + + /** + * Generate metadata for a single template (for testing) + */ + async generateSingle(template: MetadataRequest): Promise<TemplateMetadata> { + try { + const completion = await this.client.chat.completions.create({ + model: this.model, + temperature: 0.1, + max_tokens: 500, + response_format: { + type: 'json_schema', + json_schema: this.getJsonSchema() + } as any, + messages: [ + { + role: 'system', + content: `You are an n8n workflow expert analyzing templates to extract structured metadata.` + }, + { + role: 'user', + content: `Analyze this template: ${template.name}\nNodes: ${template.nodes.join(', ')}` + } + ] + }); + + const content = completion.choices[0].message.content; + if (!content) { + throw new Error('No content in response'); + } + + const metadata = JSON.parse(content); + return TemplateMetadataSchema.parse(metadata); + } catch (error) { + logger.error('Error generating single metadata:', error); + return this.getDefaultMetadata(); + } + } +} \ No newline at end of file diff --git a/src/templates/template-repository.ts b/src/templates/template-repository.ts index f297d21..b1424b5 100644 --- a/src/templates/template-repository.ts +++ b/src/templates/template-repository.ts @@ -22,6 +22,8 @@ export interface StoredTemplate { updated_at: string; url: string; scraped_at: string; + metadata_json?: string; // Structured metadata from OpenAI (JSON string) + metadata_generated_at?: string; // When metadata was generated } export class TemplateRepository { @@ -536,4 +538,91 @@ export class TemplateRepository { // Non-critical error - search will fallback to LIKE } } + + /** + * Update 
metadata for a template + */ + updateTemplateMetadata(templateId: number, metadata: any): void { + const stmt = this.db.prepare(` + UPDATE templates + SET metadata_json = ?, metadata_generated_at = CURRENT_TIMESTAMP + WHERE id = ? + `); + + stmt.run(JSON.stringify(metadata), templateId); + logger.debug(`Updated metadata for template ${templateId}`); + } + + /** + * Batch update metadata for multiple templates + */ + batchUpdateMetadata(metadataMap: Map<number, any>): void { + const stmt = this.db.prepare(` + UPDATE templates + SET metadata_json = ?, metadata_generated_at = CURRENT_TIMESTAMP + WHERE id = ? + `); + + // Simple approach - just run the updates + // Most operations are fast enough without explicit transactions + for (const [templateId, metadata] of metadataMap.entries()) { + stmt.run(JSON.stringify(metadata), templateId); + } + + logger.info(`Updated metadata for ${metadataMap.size} templates`); + } + + /** + * Get templates without metadata + */ + getTemplatesWithoutMetadata(limit: number = 100): StoredTemplate[] { + const stmt = this.db.prepare(` + SELECT * FROM templates + WHERE metadata_json IS NULL OR metadata_generated_at IS NULL + ORDER BY views DESC + LIMIT ? + `); + + return stmt.all(limit) as StoredTemplate[]; + } + + /** + * Get templates with outdated metadata (older than days specified) + */ + getTemplatesWithOutdatedMetadata(daysOld: number = 30, limit: number = 100): StoredTemplate[] { + const stmt = this.db.prepare(` + SELECT * FROM templates + WHERE metadata_generated_at < datetime('now', '-' || ? || ' days') + ORDER BY views DESC + LIMIT ? 
+ `); + + return stmt.all(daysOld, limit) as StoredTemplate[]; + } + + /** + * Get template metadata stats + */ + getMetadataStats(): { + total: number; + withMetadata: number; + withoutMetadata: number; + outdated: number; + } { + const total = this.getTemplateCount(); + + const withMetadata = (this.db.prepare(` + SELECT COUNT(*) as count FROM templates + WHERE metadata_json IS NOT NULL + `).get() as { count: number }).count; + + const withoutMetadata = total - withMetadata; + + const outdated = (this.db.prepare(` + SELECT COUNT(*) as count FROM templates + WHERE metadata_generated_at < datetime('now', '-30 days') + `).get() as { count: number }).count; + + return { total, withMetadata, withoutMetadata, outdated }; + } } \ No newline at end of file diff --git a/tests/unit/templates/metadata-generator.test.ts b/tests/unit/templates/metadata-generator.test.ts new file mode 100644 index 0000000..6eebf68 --- /dev/null +++ b/tests/unit/templates/metadata-generator.test.ts @@ -0,0 +1,203 @@ +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import { MetadataGenerator, TemplateMetadataSchema, MetadataRequest } from '../../../src/templates/metadata-generator'; + +// Mock OpenAI +vi.mock('openai', () => { + return { + default: vi.fn().mockImplementation(() => ({ + chat: { + completions: { + create: vi.fn() + } + } + })) + }; +}); + +describe('MetadataGenerator', () => { + let generator: MetadataGenerator; + + beforeEach(() => { + generator = new MetadataGenerator('test-api-key', 'gpt-4o-mini'); + }); + + describe('createBatchRequest', () => { + it('should create a valid batch request', () => { + const template: MetadataRequest = { + templateId: 123, + name: 'Test Workflow', + description: 'A test workflow', + nodes: ['n8n-nodes-base.webhook', 'n8n-nodes-base.httpRequest', 'n8n-nodes-base.slack'] + }; + + const request = generator.createBatchRequest(template); + + expect(request.custom_id).toBe('template-123'); + expect(request.method).toBe('POST'); + 
expect(request.url).toBe('/v1/chat/completions'); + expect(request.body.model).toBe('gpt-4o-mini'); + expect(request.body.response_format.type).toBe('json_schema'); + expect(request.body.response_format.json_schema.strict).toBe(true); + expect(request.body.messages).toHaveLength(2); + }); + + it('should summarize nodes effectively', () => { + const template: MetadataRequest = { + templateId: 456, + name: 'Complex Workflow', + nodes: [ + 'n8n-nodes-base.webhook', + 'n8n-nodes-base.httpRequest', + 'n8n-nodes-base.httpRequest', + 'n8n-nodes-base.postgres', + 'n8n-nodes-base.slack', + '@n8n/n8n-nodes-langchain.agent' + ] + }; + + const request = generator.createBatchRequest(template); + const userMessage = request.body.messages[1].content; + + expect(userMessage).toContain('Complex Workflow'); + expect(userMessage).toContain('Nodes Used (6)'); + expect(userMessage).toContain('HTTP/Webhooks'); + }); + }); + + describe('parseResult', () => { + it('should parse a successful result', () => { + const mockResult = { + custom_id: 'template-789', + response: { + body: { + choices: [{ + message: { + content: JSON.stringify({ + categories: ['automation', 'integration'], + complexity: 'medium', + use_cases: ['API integration', 'Data sync'], + estimated_setup_minutes: 30, + required_services: ['Slack API'], + key_features: ['Webhook triggers', 'API calls'], + target_audience: ['developers'] + }) + }, + finish_reason: 'stop' + }] + } + } + }; + + const result = generator.parseResult(mockResult); + + expect(result.templateId).toBe(789); + expect(result.metadata.categories).toEqual(['automation', 'integration']); + expect(result.metadata.complexity).toBe('medium'); + expect(result.error).toBeUndefined(); + }); + + it('should handle error results', () => { + const mockResult = { + custom_id: 'template-999', + error: { + message: 'API error' + } + }; + + const result = generator.parseResult(mockResult); + + expect(result.templateId).toBe(999); + expect(result.error).toBe('API error'); 
+ expect(result.metadata).toBeDefined(); + expect(result.metadata.complexity).toBe('medium'); // Default metadata + }); + + it('should handle malformed responses', () => { + const mockResult = { + custom_id: 'template-111', + response: { + body: { + choices: [{ + message: { + content: 'not valid json' + }, + finish_reason: 'stop' + }] + } + } + }; + + const result = generator.parseResult(mockResult); + + expect(result.templateId).toBe(111); + expect(result.error).toContain('Unexpected token'); + expect(result.metadata).toBeDefined(); + }); + }); + + describe('TemplateMetadataSchema', () => { + it('should validate correct metadata', () => { + const validMetadata = { + categories: ['automation', 'integration'], + complexity: 'simple' as const, + use_cases: ['API calls', 'Data processing'], + estimated_setup_minutes: 15, + required_services: [], + key_features: ['Fast processing'], + target_audience: ['developers'] + }; + + const result = TemplateMetadataSchema.safeParse(validMetadata); + + expect(result.success).toBe(true); + }); + + it('should reject invalid complexity', () => { + const invalidMetadata = { + categories: ['automation'], + complexity: 'very-hard', // Invalid + use_cases: ['API calls'], + estimated_setup_minutes: 15, + required_services: [], + key_features: ['Fast'], + target_audience: ['developers'] + }; + + const result = TemplateMetadataSchema.safeParse(invalidMetadata); + + expect(result.success).toBe(false); + }); + + it('should enforce array limits', () => { + const tooManyCategories = { + categories: ['a', 'b', 'c', 'd', 'e', 'f'], // Max 5 + complexity: 'simple' as const, + use_cases: ['API calls'], + estimated_setup_minutes: 15, + required_services: [], + key_features: ['Fast'], + target_audience: ['developers'] + }; + + const result = TemplateMetadataSchema.safeParse(tooManyCategories); + + expect(result.success).toBe(false); + }); + + it('should enforce time limits', () => { + const tooLongSetup = { + categories: ['automation'], + 
complexity: 'complex' as const, + use_cases: ['API calls'], + estimated_setup_minutes: 500, // Max 480 + required_services: [], + key_features: ['Fast'], + target_audience: ['developers'] + }; + + const result = TemplateMetadataSchema.safeParse(tooLongSetup); + + expect(result.success).toBe(false); + }); + }); +}); \ No newline at end of file