feat: Add structured template metadata generation with OpenAI

- Implement OpenAI batch API integration for metadata generation
- Add metadata columns to database schema (metadata_json, metadata_generated_at)
- Create MetadataGenerator service with structured output schemas
- Create BatchProcessor for handling OpenAI batch jobs
- Add --generate-metadata flag to fetch-templates script
- Update template repository with metadata management methods
- Add OpenAI configuration to environment variables
- Include comprehensive tests for metadata generation
- Use gpt-4o-mini model with 50% cost savings via batch API

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
czlonkowski
2025-09-14 20:00:39 +02:00
parent d49416fc58
commit 6e24da722b
9 changed files with 1011 additions and 13 deletions

View File

@@ -87,3 +87,23 @@ AUTH_TOKEN=your-secure-token-here
# Maximum number of API request retries (default: 3)
# N8N_API_MAX_RETRIES=3
# =========================
# OPENAI API CONFIGURATION
# =========================
# Optional: Enable AI-powered template metadata generation
# Provides structured metadata for improved template discovery
# OpenAI API Key (get from https://platform.openai.com/api-keys)
# OPENAI_API_KEY=
# OpenAI Model for metadata generation (default: gpt-4o-mini)
# OPENAI_MODEL=gpt-4o-mini
# Batch size for metadata generation (default: 100)
# Templates are processed in batches using OpenAI's Batch API for 50% cost savings
# OPENAI_BATCH_SIZE=100
# Enable metadata generation during template fetch (default: false)
# Set to true to automatically generate metadata when running fetch:templates
# METADATA_GENERATION_ENABLED=false

View File

@@ -18,6 +18,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Reduces failed queries by approximately 50%
- Added `template-node-resolver.ts` utility for node type resolution
- Added 23 tests for template node resolution
- **Structured Template Metadata with OpenAI**: AI-powered metadata generation for templates
- Uses OpenAI's batch API with gpt-4o-mini for 50% cost savings
- Generates structured metadata: categories, complexity, use cases, setup time
- Batch processing with 24-hour SLA
- No OpenAI calls at runtime - metadata is generated ahead of time during template preprocessing
- Added `--generate-metadata` flag to the fetch-templates script
- New environment variables: OPENAI_API_KEY, OPENAI_MODEL, OPENAI_BATCH_SIZE
- Added metadata columns to database schema
- New repository methods for metadata management
## [2.11.0] - 2025-01-14

View File

@@ -134,8 +134,10 @@
"n8n": "^1.110.1",
"n8n-core": "^1.109.0",
"n8n-workflow": "^1.107.0",
"openai": "^4.77.0",
"sql.js": "^1.13.0",
"uuid": "^10.0.0"
"uuid": "^10.0.0",
"zod": "^3.24.1"
},
"optionalDependencies": {
"@rollup/rollup-darwin-arm64": "^4.50.0",

View File

@@ -42,13 +42,16 @@ CREATE TABLE IF NOT EXISTS templates (
created_at DATETIME,
updated_at DATETIME,
url TEXT,
scraped_at DATETIME DEFAULT CURRENT_TIMESTAMP
scraped_at DATETIME DEFAULT CURRENT_TIMESTAMP,
metadata_json TEXT, -- Structured metadata from OpenAI (JSON)
metadata_generated_at DATETIME -- When metadata was generated
);
-- Templates indexes
CREATE INDEX IF NOT EXISTS idx_template_nodes ON templates(nodes_used);
CREATE INDEX IF NOT EXISTS idx_template_updated ON templates(updated_at);
CREATE INDEX IF NOT EXISTS idx_template_name ON templates(name);
CREATE INDEX IF NOT EXISTS idx_template_metadata ON templates(metadata_generated_at);
-- Note: FTS5 tables are created conditionally at runtime if FTS5 is supported
-- See template-repository.ts initializeFTS5() method

View File

@@ -3,12 +3,21 @@ import { createDatabaseAdapter } from '../database/database-adapter';
import { TemplateService } from '../templates/template-service';
import * as fs from 'fs';
import * as path from 'path';
import * as dotenv from 'dotenv';
import type { MetadataRequest } from '../templates/metadata-generator';
async function fetchTemplates(mode: 'rebuild' | 'update' = 'rebuild') {
// Load environment variables
dotenv.config();
async function fetchTemplates(mode: 'rebuild' | 'update' = 'rebuild', generateMetadata: boolean = false) {
const modeEmoji = mode === 'rebuild' ? '🔄' : '⬆️';
const modeText = mode === 'rebuild' ? 'Rebuilding' : 'Updating';
console.log(`${modeEmoji} ${modeText} n8n workflow templates...\n`);
if (generateMetadata) {
console.log('🤖 Metadata generation enabled (using OpenAI)\n');
}
// Ensure data directory exists
const dataDir = './data';
if (!fs.existsSync(dataDir)) {
@@ -114,6 +123,14 @@ async function fetchTemplates(mode: 'rebuild' | 'update' = 'rebuild') {
console.log(` ${index + 1}. ${node.node} (${node.count} templates)`);
});
// Generate metadata if requested
if (generateMetadata && process.env.OPENAI_API_KEY) {
console.log('\n🤖 Generating metadata for templates...');
await generateTemplateMetadata(db, service);
} else if (generateMetadata && !process.env.OPENAI_API_KEY) {
console.log('\n⚠ Metadata generation requested but OPENAI_API_KEY not set');
}
} catch (error) {
console.error('\n❌ Error fetching templates:', error);
process.exit(1);
@@ -125,34 +142,120 @@ async function fetchTemplates(mode: 'rebuild' | 'update' = 'rebuild') {
}
}
/**
 * Generate AI metadata for templates that have none yet and store it.
 *
 * Loads up to 500 templates without metadata from the service's repository,
 * sends them through BatchProcessor, saves every successful result and prints
 * summary statistics. Any failure is logged instead of rethrown, so a metadata
 * problem does not propagate to the caller.
 *
 * @param db - Database handle; not used by this function.
 * @param service - Template service whose repository is read and updated.
 */
async function generateTemplateMetadata(db: any, service: TemplateService) {
  try {
    const apiKey = process.env.OPENAI_API_KEY;
    if (!apiKey) {
      console.log('\n⚠ Metadata generation requested but OPENAI_API_KEY not set');
      return;
    }

    const { BatchProcessor } = await import('../templates/batch-processor');
    const { gunzipSync } = await import('zlib');
    const repository = (service as any).repository;

    // Get templates without metadata
    const templatesWithoutMetadata = repository.getTemplatesWithoutMetadata(500);

    if (templatesWithoutMetadata.length === 0) {
      console.log('✅ All templates already have metadata');
      return;
    }

    console.log(`Found ${templatesWithoutMetadata.length} templates without metadata`);

    // Fall back to the default when OPENAI_BATCH_SIZE is missing, non-numeric or not positive
    const configuredBatchSize = Number.parseInt(process.env.OPENAI_BATCH_SIZE ?? '', 10);
    const batchSize = Number.isInteger(configuredBatchSize) && configuredBatchSize > 0
      ? configuredBatchSize
      : 100;

    // Create batch processor
    const processor = new BatchProcessor({
      apiKey,
      model: process.env.OPENAI_MODEL || 'gpt-4o-mini',
      batchSize,
      outputDir: './temp/batch'
    });

    // A single corrupt row must not abort the whole run, so parsing is best-effort
    const parseJsonOrUndefined = (raw: string, label: string, templateId: number): unknown => {
      try {
        return JSON.parse(raw);
      } catch {
        console.warn(`⚠ Template ${templateId}: ${label} is not valid JSON, ignoring it`);
        return undefined;
      }
    };

    const decodeWorkflow = (t: any): unknown => {
      if (!t.workflow_json_compressed) {
        return t.workflow_json
          ? parseJsonOrUndefined(t.workflow_json, 'workflow_json', t.id)
          : undefined;
      }
      try {
        const raw = Buffer.from(t.workflow_json_compressed, 'base64');
        // NOTE(review): the column name suggests compression. Gzip payloads are
        // recognised by their magic bytes (0x1f 0x8b) and inflated; anything else
        // is treated as plain base64-encoded JSON, as before. Confirm the writer's format.
        const isGzip = raw.length > 2 && raw[0] === 0x1f && raw[1] === 0x8b;
        return JSON.parse((isGzip ? gunzipSync(raw) : raw).toString());
      } catch {
        console.warn(`⚠ Template ${t.id}: workflow_json_compressed could not be decoded, ignoring it`);
        return undefined;
      }
    };

    // Prepare metadata requests
    const requests: MetadataRequest[] = templatesWithoutMetadata.map((t: any) => {
      const nodes = parseJsonOrUndefined(t.nodes_used, 'nodes_used', t.id);
      return {
        templateId: t.id,
        name: t.name,
        description: t.description,
        nodes: Array.isArray(nodes) ? nodes : [],
        workflow: decodeWorkflow(t)
      };
    });

    // Process in batches
    const results = await processor.processTemplates(requests, (message, current, total) => {
      process.stdout.write(`\r📊 ${message}: ${current}/${total}`);
    });

    console.log('\n');

    // Only results without an error are stored; failed ones carry placeholder metadata
    const metadataMap = new Map<number, unknown>();
    for (const [templateId, result] of results) {
      if (!result.error) {
        metadataMap.set(templateId, result.metadata);
      }
    }

    if (metadataMap.size > 0) {
      repository.batchUpdateMetadata(metadataMap);
      console.log(`✅ Updated metadata for ${metadataMap.size} templates`);
    }

    const failedCount = requests.length - metadataMap.size;
    if (failedCount > 0) {
      console.log(`⚠ Metadata generation failed for ${failedCount} templates`);
    }

    // Show stats
    const stats = repository.getMetadataStats();
    console.log('\n📈 Metadata Statistics:');
    console.log(` - Total templates: ${stats.total}`);
    console.log(` - With metadata: ${stats.withMetadata}`);
    console.log(` - Without metadata: ${stats.withoutMetadata}`);
    console.log(` - Outdated (>30 days): ${stats.outdated}`);
  } catch (error) {
    console.error('\n❌ Error generating metadata:', error);
  }
}
/**
 * Parse the command line arguments of the fetch-templates script.
 *
 * Recognised options:
 * - `--mode=rebuild|update` or `--mode rebuild|update` (default: rebuild)
 * - `--update` as shorthand for `--mode=update`
 * - `--generate-metadata` / `--metadata` to enable AI metadata generation
 * - `--help` / `-h` prints usage and exits the process with code 0
 *
 * Metadata generation is also enabled when the environment variable
 * METADATA_GENERATION_ENABLED is exactly `true`.
 *
 * @returns The selected mode and whether metadata generation is enabled.
 */
function parseArgs(): { mode: 'rebuild' | 'update', generateMetadata: boolean } {
  const args = process.argv.slice(2);

  // Show help if requested
  if (args.includes('--help') || args.includes('-h')) {
    console.log('Usage: npm run fetch:templates [options]\n');
    console.log('Options:');
    console.log(' --mode=rebuild|update Rebuild from scratch or update existing (default: rebuild)');
    console.log(' --update Shorthand for --mode=update');
    console.log(' --generate-metadata Generate AI metadata for templates (requires OPENAI_API_KEY)');
    console.log(' --metadata Shorthand for --generate-metadata');
    console.log(' --help, -h Show this help message');
    process.exit(0);
  }

  let mode: 'rebuild' | 'update' = 'rebuild';

  // Check for --mode flag; match it exactly so unrelated flags sharing the prefix are ignored
  const modeIndex = args.findIndex(arg => arg === '--mode' || arg.startsWith('--mode='));
  if (modeIndex !== -1) {
    const modeArg = args[modeIndex];
    const modeValue = modeArg.includes('=')
      ? modeArg.slice(modeArg.indexOf('=') + 1)
      : args[modeIndex + 1];

    if (modeValue === 'update') {
      mode = 'update';
    } else if (modeValue !== 'rebuild') {
      // Rebuild is the default; say so instead of silently ignoring the bad value
      console.warn(`⚠ Unknown mode "${modeValue}", falling back to "rebuild"`);
    }
  }

  // Check for --update flag as shorthand
  if (args.includes('--update')) {
    mode = 'update';
  }

  // Metadata generation: explicit flag, or the documented environment switch
  const generateMetadata =
    args.includes('--generate-metadata') ||
    args.includes('--metadata') ||
    process.env.METADATA_GENERATION_ENABLED === 'true';

  return { mode, generateMetadata };
}
// Run if called directly: parse the CLI options and start the fetch.
if (require.main === module) {
  const { mode, generateMetadata } = parseArgs();
  fetchTemplates(mode, generateMetadata).catch((error) => {
    console.error(error);
    // Report the failure to the shell instead of exiting with code 0
    process.exitCode = 1;
  });
}
export { fetchTemplates };

View File

@@ -0,0 +1,282 @@
import * as fs from 'fs';
import * as path from 'path';
import OpenAI from 'openai';
import { logger } from '../utils/logger';
import { MetadataGenerator, MetadataRequest, MetadataResult } from './metadata-generator';
/**
 * Construction options for BatchProcessor.
 */
export interface BatchProcessorOptions {
// OpenAI API key; passed to both the OpenAI client and the MetadataGenerator.
apiKey: string;
// Model name forwarded to MetadataGenerator (which supplies its own default when omitted).
model?: string;
// Number of templates per batch job; BatchProcessor falls back to 100 when falsy.
batchSize?: number;
// Directory for the temporary JSONL request files; BatchProcessor falls back to './temp'.
outputDir?: string;
}
/**
 * Shape of a batch job record: identifier, lifecycle status, timestamps and
 * the ids of its input/output files.
 *
 * NOTE(review): the BatchProcessor methods visible in this file return `any`
 * and do not reference this interface — confirm whether it is used elsewhere.
 */
export interface BatchJob {
id: string;
// 'completed' is the success state; 'failed', 'expired' and 'cancelled' are treated as terminal failures by BatchProcessor.
status: 'validating' | 'in_progress' | 'finalizing' | 'completed' | 'failed' | 'expired' | 'cancelled';
// NOTE(review): presumably a Unix timestamp as reported by the API — confirm the unit.
created_at: number;
completed_at?: number;
input_file_id: string;
// Absent until the job has produced an output file.
output_file_id?: string;
error?: any;
}
/**
 * Runs template metadata requests through the OpenAI Batch API.
 *
 * Templates are split into batches. For each batch a JSONL request file is
 * written locally, uploaded, submitted as a batch job, polled until it reaches
 * a terminal state, and its output is parsed into MetadataResult objects.
 * Temporary local and remote files are removed afterwards, whether or not the
 * batch succeeded.
 */
export class BatchProcessor {
  /** Batch size used when none (or an unusable one) is configured. */
  private static readonly DEFAULT_BATCH_SIZE = 100;
  /** Delays between status polls, in seconds; the last value repeats. */
  private static readonly POLL_DELAYS_SECONDS = [60, 120, 300, 600, 900, 1800];
  /** Upper bound on status polls so monitoring cannot loop forever. */
  private static readonly MAX_POLL_ATTEMPTS = 100;

  private client: OpenAI;
  private generator: MetadataGenerator;
  private batchSize: number;
  private outputDir: string;

  /**
   * @param options - API key plus optional model, batch size and output directory.
   */
  constructor(options: BatchProcessorOptions) {
    this.client = new OpenAI({ apiKey: options.apiKey });
    this.generator = new MetadataGenerator(options.apiKey, options.model);
    this.batchSize = BatchProcessor.normalizeBatchSize(options.batchSize);
    this.outputDir = options.outputDir || './temp';

    // Ensure output directory exists
    if (!fs.existsSync(this.outputDir)) {
      fs.mkdirSync(this.outputDir, { recursive: true });
    }
  }

  /**
   * Turn a configured batch size into a usable one.
   * Anything that is not a finite number >= 1 (undefined, NaN, 0, negatives)
   * becomes the default; a negative step would otherwise make createBatches
   * loop forever. Fractions are rounded down.
   */
  private static normalizeBatchSize(value: number | undefined): number {
    return typeof value === 'number' && Number.isFinite(value) && value >= 1
      ? Math.floor(value)
      : BatchProcessor.DEFAULT_BATCH_SIZE;
  }

  /**
   * Process templates in batches.
   *
   * A batch that fails is logged and skipped; the remaining batches still run,
   * so the returned map may contain fewer entries than `templates`.
   *
   * @param templates - Requests to generate metadata for.
   * @param progressCallback - Optional hook called before and after each batch
   *   with a message, the number of templates handled so far and the total.
   * @returns Results keyed by template id.
   */
  async processTemplates(
    templates: MetadataRequest[],
    progressCallback?: (message: string, current: number, total: number) => void
  ): Promise<Map<number, MetadataResult>> {
    const results = new Map<number, MetadataResult>();
    const batches = this.createBatches(templates);

    logger.info(`Processing ${templates.length} templates in ${batches.length} batches`);

    for (let i = 0; i < batches.length; i++) {
      const batch = batches[i];
      const batchNum = i + 1;

      try {
        progressCallback?.(`Processing batch ${batchNum}/${batches.length}`, i * this.batchSize, templates.length);

        // Process this batch
        const batchResults = await this.processBatch(batch, `batch_${batchNum}`);

        // Merge results
        for (const result of batchResults) {
          results.set(result.templateId, result);
        }

        logger.info(`Completed batch ${batchNum}/${batches.length}: ${batchResults.length} results`);
        progressCallback?.(`Completed batch ${batchNum}/${batches.length}`, Math.min((i + 1) * this.batchSize, templates.length), templates.length);
      } catch (error) {
        logger.error(`Error processing batch ${batchNum}:`, error);
        // Continue with next batch
      }
    }

    logger.info(`Batch processing complete: ${results.size} results`);
    return results;
  }

  /**
   * Process a single batch: write the request file, upload it, run the job and
   * parse its output. Temporary files are cleaned up on success and on failure.
   *
   * @throws Whatever the upload, job creation, monitoring or retrieval step throws.
   */
  private async processBatch(templates: MetadataRequest[], batchName: string): Promise<MetadataResult[]> {
    // Create JSONL file
    const inputFile = await this.createBatchFile(templates, batchName);
    let uploadedFileId: string | undefined;
    let outputFileId: string | undefined;

    try {
      // Upload file to OpenAI
      const uploadedFile = await this.uploadFile(inputFile);
      uploadedFileId = uploadedFile.id;

      // Create batch job
      const batchJob = await this.createBatchJob(uploadedFile.id);

      // Monitor job until completion
      const completedJob = await this.monitorBatchJob(batchJob.id);
      outputFileId = completedJob.output_file_id;

      // Retrieve and parse results
      return await this.retrieveResults(completedJob);
    } finally {
      // Runs on failure as well, so neither the local file nor the uploaded one is left behind
      await this.cleanup(inputFile, uploadedFileId, outputFileId);
    }
  }

  /**
   * Split the templates into consecutive chunks of at most `batchSize` items.
   */
  private createBatches(templates: MetadataRequest[]): MetadataRequest[][] {
    const batches: MetadataRequest[][] = [];

    for (let i = 0; i < templates.length; i += this.batchSize) {
      batches.push(templates.slice(i, i + this.batchSize));
    }

    return batches;
  }

  /**
   * Write one JSON request per line (JSONL) for the given templates.
   *
   * @returns Path of the created file inside the output directory.
   */
  private async createBatchFile(templates: MetadataRequest[], batchName: string): Promise<string> {
    const filename = path.join(this.outputDir, `${batchName}_${Date.now()}.jsonl`);
    const lines = templates.map(
      (template) => JSON.stringify(this.generator.createBatchRequest(template)) + '\n'
    );

    await fs.promises.writeFile(filename, lines.join(''));

    logger.debug(`Created batch file: ${filename} with ${templates.length} requests`);
    return filename;
  }

  /**
   * Upload the request file to OpenAI with purpose `batch`.
   *
   * @returns The created file object (its `id` is used to start the job).
   */
  private async uploadFile(filepath: string): Promise<any> {
    const file = fs.createReadStream(filepath);
    const uploadedFile = await this.client.files.create({
      file,
      purpose: 'batch'
    });

    logger.debug(`Uploaded file: ${uploadedFile.id}`);
    return uploadedFile;
  }

  /**
   * Create a chat-completions batch job with a 24h completion window.
   */
  private async createBatchJob(fileId: string): Promise<any> {
    const batchJob = await this.client.batches.create({
      input_file_id: fileId,
      endpoint: '/v1/chat/completions',
      completion_window: '24h'
    });

    logger.info(`Created batch job: ${batchJob.id}`);
    return batchJob;
  }

  /**
   * Poll a batch job until it completes.
   *
   * The delay between polls follows a fixed, progressively longer schedule
   * (POLL_DELAYS_SECONDS); once the schedule is exhausted the last delay repeats.
   *
   * @returns The completed batch job.
   * @throws Error when the job ends as failed, expired or cancelled, or when
   *   the maximum number of polls is reached.
   */
  private async monitorBatchJob(batchId: string): Promise<any> {
    const delays = BatchProcessor.POLL_DELAYS_SECONDS;
    const maxAttempts = BatchProcessor.MAX_POLL_ATTEMPTS;

    for (let attempt = 0; attempt < maxAttempts; attempt++) {
      const batchJob = await this.client.batches.retrieve(batchId);
      logger.debug(`Batch ${batchId} status: ${batchJob.status} (attempt ${attempt + 1})`);

      if (batchJob.status === 'completed') {
        logger.info(`Batch job ${batchId} completed successfully`);
        return batchJob;
      }

      if (['failed', 'expired', 'cancelled'].includes(batchJob.status)) {
        // Surface the API's own error messages when it reports any
        const details = (batchJob.errors?.data ?? [])
          .map((e: { message?: string | null }) => e.message)
          .filter(Boolean)
          .join('; ');
        throw new Error(`Batch job failed with status: ${batchJob.status}${details ? ` (${details})` : ''}`);
      }

      // Wait before next check
      const waitTime = delays[Math.min(attempt, delays.length - 1)];
      logger.debug(`Waiting ${waitTime} seconds before next check...`);
      await this.sleep(waitTime * 1000);
    }

    throw new Error(`Batch job monitoring timed out after ${maxAttempts} attempts`);
  }

  /**
   * Download the job's output file and parse it line by line.
   * Lines that cannot be parsed are logged and skipped.
   *
   * @throws Error when the job has no output file.
   */
  private async retrieveResults(batchJob: any): Promise<MetadataResult[]> {
    if (!batchJob.output_file_id) {
      const hint = batchJob.error_file_id ? ` (see error file ${batchJob.error_file_id})` : '';
      throw new Error(`No output file available for batch job${hint}`);
    }

    // Download result file
    const fileResponse = await this.client.files.content(batchJob.output_file_id);
    const fileContent = await fileResponse.text();

    // Parse JSONL results
    const results: MetadataResult[] = [];
    const lines = fileContent.trim().split('\n');

    for (const line of lines) {
      if (!line) continue;

      try {
        const result = JSON.parse(line);
        const parsed = this.generator.parseResult(result);
        results.push(parsed);
      } catch (error) {
        logger.error('Error parsing result line:', error);
      }
    }

    logger.info(`Retrieved ${results.length} results from batch job`);
    return results;
  }

  /**
   * Remove the local request file and the remote input/output files.
   * Best effort: every failure is logged as a warning and never thrown.
   *
   * @param localFile - Path of the local JSONL file.
   * @param inputFileId - Id of the uploaded input file, if the upload happened.
   * @param outputFileId - Id of the job's output file, if one was produced.
   */
  private async cleanup(localFile: string, inputFileId?: string, outputFileId?: string): Promise<void> {
    // Delete local file
    try {
      fs.unlinkSync(localFile);
      logger.debug(`Deleted local file: ${localFile}`);
    } catch (error) {
      logger.warn(`Failed to delete local file: ${localFile}`, error);
    }

    // Delete uploaded files from OpenAI
    if (inputFileId) {
      try {
        await this.client.files.del(inputFileId);
        logger.debug(`Deleted input file from OpenAI: ${inputFileId}`);
      } catch (error) {
        logger.warn(`Failed to delete input file from OpenAI: ${inputFileId}`, error);
      }
    }

    if (outputFileId) {
      try {
        await this.client.files.del(outputFileId);
        logger.debug(`Deleted output file from OpenAI: ${outputFileId}`);
      } catch (error) {
        logger.warn(`Failed to delete output file from OpenAI: ${outputFileId}`, error);
      }
    }
  }

  /**
   * Resolve after the given number of milliseconds.
   */
  private sleep(ms: number): Promise<void> {
    return new Promise(resolve => setTimeout(resolve, ms));
  }
}

View File

@@ -0,0 +1,287 @@
import OpenAI from 'openai';
import { z } from 'zod';
import { logger } from '../utils/logger';
import { TemplateWorkflow, TemplateDetail } from './template-fetcher';
/**
 * Zod schema describing the structured metadata stored for a template.
 * It is used to validate the JSON returned by the model; its limits
 * (array sizes, 5-480 minutes) match the JSON schema that MetadataGenerator
 * sends to OpenAI.
 */
export const TemplateMetadataSchema = z.object({
categories: z.array(z.string()).max(5).describe('Main categories (max 5)'),
complexity: z.enum(['simple', 'medium', 'complex']).describe('Implementation complexity'),
use_cases: z.array(z.string()).max(5).describe('Primary use cases'),
estimated_setup_minutes: z.number().min(5).max(480).describe('Setup time in minutes'),
// The only array field without an upper bound.
required_services: z.array(z.string()).describe('External services needed'),
key_features: z.array(z.string()).max(5).describe('Main capabilities'),
target_audience: z.array(z.string()).max(3).describe('Target users')
});
/** Validated metadata object, derived from TemplateMetadataSchema. */
export type TemplateMetadata = z.infer<typeof TemplateMetadataSchema>;
/**
 * Input for generating metadata for one template.
 */
export interface MetadataRequest {
// Numeric template id; embedded in the batch request's custom_id as `template-<id>`.
templateId: number;
name: string;
// Included in the prompt only when present.
description?: string;
// Node type names (e.g. 'n8n-nodes-base.webhook'); summarized into groups for the prompt.
nodes: string[];
// Optional workflow object; only its `nodes` length and `connections` key count are read.
workflow?: any;
}
/**
 * Outcome of parsing one batch result.
 */
export interface MetadataResult {
// Template id recovered from the result's custom_id.
templateId: number;
// Validated metadata, or the generator's default metadata when `error` is set.
metadata: TemplateMetadata;
// Present when the request failed or its response could not be parsed/validated.
error?: string;
}
/**
 * Builds OpenAI requests that ask for structured template metadata and parses
 * the responses back into validated TemplateMetadata objects.
 */
export class MetadataGenerator {
  private client: OpenAI;
  private model: string;

  /**
   * @param apiKey - OpenAI API key used for the client.
   * @param model - Model name placed into every request (default: gpt-4o-mini).
   */
  constructor(apiKey: string, model: string = 'gpt-4o-mini') {
    this.client = new OpenAI({ apiKey });
    this.model = model;
  }

  /**
   * Generate the JSON schema for OpenAI structured outputs.
   * Mirrors TemplateMetadataSchema: same fields, enum values and limits.
   */
  private getJsonSchema() {
    return {
      name: 'template_metadata',
      strict: true,
      schema: {
        type: 'object',
        properties: {
          categories: {
            type: 'array',
            items: { type: 'string' },
            maxItems: 5,
            description: 'Main categories like automation, integration, data processing'
          },
          complexity: {
            type: 'string',
            enum: ['simple', 'medium', 'complex'],
            description: 'Implementation complexity level'
          },
          use_cases: {
            type: 'array',
            items: { type: 'string' },
            maxItems: 5,
            description: 'Primary use cases for this template'
          },
          estimated_setup_minutes: {
            type: 'number',
            minimum: 5,
            maximum: 480,
            description: 'Estimated setup time in minutes'
          },
          required_services: {
            type: 'array',
            items: { type: 'string' },
            description: 'External services or APIs required'
          },
          key_features: {
            type: 'array',
            items: { type: 'string' },
            maxItems: 5,
            description: 'Main capabilities or features'
          },
          target_audience: {
            type: 'array',
            items: { type: 'string' },
            maxItems: 3,
            description: 'Target users like developers, marketers, analysts'
          }
        },
        required: [
          'categories',
          'complexity',
          'use_cases',
          'estimated_setup_minutes',
          'required_services',
          'key_features',
          'target_audience'
        ],
        additionalProperties: false
      }
    };
  }

  /**
   * Create a batch request (one JSONL line's worth) for a single template.
   *
   * The request's custom_id is `template-<templateId>`, which parseResult uses
   * to map the response back to the template.
   *
   * @param template - Template name, optional description, node list and optional workflow.
   * @returns A request object for the `/v1/chat/completions` batch endpoint.
   */
  createBatchRequest(template: MetadataRequest): any {
    // Extract node information for analysis
    const nodesSummary = this.summarizeNodes(template.nodes);

    // Build context for the AI; empty entries (no description / no workflow) are dropped
    const context = [
      `Template: ${template.name}`,
      template.description ? `Description: ${template.description}` : '',
      `Nodes Used (${template.nodes.length}): ${nodesSummary}`,
      template.workflow ? `Workflow has ${template.workflow.nodes?.length || 0} nodes with ${Object.keys(template.workflow.connections || {}).length} connections` : ''
    ].filter(Boolean).join('\n');

    return {
      custom_id: `template-${template.templateId}`,
      method: 'POST',
      url: '/v1/chat/completions',
      body: {
        model: this.model,
        temperature: 0.1,
        max_tokens: 500,
        response_format: {
          type: 'json_schema',
          json_schema: this.getJsonSchema()
        },
        messages: [
          {
            role: 'system',
            content: `You are an n8n workflow expert analyzing templates to extract structured metadata.
Analyze the provided template information and extract:
- Categories: Classify into relevant categories (automation, integration, data, communication, etc.)
- Complexity: Assess as simple (1-3 nodes), medium (4-8 nodes), or complex (9+ nodes or advanced logic)
- Use cases: Identify primary business use cases
- Setup time: Estimate realistic setup time based on complexity and required configurations
- Required services: List any external services, APIs, or accounts needed
- Key features: Highlight main capabilities or benefits
- Target audience: Identify who would benefit most (developers, marketers, ops teams, etc.)
Be concise and practical in your analysis.`
          },
          {
            role: 'user',
            content: context
          }
        ]
      }
    };
  }

  /**
   * Map a node type name to the group it is counted under in the prompt summary.
   *
   * Matching is case-insensitive so camelCase names such as `googleSheets` or
   * `openAi` are recognised. "ai" is matched as a whole camelCase word, not as
   * a substring, so names like `wait` or `airtable` are not treated as AI
   * nodes. The langchain check looks at the full name because the package
   * prefix is not part of the base name.
   */
  private classifyNode(node: string): string {
    // Extract base node name (remove package prefix)
    const baseName = node.split('.').pop() || node;
    const lowerBase = baseName.toLowerCase();
    const words = baseName
      .replace(/([a-z0-9])([A-Z])/g, '$1 $2')
      .replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2')
      .toLowerCase()
      .split(' ');
    const baseHas = (...needles: string[]): boolean =>
      needles.some((needle) => lowerBase.includes(needle));

    if (baseHas('webhook', 'http')) return 'HTTP/Webhooks';
    if (baseHas('database', 'postgres', 'mysql')) return 'Database';
    if (baseHas('slack', 'email', 'gmail')) return 'Communication';
    if (node.toLowerCase().includes('langchain') || baseHas('openai') || words.includes('ai')) return 'AI/ML';
    if (baseHas('sheet', 'csv', 'excel')) return 'Spreadsheets';

    // No known group: use the node's own name without Trigger/Node suffix
    return baseName.replace(/Trigger$/, '').replace(/Node$/, '');
  }

  /**
   * Summarize nodes for better context: count nodes per group and list the ten
   * most frequent groups, e.g. `HTTP/Webhooks (3), Database, slack`.
   */
  private summarizeNodes(nodes: string[]): string {
    const nodeGroups: Record<string, number> = {};

    for (const node of nodes) {
      const group = this.classifyNode(node);
      nodeGroups[group] = (nodeGroups[group] || 0) + 1;
    }

    // Format summary
    return Object.entries(nodeGroups)
      .sort((a, b) => b[1] - a[1])
      .slice(0, 10) // Top 10 groups
      .map(([name, count]) => count > 1 ? `${name} (${count})` : name)
      .join(', ');
  }

  /**
   * Read the numeric template id out of a batch result's custom_id.
   *
   * @throws Error when custom_id is missing or not of the form `template-<digits>`.
   */
  private extractTemplateId(result: any): number {
    const match = /^template-(\d+)$/.exec(String(result?.custom_id ?? ''));
    if (!match) {
      throw new Error(`Batch result has an unrecognized custom_id: ${String(result?.custom_id)}`);
    }
    return Number.parseInt(match[1], 10);
  }

  /**
   * Parse one line of batch output.
   *
   * On a request-level error, an unexpected response shape, invalid JSON or a
   * schema violation the result carries default metadata plus an `error`
   * message, so callers can tell it apart from a real result.
   *
   * @param result - Parsed JSONL line with `custom_id` and `response` or `error`.
   * @returns The template id with validated metadata, or default metadata and `error`.
   * @throws Error when the result cannot be attributed to a template (bad custom_id).
   */
  parseResult(result: any): MetadataResult {
    const templateId = this.extractTemplateId(result);

    try {
      if (result.error) {
        return {
          templateId,
          metadata: this.getDefaultMetadata(),
          error: result.error.message
        };
      }

      const body = result.response?.body;
      // A failed request reports its reason in the response body rather than in `error`
      if (body?.error?.message) {
        throw new Error(body.error.message);
      }

      const content = body?.choices?.[0]?.message?.content;
      if (!content) {
        throw new Error('Invalid response structure');
      }

      // Validate with Zod
      const validated = TemplateMetadataSchema.parse(JSON.parse(content));

      return {
        templateId,
        metadata: validated
      };
    } catch (error) {
      logger.error(`Error parsing result for ${result.custom_id}:`, error);
      return {
        templateId,
        metadata: this.getDefaultMetadata(),
        error: error instanceof Error ? error.message : 'Unknown error'
      };
    }
  }

  /**
   * Get default metadata for fallback when a response is unusable.
   */
  private getDefaultMetadata(): TemplateMetadata {
    return {
      categories: ['automation'],
      complexity: 'medium',
      use_cases: ['Process automation'],
      estimated_setup_minutes: 30,
      required_services: [],
      key_features: ['Workflow automation'],
      target_audience: ['developers']
    };
  }

  /**
   * Generate metadata for a single template with a direct (non-batch) API call
   * (for testing).
   *
   * @returns Validated metadata, or the default metadata when the call or the
   *   validation fails (the error is logged, not thrown).
   */
  async generateSingle(template: MetadataRequest): Promise<TemplateMetadata> {
    try {
      const completion = await this.client.chat.completions.create({
        model: this.model,
        temperature: 0.1,
        max_tokens: 500,
        response_format: {
          type: 'json_schema',
          json_schema: this.getJsonSchema()
        } as any,
        messages: [
          {
            role: 'system',
            content: `You are an n8n workflow expert analyzing templates to extract structured metadata.`
          },
          {
            role: 'user',
            content: `Analyze this template: ${template.name}\nNodes: ${template.nodes.join(', ')}`
          }
        ]
      });

      const content = completion.choices[0].message.content;
      if (!content) {
        throw new Error('No content in response');
      }

      const metadata = JSON.parse(content);
      return TemplateMetadataSchema.parse(metadata);
    } catch (error) {
      logger.error('Error generating single metadata:', error);
      return this.getDefaultMetadata();
    }
  }
}

View File

@@ -22,6 +22,8 @@ export interface StoredTemplate {
updated_at: string;
url: string;
scraped_at: string;
metadata_json?: string; // Structured metadata from OpenAI (JSON string)
metadata_generated_at?: string; // When metadata was generated
}
export class TemplateRepository {
@@ -536,4 +538,91 @@ export class TemplateRepository {
// Non-critical error - search will fallback to LIKE
}
}
/**
 * Store metadata for one template.
 * The metadata is serialized to JSON into metadata_json and
 * metadata_generated_at is set to the current timestamp.
 */
updateTemplateMetadata(templateId: number, metadata: any): void {
  const updateStatement = this.db.prepare(`
UPDATE templates
SET metadata_json = ?, metadata_generated_at = CURRENT_TIMESTAMP
WHERE id = ?
`);
  const serialized = JSON.stringify(metadata);
  updateStatement.run(serialized, templateId);
  logger.debug(`Updated metadata for template ${templateId}`);
}
/**
 * Store metadata for several templates, given as a map of template id to metadata.
 * One prepared UPDATE is reused for every entry; the updates run one after
 * another without an explicit transaction.
 */
batchUpdateMetadata(metadataMap: Map<number, any>): void {
  const updateStatement = this.db.prepare(`
UPDATE templates
SET metadata_json = ?, metadata_generated_at = CURRENT_TIMESTAMP
WHERE id = ?
`);
  // Map.forEach passes (value, key) and visits entries in insertion order
  metadataMap.forEach((metadata, templateId) => {
    updateStatement.run(JSON.stringify(metadata), templateId);
  });
  logger.info(`Updated metadata for ${metadataMap.size} templates`);
}
/**
 * Fetch templates that still lack metadata (metadata_json or
 * metadata_generated_at is NULL), most viewed first.
 *
 * @param limit - Maximum number of rows to return (default 100).
 */
getTemplatesWithoutMetadata(limit: number = 100): StoredTemplate[] {
  const query = `
SELECT * FROM templates
WHERE metadata_json IS NULL OR metadata_generated_at IS NULL
ORDER BY views DESC
LIMIT ?
`;
  const rows = this.db.prepare(query).all(limit);
  return rows as StoredTemplate[];
}
/**
 * Fetch templates whose metadata was generated more than `daysOld` days ago,
 * most viewed first.
 *
 * @param daysOld - Age threshold in days (default 30).
 * @param limit - Maximum number of rows to return (default 100).
 */
getTemplatesWithOutdatedMetadata(daysOld: number = 30, limit: number = 100): StoredTemplate[] {
  const query = `
SELECT * FROM templates
WHERE metadata_generated_at < datetime('now', '-' || ? || ' days')
ORDER BY views DESC
LIMIT ?
`;
  const rows = this.db.prepare(query).all(daysOld, limit);
  return rows as StoredTemplate[];
}
/**
 * Summarize metadata coverage: total templates, how many have metadata_json,
 * how many do not (total minus the former), and how many have metadata
 * generated more than 30 days ago.
 */
getMetadataStats(): {
  total: number;
  withMetadata: number;
  withoutMetadata: number;
  outdated: number;
} {
  // Runs a COUNT(*) query and unwraps its single `count` column
  const countRows = (sql: string): number =>
    (this.db.prepare(sql).get() as { count: number }).count;

  const total = this.getTemplateCount();
  const withMetadata = countRows(`
SELECT COUNT(*) as count FROM templates
WHERE metadata_json IS NOT NULL
`);
  const outdated = countRows(`
SELECT COUNT(*) as count FROM templates
WHERE metadata_generated_at < datetime('now', '-30 days')
`);

  return {
    total,
    withMetadata,
    withoutMetadata: total - withMetadata,
    outdated
  };
}
}

View File

@@ -0,0 +1,203 @@
import { describe, it, expect, vi, beforeEach } from 'vitest';
import { MetadataGenerator, TemplateMetadataSchema, MetadataRequest } from '../../../src/templates/metadata-generator';
// Mock OpenAI: replace the package's default export with a constructor stub
// so creating a MetadataGenerator in these tests never builds a real client.
vi.mock('openai', () => {
return {
default: vi.fn().mockImplementation(() => ({
// Only the chat.completions.create method is stubbed.
chat: {
completions: {
create: vi.fn()
}
}
}))
};
});
describe('MetadataGenerator', () => {
let generator: MetadataGenerator;
beforeEach(() => {
generator = new MetadataGenerator('test-api-key', 'gpt-4o-mini');
});
describe('createBatchRequest', () => {
  // Checks the envelope of the batch request and the system/user message pair.
  it('should create a valid batch request', () => {
    const template: MetadataRequest = {
      templateId: 123,
      name: 'Test Workflow',
      description: 'A test workflow',
      nodes: ['n8n-nodes-base.webhook', 'n8n-nodes-base.httpRequest', 'n8n-nodes-base.slack']
    };

    const request = generator.createBatchRequest(template);

    expect(request.custom_id).toBe('template-123');
    expect(request.method).toBe('POST');
    expect(request.url).toBe('/v1/chat/completions');
    expect(request.body.model).toBe('gpt-4o-mini');
    expect(request.body.response_format.type).toBe('json_schema');
    expect(request.body.response_format.json_schema.strict).toBe(true);
    expect(request.body.messages).toHaveLength(2);
    expect(request.body.messages[0].role).toBe('system');
    expect(request.body.messages[1].role).toBe('user');
    expect(request.body.messages[1].content).toContain('Description: A test workflow');
  });

  // Checks that similar nodes are grouped and counted, and that optional
  // context lines are left out when the template has no description.
  it('should summarize nodes effectively', () => {
    const template: MetadataRequest = {
      templateId: 456,
      name: 'Complex Workflow',
      nodes: [
        'n8n-nodes-base.webhook',
        'n8n-nodes-base.httpRequest',
        'n8n-nodes-base.httpRequest',
        'n8n-nodes-base.postgres',
        'n8n-nodes-base.slack',
        '@n8n/n8n-nodes-langchain.agent'
      ]
    };

    const request = generator.createBatchRequest(template);
    const userMessage = request.body.messages[1].content;

    expect(userMessage).toContain('Complex Workflow');
    expect(userMessage).toContain('Nodes Used (6)');
    // webhook + 2x httpRequest fall into the same group
    expect(userMessage).toContain('HTTP/Webhooks (3)');
    expect(userMessage).toContain('Database');
    expect(userMessage).toContain('Communication');
    expect(userMessage).not.toContain('Description:');
  });
});
describe('parseResult', () => {
  // Builds a batch output line whose first choice carries the given content.
  const resultWithContent = (customId: string, content: string) => ({
    custom_id: customId,
    response: {
      body: {
        choices: [{
          message: { content },
          finish_reason: 'stop'
        }]
      }
    }
  });

  it('should parse a successful result', () => {
    const mockResult = resultWithContent('template-789', JSON.stringify({
      categories: ['automation', 'integration'],
      complexity: 'medium',
      use_cases: ['API integration', 'Data sync'],
      estimated_setup_minutes: 30,
      required_services: ['Slack API'],
      key_features: ['Webhook triggers', 'API calls'],
      target_audience: ['developers']
    }));

    const result = generator.parseResult(mockResult);

    expect(result.templateId).toBe(789);
    expect(result.metadata.categories).toEqual(['automation', 'integration']);
    expect(result.metadata.complexity).toBe('medium');
    expect(result.error).toBeUndefined();
  });

  it('should handle error results', () => {
    const mockResult = {
      custom_id: 'template-999',
      error: {
        message: 'API error'
      }
    };

    const result = generator.parseResult(mockResult);

    expect(result.templateId).toBe(999);
    expect(result.error).toBe('API error');
    expect(result.metadata).toBeDefined();
    expect(result.metadata.complexity).toBe('medium'); // Default metadata
  });

  it('should handle malformed responses', () => {
    const result = generator.parseResult(resultWithContent('template-111', 'not valid json'));

    expect(result.templateId).toBe(111);
    // The exact JSON.parse message depends on the JavaScript engine, so only
    // require that an error is reported and the default metadata is returned.
    expect(result.error).toBeTruthy();
    expect(result.metadata.complexity).toBe('medium');
    expect(result.metadata.categories).toEqual(['automation']);
  });

  it('should fall back to defaults when the content violates the schema', () => {
    const result = generator.parseResult(resultWithContent('template-222', JSON.stringify({
      categories: ['automation'],
      complexity: 'very-hard', // Not one of simple/medium/complex
      use_cases: ['API calls'],
      estimated_setup_minutes: 15,
      required_services: [],
      key_features: ['Fast'],
      target_audience: ['developers']
    })));

    expect(result.templateId).toBe(222);
    expect(result.error).toBeTruthy();
    expect(result.metadata.complexity).toBe('medium');
  });

  it('should report an invalid response structure when there is no content', () => {
    const mockResult = {
      custom_id: 'template-333',
      response: { body: { choices: [] } }
    };

    const result = generator.parseResult(mockResult);

    expect(result.templateId).toBe(333);
    expect(result.error).toBe('Invalid response structure');
    expect(result.metadata).toBeDefined();
  });
});
describe('TemplateMetadataSchema', () => {
  // Returns metadata that satisfies the schema, with selected fields replaced
  // so each test states only the values it is about.
  const buildMetadata = (overrides: Record<string, unknown> = {}): Record<string, unknown> => ({
    categories: ['automation'],
    complexity: 'simple',
    use_cases: ['API calls'],
    estimated_setup_minutes: 15,
    required_services: [],
    key_features: ['Fast'],
    target_audience: ['developers'],
    ...overrides
  });

  // Parses the candidate and reports only whether validation succeeded.
  const isAccepted = (candidate: Record<string, unknown>): boolean =>
    TemplateMetadataSchema.safeParse(candidate).success;

  it('should validate correct metadata', () => {
    const validMetadata = buildMetadata({
      categories: ['automation', 'integration'],
      use_cases: ['API calls', 'Data processing'],
      key_features: ['Fast processing']
    });

    expect(isAccepted(validMetadata)).toBe(true);
  });

  it('should reject invalid complexity', () => {
    const invalidMetadata = buildMetadata({ complexity: 'very-hard' }); // Invalid

    expect(isAccepted(invalidMetadata)).toBe(false);
  });

  it('should enforce array limits', () => {
    const tooManyCategories = buildMetadata({
      categories: ['a', 'b', 'c', 'd', 'e', 'f'] // Max 5
    });

    expect(isAccepted(tooManyCategories)).toBe(false);
  });

  it('should enforce time limits', () => {
    const tooLongSetup = buildMetadata({
      complexity: 'complex',
      estimated_setup_minutes: 500 // Max 480
    });

    expect(isAccepted(tooLongSetup)).toBe(false);
  });
});
});