feat: add template metadata generation and smart discovery

- Implement OpenAI batch API integration for metadata generation
- Add search_templates_by_metadata tool with advanced filtering
- Enhance list_templates to include descriptions and optional metadata
- Generate metadata for 2,534 templates (97.5% coverage)
- Update README with Template Tools section and enhanced Claude setup
- Add comprehensive documentation for metadata system

Enables intelligent template discovery through:
- Complexity levels (simple/medium/complex)
- Setup time estimates (5-480 minutes)
- Target audience filtering (developers/marketers/analysts)
- Required services detection
- Category and use case classification

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
czlonkowski
2025-09-15 00:18:53 +02:00
parent 6e24da722b
commit 1e586c0b23
15 changed files with 1159 additions and 134 deletions

View File

@@ -40,7 +40,7 @@ export class BatchProcessor {
}
/**
* Process templates in batches
* Process templates in batches (parallel submission)
*/
async processTemplates(
templates: MetadataRequest[],
@@ -51,26 +51,62 @@ export class BatchProcessor {
logger.info(`Processing ${templates.length} templates in ${batches.length} batches`);
// Submit all batches in parallel
console.log(`\n📤 Submitting ${batches.length} batch${batches.length > 1 ? 'es' : ''} to OpenAI...`);
const batchJobs: Array<{ batchNum: number; jobPromise: Promise<any>; templates: MetadataRequest[] }> = [];
for (let i = 0; i < batches.length; i++) {
const batch = batches[i];
const batchNum = i + 1;
try {
progressCallback?.(`Processing batch ${batchNum}/${batches.length}`, i * this.batchSize, templates.length);
progressCallback?.(`Submitting batch ${batchNum}/${batches.length}`, i * this.batchSize, templates.length);
// Process this batch
const batchResults = await this.processBatch(batch, `batch_${batchNum}`);
// Submit batch (don't wait for completion)
const jobPromise = this.submitBatch(batch, `batch_${batchNum}`);
batchJobs.push({ batchNum, jobPromise, templates: batch });
// Merge results
for (const result of batchResults) {
results.set(result.templateId, result);
}
console.log(` 📨 Submitted batch ${batchNum}/${batches.length} (${batch.length} templates)`);
} catch (error) {
logger.error(`Error submitting batch ${batchNum}:`, error);
console.error(` ❌ Failed to submit batch ${batchNum}`);
}
}
console.log(`\n⏳ All batches submitted. Waiting for completion...`);
console.log(` (Batches process in parallel - this is much faster than sequential processing)`);
// Process all batches in parallel and collect results as they complete
const batchPromises = batchJobs.map(async ({ batchNum, jobPromise, templates: batchTemplates }) => {
try {
const completedJob = await jobPromise;
console.log(`\n📦 Retrieving results for batch ${batchNum}/${batches.length}...`);
logger.info(`Completed batch ${batchNum}/${batches.length}: ${batchResults.length} results`);
progressCallback?.(`Completed batch ${batchNum}/${batches.length}`, Math.min((i + 1) * this.batchSize, templates.length), templates.length);
// Retrieve and parse results
const batchResults = await this.retrieveResults(completedJob);
logger.info(`Retrieved ${batchResults.length} results from batch ${batchNum}`);
progressCallback?.(`Retrieved batch ${batchNum}/${batches.length}`,
Math.min(batchNum * this.batchSize, templates.length), templates.length);
return { batchNum, results: batchResults };
} catch (error) {
logger.error(`Error processing batch ${batchNum}:`, error);
// Continue with next batch
console.error(` ❌ Batch ${batchNum} failed:`, error);
return { batchNum, results: [] };
}
});
// Wait for all batches to complete
const allBatchResults = await Promise.all(batchPromises);
// Merge all results
for (const { batchNum, results: batchResults } of allBatchResults) {
for (const result of batchResults) {
results.set(result.templateId, result);
}
if (batchResults.length > 0) {
console.log(` ✅ Merged ${batchResults.length} results from batch ${batchNum}`);
}
}
@@ -78,6 +114,51 @@ export class BatchProcessor {
return results;
}
/**
 * Submit a batch and start monitoring it.
 *
 * Writes the batch input file, uploads it, creates the batch job and starts
 * monitoring. The local input file is removed as soon as submission has
 * finished (successfully or not).
 *
 * Note: because this method returns the monitoring promise, the promise seen
 * by the caller settles only when `monitorBatchJob` settles, i.e. it resolves
 * with the completed job rather than at submission time.
 *
 * @param templates - Requests to include in this batch
 * @param batchName - Label used for the batch file and in log messages
 * @returns The value `monitorBatchJob` resolves with (the completed batch job)
 * @throws Re-throws any error from file creation, upload, job creation or monitoring
 */
private async submitBatch(templates: MetadataRequest[], batchName: string): Promise<any> {
  // Create JSONL file
  const inputFile = await this.createBatchFile(templates, batchName);

  try {
    // Upload file to OpenAI
    const uploadedFile = await this.uploadFile(inputFile);

    // Best-effort removal of the uploaded input file; never rejects.
    const removeUploadedFile = async (): Promise<void> => {
      try {
        await this.client.files.del(uploadedFile.id);
      } catch (error) {
        logger.warn(`Failed to cleanup files for batch ${batchName}`, error);
      }
    };

    let monitoringPromise: Promise<any>;
    try {
      // Create batch job
      const batchJob = await this.createBatchJob(uploadedFile.id);

      // Start monitoring (returns promise that resolves when complete)
      monitoringPromise = this.monitorBatchJob(batchJob.id);
    } catch (error) {
      // No job references the upload, so do not leave it orphaned.
      await removeUploadedFile();
      throw error;
    }

    // Delete the uploaded input file once the job completes. The output file
    // is intentionally left alone: results still have to be retrieved from it.
    // The rejection handler is a deliberate no-op: monitoring failures reach
    // the caller through the returned promise, and without a handler here the
    // derived promise would surface as an unhandled rejection.
    void monitoringPromise.then(removeUploadedFile, () => undefined);

    return monitoringPromise;
  } finally {
    // Clean up the local input file immediately (runs on success and on error,
    // without waiting for the monitoring promise).
    try {
      fs.unlinkSync(inputFile);
    } catch {
      // Best-effort: the file may already be gone.
    }
  }
}
/**
* Process a single batch
*/
@@ -180,17 +261,33 @@ export class BatchProcessor {
* Monitor batch job with exponential backoff
*/
private async monitorBatchJob(batchId: string): Promise<any> {
const waitTimes = [60, 120, 300, 600, 900, 1800]; // Progressive wait times in seconds
// Start with shorter wait times for better UX
const waitTimes = [30, 60, 120, 300, 600, 900, 1800]; // Progressive wait times in seconds
let waitIndex = 0;
let attempts = 0;
const maxAttempts = 100; // Safety limit
const startTime = Date.now();
let lastStatus = '';
while (attempts < maxAttempts) {
const batchJob = await this.client.batches.retrieve(batchId);
// Only log if status changed
if (batchJob.status !== lastStatus) {
const elapsedMinutes = Math.floor((Date.now() - startTime) / 60000);
const statusSymbol = batchJob.status === 'in_progress' ? '⚙️' :
batchJob.status === 'finalizing' ? '📦' :
batchJob.status === 'validating' ? '🔍' : '⏳';
console.log(` ${statusSymbol} Batch ${batchId.slice(-8)}: ${batchJob.status} (${elapsedMinutes} min)`);
lastStatus = batchJob.status;
}
logger.debug(`Batch ${batchId} status: ${batchJob.status} (attempt ${attempts + 1})`);
if (batchJob.status === 'completed') {
const elapsedMinutes = Math.floor((Date.now() - startTime) / 60000);
console.log(` ✅ Batch ${batchId.slice(-8)} completed in ${elapsedMinutes} minutes`);
logger.info(`Batch job ${batchId} completed successfully`);
return batchJob;
}

View File

@@ -125,8 +125,8 @@ export class MetadataGenerator {
url: '/v1/chat/completions',
body: {
model: this.model,
temperature: 0.1,
max_tokens: 500,
temperature: 1,
max_completion_tokens: 1000,
response_format: {
type: 'json_schema',
json_schema: this.getJsonSchema()
@@ -134,18 +134,7 @@ export class MetadataGenerator {
messages: [
{
role: 'system',
content: `You are an n8n workflow expert analyzing templates to extract structured metadata.
Analyze the provided template information and extract:
- Categories: Classify into relevant categories (automation, integration, data, communication, etc.)
- Complexity: Assess as simple (1-3 nodes), medium (4-8 nodes), or complex (9+ nodes or advanced logic)
- Use cases: Identify primary business use cases
- Setup time: Estimate realistic setup time based on complexity and required configurations
- Required services: List any external services, APIs, or accounts needed
- Key features: Highlight main capabilities or benefits
- Target audience: Identify who would benefit most (developers, marketers, ops teams, etc.)
Be concise and practical in your analysis.`
content: `Analyze n8n workflow templates and extract metadata. Be concise.`
},
{
role: 'user',
@@ -254,8 +243,8 @@ export class MetadataGenerator {
try {
const completion = await this.client.chat.completions.create({
model: this.model,
temperature: 0.1,
max_tokens: 500,
temperature: 1,
max_completion_tokens: 1000,
response_format: {
type: 'json_schema',
json_schema: this.getJsonSchema()
@@ -263,17 +252,18 @@ export class MetadataGenerator {
messages: [
{
role: 'system',
content: `You are an n8n workflow expert analyzing templates to extract structured metadata.`
content: `Analyze n8n workflow templates and extract metadata. Be concise.`
},
{
role: 'user',
content: `Analyze this template: ${template.name}\nNodes: ${template.nodes.join(', ')}`
content: `Template: ${template.name}\nNodes: ${template.nodes.slice(0, 10).join(', ')}`
}
]
});
const content = completion.choices[0].message.content;
if (!content) {
logger.error('No content in OpenAI response');
throw new Error('No content in response');
}

View File

@@ -625,4 +625,173 @@ export class TemplateRepository {
return { total, withMetadata, withoutMetadata, outdated };
}
/**
 * Build the LIKE pattern that matches `value` as a quoted element inside the
 * JSON text of an array.
 *
 * The value is JSON-quoted so embedded quotes/backslashes take the same form
 * they have in serialized JSON, then `\`, `%` and `_` are escaped so they are
 * matched literally (the SQL must use `ESCAPE '\'`).
 */
private toJsonElementLikePattern(value: string): string {
  const quoted = JSON.stringify(value);
  return `%${quoted.replace(/[\\%_]/g, '\\$&')}%`;
}

/**
 * Build the WHERE conditions and bound parameters shared by the metadata
 * search and its count query, so both always apply identical filters.
 */
private buildMetadataFilterSql(filters: {
  category?: string;
  complexity?: 'simple' | 'medium' | 'complex';
  maxSetupMinutes?: number;
  minSetupMinutes?: number;
  requiredService?: string;
  targetAudience?: string;
}): { conditions: string[]; params: Array<string | number> } {
  const conditions: string[] = ['metadata_json IS NOT NULL'];
  const params: Array<string | number> = [];

  // Array-valued fields are matched against the array's JSON text.
  const addArrayContains = (jsonPath: string, value: string | undefined): void => {
    if (value) {
      conditions.push(`json_extract(metadata_json, '${jsonPath}') LIKE ? ESCAPE '\\'`);
      params.push(this.toJsonElementLikePattern(value));
    }
  };

  addArrayContains('$.categories', filters.category);

  if (filters.complexity) {
    conditions.push("json_extract(metadata_json, '$.complexity') = ?");
    params.push(filters.complexity);
  }

  // Compare against undefined so that 0 is honoured as a bound.
  if (filters.maxSetupMinutes !== undefined) {
    conditions.push("CAST(json_extract(metadata_json, '$.estimated_setup_minutes') AS INTEGER) <= ?");
    params.push(filters.maxSetupMinutes);
  }

  if (filters.minSetupMinutes !== undefined) {
    conditions.push("CAST(json_extract(metadata_json, '$.estimated_setup_minutes') AS INTEGER) >= ?");
    params.push(filters.minSetupMinutes);
  }

  addArrayContains('$.required_services', filters.requiredService);
  addArrayContains('$.target_audience', filters.targetAudience);

  return { conditions, params };
}

/**
 * Search templates by metadata fields
 *
 * Only templates with stored metadata are considered. All supplied filters
 * must match (conditions are ANDed). Results are ordered by views, then
 * creation date, both descending.
 *
 * @param filters - Optional metadata filters; omitted/empty fields are ignored
 * @param limit - Maximum number of rows to return
 * @param offset - Number of matching rows to skip
 * @returns The matching templates after `decompressWorkflow` has been applied
 */
searchTemplatesByMetadata(filters: {
  category?: string;
  complexity?: 'simple' | 'medium' | 'complex';
  maxSetupMinutes?: number;
  minSetupMinutes?: number;
  requiredService?: string;
  targetAudience?: string;
}, limit: number = 20, offset: number = 0): StoredTemplate[] {
  const { conditions, params } = this.buildMetadataFilterSql(filters);

  const query = `
    SELECT * FROM templates
    WHERE ${conditions.join(' AND ')}
    ORDER BY views DESC, created_at DESC
    LIMIT ? OFFSET ?
  `;

  const results = this.db.prepare(query).all(...params, limit, offset) as StoredTemplate[];

  logger.debug(`Metadata search found ${results.length} results`, { filters, count: results.length });

  return results.map(t => this.decompressWorkflow(t));
}

/**
 * Get count for metadata search results
 *
 * Applies exactly the same filters as `searchTemplatesByMetadata`, without
 * pagination.
 *
 * @param filters - Optional metadata filters; omitted/empty fields are ignored
 * @returns Total number of templates matching the filters
 */
getMetadataSearchCount(filters: {
  category?: string;
  complexity?: 'simple' | 'medium' | 'complex';
  maxSetupMinutes?: number;
  minSetupMinutes?: number;
  requiredService?: string;
  targetAudience?: string;
}): number {
  const { conditions, params } = this.buildMetadataFilterSql(filters);

  const query = `SELECT COUNT(*) as count FROM templates WHERE ${conditions.join(' AND ')}`;
  const result = this.db.prepare(query).get(...params) as { count: number };

  return result.count;
}
/**
 * Get unique categories from metadata
 *
 * Expands the `$.categories` array of every template that has metadata and
 * returns the distinct entries, ordered by the database.
 *
 * @returns Distinct category names
 */
getAvailableCategories(): string[] {
  // json_each exposes a JSON string element as plain SQL text in `value`.
  // Select it directly: passing that text back through json_extract(value, '$')
  // would make SQLite parse it as a JSON document, which fails for ordinary
  // words that are not themselves valid JSON.
  const results = this.db.prepare(`
    SELECT DISTINCT json_each.value as category
    FROM templates, json_each(json_extract(metadata_json, '$.categories'))
    WHERE metadata_json IS NOT NULL
    ORDER BY category
  `).all() as { category: unknown }[];

  // Keep only string entries so the declared return type holds even if a
  // stored array contains nulls or numbers.
  return results
    .map(r => r.category)
    .filter((category): category is string => typeof category === 'string');
}
/**
 * Get unique target audiences from metadata
 *
 * Expands the `$.target_audience` array of every template that has metadata
 * and returns the distinct entries, ordered by the database.
 *
 * @returns Distinct target audience names
 */
getAvailableTargetAudiences(): string[] {
  // json_each exposes a JSON string element as plain SQL text in `value`.
  // Select it directly: passing that text back through json_extract(value, '$')
  // would make SQLite parse it as a JSON document, which fails for ordinary
  // words that are not themselves valid JSON.
  const results = this.db.prepare(`
    SELECT DISTINCT json_each.value as audience
    FROM templates, json_each(json_extract(metadata_json, '$.target_audience'))
    WHERE metadata_json IS NOT NULL
    ORDER BY audience
  `).all() as { audience: unknown }[];

  // Keep only string entries so the declared return type holds even if a
  // stored array contains nulls or numbers.
  return results
    .map(r => r.audience)
    .filter((audience): audience is string => typeof audience === 'string');
}
/**
 * Get templates by category with metadata
 *
 * Matches templates whose `$.categories` JSON text contains `category` as a
 * quoted element. Results are ordered by views, then creation date, both
 * descending.
 *
 * @param category - Category name to look for
 * @param limit - Maximum number of rows to return
 * @param offset - Number of matching rows to skip
 * @returns The matching templates after `decompressWorkflow` has been applied
 */
getTemplatesByCategory(category: string, limit: number = 10, offset: number = 0): StoredTemplate[] {
  // JSON-quote the category so embedded quotes/backslashes take their
  // serialized form, then escape LIKE metacharacters (\, %, _) so the
  // caller's text is matched literally instead of acting as a wildcard.
  const likePattern = `%${JSON.stringify(category).replace(/[\\%_]/g, '\\$&')}%`;

  const query = `
    SELECT * FROM templates
    WHERE metadata_json IS NOT NULL
    AND json_extract(metadata_json, '$.categories') LIKE ? ESCAPE '\\'
    ORDER BY views DESC, created_at DESC
    LIMIT ? OFFSET ?
  `;

  const results = this.db.prepare(query).all(likePattern, limit, offset) as StoredTemplate[];
  return results.map(t => this.decompressWorkflow(t));
}
/**
 * Fetch one page of templates whose stored metadata has the given complexity.
 *
 * Rows without metadata are excluded; ordering is by views and then creation
 * date, both descending.
 *
 * @param complexity - Complexity level to match exactly
 * @param limit - Maximum number of rows to return
 * @param offset - Number of matching rows to skip
 * @returns The matching templates after `decompressWorkflow` has been applied
 */
getTemplatesByComplexity(complexity: 'simple' | 'medium' | 'complex', limit: number = 10, offset: number = 0): StoredTemplate[] {
  // Assembled line by line; the leading and trailing empty entries reproduce
  // the newlines that wrap the statement.
  const sql = [
    '',
    'SELECT * FROM templates',
    'WHERE metadata_json IS NOT NULL',
    "AND json_extract(metadata_json, '$.complexity') = ?",
    'ORDER BY views DESC, created_at DESC',
    'LIMIT ? OFFSET ?',
    ''
  ].join('\n');

  const statement = this.db.prepare(sql);
  const rows = statement.all(complexity, limit, offset) as StoredTemplate[];

  return rows.map(row => this.decompressWorkflow(row));
}
}

View File

@@ -15,6 +15,15 @@ export interface TemplateInfo {
views: number;
created: string;
url: string;
metadata?: {
categories: string[];
complexity: 'simple' | 'medium' | 'complex';
use_cases: string[];
estimated_setup_minutes: number;
required_services: string[];
key_features: string[];
target_audience: string[];
};
}
export interface TemplateWithWorkflow extends TemplateInfo {
@@ -32,8 +41,18 @@ export interface PaginatedResponse<T> {
/**
 * Compact template summary used for list responses.
 */
export interface TemplateMinimal {
  /** Template identifier. */
  id: number;
  /** Template name. */
  name: string;
  /** Template description. */
  description: string;
  /** View count of the template. */
  views: number;
  /** Number of nodes used by the template. */
  nodeCount: number;
  /** Generated metadata; absent when not requested or not available. */
  metadata?: {
    categories: string[];
    complexity:
      | 'simple'
      | 'medium'
      | 'complex';
    use_cases: string[];
    /** Estimated setup time, in minutes. */
    estimated_setup_minutes: number;
    required_services: string[];
    key_features: string[];
    target_audience: string[];
  };
}
export class TemplateService {
@@ -137,16 +156,30 @@ export class TemplateService {
/**
* List all templates with minimal data
*/
async listTemplates(limit: number = 10, offset: number = 0, sortBy: 'views' | 'created_at' | 'name' = 'views'): Promise<PaginatedResponse<TemplateMinimal>> {
async listTemplates(limit: number = 10, offset: number = 0, sortBy: 'views' | 'created_at' | 'name' = 'views', includeMetadata: boolean = false): Promise<PaginatedResponse<TemplateMinimal>> {
const templates = this.repository.getAllTemplates(limit, offset, sortBy);
const total = this.repository.getTemplateCount();
const items = templates.map(t => ({
id: t.id,
name: t.name,
views: t.views,
nodeCount: JSON.parse(t.nodes_used).length
}));
const items = templates.map(t => {
const item: TemplateMinimal = {
id: t.id,
name: t.name,
description: t.description, // Always include description
views: t.views,
nodeCount: JSON.parse(t.nodes_used).length
};
// Optionally include metadata
if (includeMetadata && t.metadata_json) {
try {
item.metadata = JSON.parse(t.metadata_json);
} catch (error) {
logger.warn(`Failed to parse metadata for template ${t.id}:`, error);
}
}
return item;
});
return {
items,
@@ -175,6 +208,87 @@ export class TemplateService {
];
}
/**
 * Run a metadata-filtered template search and return one page of results.
 *
 * @param filters - Optional metadata filters passed through to the repository
 * @param limit - Page size
 * @param offset - Number of matching templates to skip
 * @returns Formatted templates for the page plus the total match count and paging info
 */
async searchTemplatesByMetadata(
  filters: {
    category?: string;
    complexity?: 'simple' | 'medium' | 'complex';
    maxSetupMinutes?: number;
    minSetupMinutes?: number;
    requiredService?: string;
    targetAudience?: string;
  },
  limit: number = 20,
  offset: number = 0
): Promise<PaginatedResponse<TemplateInfo>> {
  const matches = this.repository.searchTemplatesByMetadata(filters, limit, offset);
  const total = this.repository.getMetadataSearchCount(filters);

  const items = matches.map(stored => this.formatTemplateInfo(stored));
  // More pages exist while the end of this page falls short of the total.
  const hasMore = total > offset + limit;

  return { items, total, limit, offset, hasMore };
}
/**
 * List the categories present in template metadata.
 *
 * @returns Whatever the repository reports as available categories
 */
async getAvailableCategories(): Promise<string[]> {
  const categories = this.repository.getAvailableCategories();
  return categories;
}
/**
 * List the target audiences present in template metadata.
 *
 * @returns Whatever the repository reports as available target audiences
 */
async getAvailableTargetAudiences(): Promise<string[]> {
  const audiences = this.repository.getAvailableTargetAudiences();
  return audiences;
}
/**
 * Return one page of templates belonging to a category.
 *
 * @param category - Category name to filter on
 * @param limit - Page size
 * @param offset - Number of matching templates to skip
 * @returns Formatted templates for the page plus the total match count and paging info
 */
async getTemplatesByCategory(
  category: string,
  limit: number = 10,
  offset: number = 0
): Promise<PaginatedResponse<TemplateInfo>> {
  const matches = this.repository.getTemplatesByCategory(category, limit, offset);
  // The total comes from the generic metadata count with only the category set.
  const total = this.repository.getMetadataSearchCount({ category });

  const items = matches.map(stored => this.formatTemplateInfo(stored));
  const hasMore = total > offset + limit;

  return { items, total, limit, offset, hasMore };
}
/**
 * Return one page of templates with the given complexity level.
 *
 * @param complexity - Complexity level to filter on
 * @param limit - Page size
 * @param offset - Number of matching templates to skip
 * @returns Formatted templates for the page plus the total match count and paging info
 */
async getTemplatesByComplexity(
  complexity: 'simple' | 'medium' | 'complex',
  limit: number = 10,
  offset: number = 0
): Promise<PaginatedResponse<TemplateInfo>> {
  const matches = this.repository.getTemplatesByComplexity(complexity, limit, offset);
  // The total comes from the generic metadata count with only the complexity set.
  const total = this.repository.getMetadataSearchCount({ complexity });

  const items = matches.map(stored => this.formatTemplateInfo(stored));
  const hasMore = total > offset + limit;

  return { items, total, limit, offset, hasMore };
}
/**
* Get template statistics
*/
@@ -263,7 +377,7 @@ export class TemplateService {
* Format stored template for API response
*/
private formatTemplateInfo(template: StoredTemplate): TemplateInfo {
return {
const info: TemplateInfo = {
id: template.id,
name: template.name,
description: template.description,
@@ -277,5 +391,16 @@ export class TemplateService {
created: template.created_at,
url: template.url
};
// Include metadata if available
if (template.metadata_json) {
try {
info.metadata = JSON.parse(template.metadata_json);
} catch (error) {
logger.warn(`Failed to parse metadata for template ${template.id}:`, error);
}
}
return info;
}
}