feat: add template metadata generation and smart discovery

- Implement OpenAI batch API integration for metadata generation
- Add search_templates_by_metadata tool with advanced filtering
- Enhance list_templates to include descriptions and optional metadata
- Generate metadata for 2,534 templates (97.5% coverage)
- Update README with Template Tools section and enhanced Claude setup
- Add comprehensive documentation for metadata system

Enables intelligent template discovery through:
- Complexity levels (simple/medium/complex)
- Setup time estimates (5-480 minutes)
- Target audience filtering (developers/marketers/analysts)
- Required services detection
- Category and use case classification

Co-Authored-By: Claude <noreply@anthropic.com>
2026-02-10 15:23:07 +00:00 · 2025-09-15 00:18:53 +02:00
parent 6e24da722b
commit 1e586c0b23
15 changed files with 1159 additions and 134 deletions
--- a/src/templates/batch-processor.ts
+++ b/src/templates/batch-processor.ts
@@ -40,7 +40,7 @@ export class BatchProcessor {
  }
  
  /**
-   * Process templates in batches
+   * Process templates in batches (parallel submission)
   */
  async processTemplates(
    templates: MetadataRequest[],
@@ -51,26 +51,62 @@ export class BatchProcessor {
    
    logger.info(`Processing ${templates.length} templates in ${batches.length} batches`);
    
+    // Submit all batches in parallel
+    console.log(`\n📤 Submitting ${batches.length} batch${batches.length > 1 ? 'es' : ''} to OpenAI...`);
+    const batchJobs: Array<{ batchNum: number; jobPromise: Promise<any>; templates: MetadataRequest[] }> = [];
+    
    for (let i = 0; i < batches.length; i++) {
      const batch = batches[i];
      const batchNum = i + 1;
      
      try {
-        progressCallback?.(`Processing batch ${batchNum}/${batches.length}`, i * this.batchSize, templates.length);
+        progressCallback?.(`Submitting batch ${batchNum}/${batches.length}`, i * this.batchSize, templates.length);
        
-        // Process this batch
-        const batchResults = await this.processBatch(batch, `batch_${batchNum}`);
+        // Submit batch (don't wait for completion)
+        const jobPromise = this.submitBatch(batch, `batch_${batchNum}`);
+        batchJobs.push({ batchNum, jobPromise, templates: batch });
        
-        // Merge results
-        for (const result of batchResults) {
-          results.set(result.templateId, result);
-        }
+        console.log(`   📨 Submitted batch ${batchNum}/${batches.length} (${batch.length} templates)`);
+      } catch (error) {
+        logger.error(`Error submitting batch ${batchNum}:`, error);
+        console.error(`   ❌ Failed to submit batch ${batchNum}`);
+      }
+    }
+    
+    console.log(`\n⏳ All batches submitted. Waiting for completion...`);
+    console.log(`   (Batches process in parallel - this is much faster than sequential processing)`);
+    
+    // Process all batches in parallel and collect results as they complete
+    const batchPromises = batchJobs.map(async ({ batchNum, jobPromise, templates: batchTemplates }) => {
+      try {
+        const completedJob = await jobPromise;
+        console.log(`\n📦 Retrieving results for batch ${batchNum}/${batches.length}...`);
        
-        logger.info(`Completed batch ${batchNum}/${batches.length}: ${batchResults.length} results`);
-        progressCallback?.(`Completed batch ${batchNum}/${batches.length}`, Math.min((i + 1) * this.batchSize, templates.length), templates.length);
+        // Retrieve and parse results
+        const batchResults = await this.retrieveResults(completedJob);
+        
+        logger.info(`Retrieved ${batchResults.length} results from batch ${batchNum}`);
+        progressCallback?.(`Retrieved batch ${batchNum}/${batches.length}`, 
+          Math.min(batchNum * this.batchSize, templates.length), templates.length);
+        
+        return { batchNum, results: batchResults };
      } catch (error) {
        logger.error(`Error processing batch ${batchNum}:`, error);
-        // Continue with next batch
+        console.error(`   ❌ Batch ${batchNum} failed:`, error);
+        return { batchNum, results: [] };
+      }
+    });
+    
+    // Wait for all batches to complete
+    const allBatchResults = await Promise.all(batchPromises);
+    
+    // Merge all results
+    for (const { batchNum, results: batchResults } of allBatchResults) {
+      for (const result of batchResults) {
+        results.set(result.templateId, result);
+      }
+      if (batchResults.length > 0) {
+        console.log(`   ✅ Merged ${batchResults.length} results from batch ${batchNum}`);
      }
    }
    
@@ -78,6 +114,51 @@ export class BatchProcessor {
    return results;
  }
  
+  /**
+   * Upload one batch to OpenAI and start its job without blocking on completion.
+   *
+   * @param templates - Requests written to the batch's JSONL input file
+   * @param batchName - Label passed to createBatchFile and used in cleanup warnings
+   * @returns Whatever monitorBatchJob resolves with (the job once it is 'completed')
+   * @throws Propagates failures from file creation, upload or job creation
+   */
+  private async submitBatch(templates: MetadataRequest[], batchName: string): Promise<any> {
+    const inputFile = await this.createBatchFile(templates, batchName);
+    
+    // Best-effort removal of the local JSONL file; unlink errors are deliberately ignored
+    const removeInputFile = (): void => {
+      try {
+        fs.unlinkSync(inputFile);
+      } catch {}
+    };
+    
+    try {
+      const uploadedFile = await this.uploadFile(inputFile);
+      const batchJob = await this.createBatchJob(uploadedFile.id);
+      
+      // Polling starts here; the caller awaits the returned promise later
+      const monitoringPromise = this.monitorBatchJob(batchJob.id);
+      removeInputFile();
+      
+      // Delete the uploaded input file once monitoring settles, on success AND failure.
+      // Both handlers are supplied so the derived promise never rejects: a bare
+      // .then(onSuccess) turns a failed job into an unhandled promise rejection.
+      const deleteUploadedFile = async (): Promise<void> => {
+        try {
+          await this.client.files.del(uploadedFile.id);
+        } catch (error) {
+          logger.warn(`Failed to cleanup files for batch ${batchName}`, error);
+        }
+      };
+      void monitoringPromise.then(deleteUploadedFile, deleteUploadedFile);
+      
+      return monitoringPromise;
+    } catch (error) {
+      removeInputFile();
+      throw error;
+    }
+  }
+  
  /**
   * Process a single batch
   */
@@ -180,17 +261,33 @@ export class BatchProcessor {
   * Monitor batch job with exponential backoff
   */
  private async monitorBatchJob(batchId: string): Promise<any> {
-    const waitTimes = [60, 120, 300, 600, 900, 1800]; // Progressive wait times in seconds
+    // Start with shorter wait times for better UX
+    const waitTimes = [30, 60, 120, 300, 600, 900, 1800]; // Progressive wait times in seconds
    let waitIndex = 0;
    let attempts = 0;
    const maxAttempts = 100; // Safety limit
+    const startTime = Date.now();
+    let lastStatus = '';
    
    while (attempts < maxAttempts) {
      const batchJob = await this.client.batches.retrieve(batchId);
      
+      // Only log if status changed
+      if (batchJob.status !== lastStatus) {
+        const elapsedMinutes = Math.floor((Date.now() - startTime) / 60000);
+        const statusSymbol = batchJob.status === 'in_progress' ? '⚙️' : 
+                            batchJob.status === 'finalizing' ? '📦' :
+                            batchJob.status === 'validating' ? '🔍' : '⏳';
+        
+        console.log(`   ${statusSymbol} Batch ${batchId.slice(-8)}: ${batchJob.status} (${elapsedMinutes} min)`);
+        lastStatus = batchJob.status;
+      }
+      
      logger.debug(`Batch ${batchId} status: ${batchJob.status} (attempt ${attempts + 1})`);
      
      if (batchJob.status === 'completed') {
+        const elapsedMinutes = Math.floor((Date.now() - startTime) / 60000);
+        console.log(`   ✅ Batch ${batchId.slice(-8)} completed in ${elapsedMinutes} minutes`);
        logger.info(`Batch job ${batchId} completed successfully`);
        return batchJob;
      }
--- a/src/templates/metadata-generator.ts
+++ b/src/templates/metadata-generator.ts
@@ -125,8 +125,8 @@ export class MetadataGenerator {
      url: '/v1/chat/completions',
      body: {
        model: this.model,
-        temperature: 0.1,
-        max_tokens: 500,
+        temperature: 1,
+        max_completion_tokens: 1000,
        response_format: {
          type: 'json_schema',
          json_schema: this.getJsonSchema()
@@ -134,18 +134,7 @@ export class MetadataGenerator {
        messages: [
          {
            role: 'system',
-            content: `You are an n8n workflow expert analyzing templates to extract structured metadata.
-            
-            Analyze the provided template information and extract:
-            - Categories: Classify into relevant categories (automation, integration, data, communication, etc.)
-            - Complexity: Assess as simple (1-3 nodes), medium (4-8 nodes), or complex (9+ nodes or advanced logic)
-            - Use cases: Identify primary business use cases
-            - Setup time: Estimate realistic setup time based on complexity and required configurations
-            - Required services: List any external services, APIs, or accounts needed
-            - Key features: Highlight main capabilities or benefits
-            - Target audience: Identify who would benefit most (developers, marketers, ops teams, etc.)
-            
-            Be concise and practical in your analysis.`
+            content: `Analyze n8n workflow templates and extract metadata. Be concise.`
          },
          {
            role: 'user',
@@ -254,8 +243,8 @@ export class MetadataGenerator {
    try {
      const completion = await this.client.chat.completions.create({
        model: this.model,
-        temperature: 0.1,
-        max_tokens: 500,
+        temperature: 1,
+        max_completion_tokens: 1000,
        response_format: {
          type: 'json_schema',
          json_schema: this.getJsonSchema()
@@ -263,17 +252,18 @@ export class MetadataGenerator {
        messages: [
          {
            role: 'system',
-            content: `You are an n8n workflow expert analyzing templates to extract structured metadata.`
+            content: `Analyze n8n workflow templates and extract metadata. Be concise.`
          },
          {
            role: 'user',
-            content: `Analyze this template: ${template.name}\nNodes: ${template.nodes.join(', ')}`
+            content: `Template: ${template.name}\nNodes: ${template.nodes.slice(0, 10).join(', ')}`
          }
        ]
      });
      
      const content = completion.choices[0].message.content;
      if (!content) {
+        logger.error('No content in OpenAI response');
        throw new Error('No content in response');
      }
      
--- a/src/templates/template-repository.ts
+++ b/src/templates/template-repository.ts
@@ -625,4 +625,173 @@ export class TemplateRepository {
    
    return { total, withMetadata, withoutMetadata, outdated };
  }
+  
+  /**
+   * Build the WHERE conditions and bound parameters shared by the metadata search
+   * and its count query, so the two can never drift apart.
+   *
+   * Array-valued fields are matched with LIKE against the JSON text returned by
+   * json_extract, looking for the value wrapped in double quotes. The value is
+   * bound as a parameter and its LIKE metacharacters (%, _ and the escape
+   * character \) are escaped, so e.g. the "_" in "google_sheets" matches literally.
+   *
+   * @param filters - Same filter object accepted by searchTemplatesByMetadata
+   * @returns Condition fragments (to be joined with AND) and their parameters
+   */
+  private buildMetadataFilterSql(
+    filters: Parameters<TemplateRepository['searchTemplatesByMetadata']>[0]
+  ): { conditions: string[]; params: any[] } {
+    const conditions: string[] = ['metadata_json IS NOT NULL'];
+    const params: any[] = [];
+    
+    // Adds "array at jsonPath contains element" as a LIKE over the serialized array
+    const addArrayContains = (jsonPath: string, element: string): void => {
+      conditions.push(`json_extract(metadata_json, '${jsonPath}') LIKE ? ESCAPE '\\'`);
+      params.push(`%"${element.replace(/[\\%_]/g, '\\$&')}"%`);
+    };
+    
+    if (filters.category) {
+      addArrayContains('$.categories', filters.category);
+    }
+    
+    if (filters.complexity) {
+      conditions.push("json_extract(metadata_json, '$.complexity') = ?");
+      params.push(filters.complexity);
+    }
+    
+    // Compared against undefined so that 0 is honoured as a real bound
+    if (filters.maxSetupMinutes !== undefined) {
+      conditions.push("CAST(json_extract(metadata_json, '$.estimated_setup_minutes') AS INTEGER) <= ?");
+      params.push(filters.maxSetupMinutes);
+    }
+    
+    if (filters.minSetupMinutes !== undefined) {
+      conditions.push("CAST(json_extract(metadata_json, '$.estimated_setup_minutes') AS INTEGER) >= ?");
+      params.push(filters.minSetupMinutes);
+    }
+    
+    if (filters.requiredService) {
+      addArrayContains('$.required_services', filters.requiredService);
+    }
+    
+    if (filters.targetAudience) {
+      addArrayContains('$.target_audience', filters.targetAudience);
+    }
+    
+    return { conditions, params };
+  }
+  
+  /**
+   * Search templates by metadata fields
+   *
+   * Only rows with metadata are considered and every supplied filter must match.
+   * Results are ordered by views, then created_at, both descending.
+   *
+   * @param filters - Optional criteria; omitted or empty fields are ignored
+   * @param limit - Maximum number of rows to return (default 20)
+   * @param offset - Number of matching rows to skip (default 0)
+   * @returns Matching templates, each passed through decompressWorkflow
+   */
+  searchTemplatesByMetadata(filters: {
+    category?: string;
+    complexity?: 'simple' | 'medium' | 'complex';
+    maxSetupMinutes?: number;
+    minSetupMinutes?: number;
+    requiredService?: string;
+    targetAudience?: string;
+  }, limit: number = 20, offset: number = 0): StoredTemplate[] {
+    const { conditions, params } = this.buildMetadataFilterSql(filters);
+    
+    const query = `
+      SELECT * FROM templates 
+      WHERE ${conditions.join(' AND ')}
+      ORDER BY views DESC, created_at DESC
+      LIMIT ? OFFSET ?
+    `;
+    
+    const results = this.db.prepare(query).all(...params, limit, offset) as StoredTemplate[];
+    logger.debug(`Metadata search found ${results.length} results`, { filters, count: results.length });
+    return results.map(t => this.decompressWorkflow(t));
+  }
+  
+  /**
+   * Get count for metadata search results
+   *
+   * @param filters - Same criteria as searchTemplatesByMetadata
+   * @returns Total number of matching templates, ignoring pagination
+   */
+  getMetadataSearchCount(filters: {
+    category?: string;
+    complexity?: 'simple' | 'medium' | 'complex';
+    maxSetupMinutes?: number;
+    minSetupMinutes?: number;
+    requiredService?: string;
+    targetAudience?: string;
+  }): number {
+    const { conditions, params } = this.buildMetadataFilterSql(filters);
+    
+    const query = `SELECT COUNT(*) as count FROM templates WHERE ${conditions.join(' AND ')}`;
+    const result = this.db.prepare(query).get(...params) as { count: number };
+    return result.count;
+  }
+  
+  /**
+   * Get unique categories from metadata. `value` from json_each is selected as-is:
+   * string elements arrive as plain text, which json_extract would re-parse as JSON.
+   */
+  getAvailableCategories(): string[] {
+    const results = this.db.prepare(`
+      SELECT DISTINCT value as category
+      FROM templates, json_each(json_extract(metadata_json, '$.categories'))
+      WHERE metadata_json IS NOT NULL
+      ORDER BY category
+    `).all() as { category: string }[];
+    return results.map(r => r.category);
+  }
+  
+  /**
+   * Get unique target audiences from metadata. `value` from json_each is selected as-is:
+   * string elements arrive as plain text, which json_extract would re-parse as JSON.
+   */
+  getAvailableTargetAudiences(): string[] {
+    const results = this.db.prepare(`
+      SELECT DISTINCT value as audience
+      FROM templates, json_each(json_extract(metadata_json, '$.target_audience'))
+      WHERE metadata_json IS NOT NULL
+      ORDER BY audience
+    `).all() as { audience: string }[];
+    return results.map(r => r.audience);
+  }
+  
+  /**
+   * Get templates by category with metadata; %, _ and \ in `category` are escaped so it matches literally
+   */
+  getTemplatesByCategory(category: string, limit: number = 10, offset: number = 0): StoredTemplate[] {
+    const query = `
+      SELECT * FROM templates 
+      WHERE metadata_json IS NOT NULL 
+        AND json_extract(metadata_json, '$.categories') LIKE ? ESCAPE '\\'
+      ORDER BY views DESC, created_at DESC
+      LIMIT ? OFFSET ?
+    `;
+    const pattern = `%"${category.replace(/[\\%_]/g, '\\$&')}"%`;
+    const results = this.db.prepare(query).all(pattern, limit, offset) as StoredTemplate[];
+    return results.map(t => this.decompressWorkflow(t));
+  }
+  
+  /**
+   * Fetch one page of templates whose metadata complexity equals `complexity`,
+   * ordered by views then created_at (both descending).
+   */
+  getTemplatesByComplexity(complexity: 'simple' | 'medium' | 'complex', limit: number = 10, offset: number = 0): StoredTemplate[] {
+    const statement = this.db.prepare(`
+      SELECT * FROM templates 
+      WHERE metadata_json IS NOT NULL 
+        AND json_extract(metadata_json, '$.complexity') = ?
+      ORDER BY views DESC, created_at DESC
+      LIMIT ? OFFSET ?
+    `);
+    const rows = statement.all(complexity, limit, offset) as StoredTemplate[];
+    return rows.map(row => this.decompressWorkflow(row));
+  }
 }
--- a/src/templates/template-service.ts
+++ b/src/templates/template-service.ts
@@ -15,6 +15,15 @@ export interface TemplateInfo {
  views: number;
  created: string;
  url: string;
+  metadata?: {
+    categories: string[];
+    complexity: 'simple' | 'medium' | 'complex';
+    use_cases: string[];
+    estimated_setup_minutes: number;
+    required_services: string[];
+    key_features: string[];
+    target_audience: string[];
+  };
 }

 export interface TemplateWithWorkflow extends TemplateInfo {
@@ -32,8 +41,18 @@ export interface PaginatedResponse<T> {
 export interface TemplateMinimal {
  id: number;
  name: string;
+  description: string; // listTemplates copies the stored template's description onto every item
  views: number;
  nodeCount: number;
+  metadata?: { // listTemplates sets this only when includeMetadata is true and metadata_json parses
+    categories: string[];
+    complexity: 'simple' | 'medium' | 'complex';
+    use_cases: string[];
+    estimated_setup_minutes: number;
+    required_services: string[];
+    key_features: string[];
+    target_audience: string[];
+  }; // NOTE(review): same shape as TemplateInfo.metadata; presumably mirrors the generator's JSON schema - confirm
 }

 export class TemplateService {
@@ -137,16 +156,30 @@ export class TemplateService {
  /**
   * List all templates with minimal data
   */
-  async listTemplates(limit: number = 10, offset: number = 0, sortBy: 'views' | 'created_at' | 'name' = 'views'): Promise<PaginatedResponse<TemplateMinimal>> {
+  async listTemplates(limit: number = 10, offset: number = 0, sortBy: 'views' | 'created_at' | 'name' = 'views', includeMetadata: boolean = false): Promise<PaginatedResponse<TemplateMinimal>> {
    const templates = this.repository.getAllTemplates(limit, offset, sortBy);
    const total = this.repository.getTemplateCount();
    
-    const items = templates.map(t => ({
-      id: t.id,
-      name: t.name,
-      views: t.views,
-      nodeCount: JSON.parse(t.nodes_used).length
-    }));
+    const items = templates.map(t => {
+      const item: TemplateMinimal = {
+        id: t.id,
+        name: t.name,
+        description: t.description, // Always include description
+        views: t.views,
+        nodeCount: JSON.parse(t.nodes_used).length
+      };
+      
+      // Optionally include metadata
+      if (includeMetadata && t.metadata_json) {
+        try {
+          item.metadata = JSON.parse(t.metadata_json);
+        } catch (error) {
+          logger.warn(`Failed to parse metadata for template ${t.id}:`, error);
+        }
+      }
+      
+      return item;
+    });
    
    return {
      items,
@@ -175,6 +208,87 @@ export class TemplateService {
    ];
  }
  
+  /**
+   * Run a metadata-filtered search and return one page of formatted templates.
+   *
+   * @returns Page of results; `total` counts all matches and `hasMore` is true
+   *   when matches remain beyond `offset + limit`
+   */
+  async searchTemplatesByMetadata(
+    filters: {
+      category?: string;
+      complexity?: 'simple' | 'medium' | 'complex';
+      maxSetupMinutes?: number;
+      minSetupMinutes?: number;
+      requiredService?: string;
+      targetAudience?: string;
+    },
+    limit: number = 20,
+    offset: number = 0
+  ): Promise<PaginatedResponse<TemplateInfo>> {
+    const rows = this.repository.searchTemplatesByMetadata(filters, limit, offset);
+    const total = this.repository.getMetadataSearchCount(filters);
+    
+    const items = rows.map(row => this.formatTemplateInfo(row));
+    const hasMore = total > offset + limit;
+    
+    return { items, total, limit, offset, hasMore };
+  }
+  
+  /** List the distinct categories the repository finds in template metadata. */
+  async getAvailableCategories(): Promise<string[]> {
+    const categories = this.repository.getAvailableCategories();
+    
+    return categories;
+  }
+  
+  /** List the distinct target audiences the repository finds in template metadata. */
+  async getAvailableTargetAudiences(): Promise<string[]> {
+    const audiences = this.repository.getAvailableTargetAudiences();
+    
+    return audiences;
+  }
+  
+  /**
+   * Return one page of templates tagged with `category`, formatted for the API.
+   *
+   * `total` comes from getMetadataSearchCount with the same category, and
+   * `hasMore` is true while matches remain beyond `offset + limit`.
+   */
+  async getTemplatesByCategory(
+    category: string,
+    limit: number = 10,
+    offset: number = 0
+  ): Promise<PaginatedResponse<TemplateInfo>> {
+    const rows = this.repository.getTemplatesByCategory(category, limit, offset);
+    const total = this.repository.getMetadataSearchCount({ category });
+    
+    const items = rows.map(row => this.formatTemplateInfo(row));
+    const hasMore = total > offset + limit;
+    
+    return { items, total, limit, offset, hasMore };
+  }
+  
+  /**
+   * Return one page of templates at the given complexity, formatted for the API.
+   *
+   * `total` comes from getMetadataSearchCount with the same complexity, and
+   * `hasMore` is true while matches remain beyond `offset + limit`.
+   */
+  async getTemplatesByComplexity(
+    complexity: 'simple' | 'medium' | 'complex',
+    limit: number = 10,
+    offset: number = 0
+  ): Promise<PaginatedResponse<TemplateInfo>> {
+    const rows = this.repository.getTemplatesByComplexity(complexity, limit, offset);
+    const total = this.repository.getMetadataSearchCount({ complexity });
+    
+    const items = rows.map(row => this.formatTemplateInfo(row));
+    const hasMore = total > offset + limit;
+    
+    return { items, total, limit, offset, hasMore };
+  }
+  
  /**
   * Get template statistics
   */
@@ -263,7 +377,7 @@ export class TemplateService {
   * Format stored template for API response
   */
  private formatTemplateInfo(template: StoredTemplate): TemplateInfo {
-    return {
+    const info: TemplateInfo = {
      id: template.id,
      name: template.name,
      description: template.description,
@@ -277,5 +391,16 @@ export class TemplateService {
      created: template.created_at,
      url: template.url
    };
+    
+    // Include metadata if available
+    if (template.metadata_json) {
+      try {
+        info.metadata = JSON.parse(template.metadata_json);
+      } catch (error) {
+        logger.warn(`Failed to parse metadata for template ${template.id}:`, error);
+      }
+    }
+    
+    return info;
  }
 }