feat: add incremental template updates and fix metadata generation

Template Updates:
- Add npm script for incremental template fetch (fetch:templates:update)
- Create MEMORY_TEMPLATE_UPDATE.md with comprehensive documentation
- Add 48 new templates (2598 → 2646 total)
- Latest template is now from September 24, 2025

Metadata Generation Fixes:
- Update default model from gpt-4o-mini to gpt-5-mini-2025-08-07
- Remove temperature parameter (only the default value is supported for this model)
- Increase max_completion_tokens from 1000 to 3000
- Add comprehensive error file handling to batch-processor
- Process failed requests and assign default metadata
- Save error files for debugging (temp/batch/)

Test Updates:
- Update all test files to use the gpt-5-mini-2025-08-07 model
- 3 test assertions updated in metadata-generator.test.ts
- 1 test option updated in batch-processor.test.ts

Documentation:
- Add troubleshooting section for metadata generation
- Include error handling examples
- Document incremental vs full rebuild modes

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2026-03-22 18:33:08 +00:00 · 2025-09-30 09:59:42 +02:00
parent 87cc84f593
commit fff47f9f9d
8 changed files with 521 additions and 29 deletions
--- a/src/templates/batch-processor.ts
+++ b/src/templates/batch-processor.ts
@@ -312,31 +312,80 @@ export class BatchProcessor {
   * Retrieve and parse results
   */
  private async retrieveResults(batchJob: any): Promise<MetadataResult[]> {
-    if (!batchJob.output_file_id) {
-      throw new Error('No output file available for batch job');
-    }
-    
-    // Download result file
-    const fileResponse = await this.client.files.content(batchJob.output_file_id);
-    const fileContent = await fileResponse.text();
-    
-    // Parse JSONL results
    const results: MetadataResult[] = [];
-    const lines = fileContent.trim().split('\n');
-    
-    for (const line of lines) {
-      if (!line) continue;
-      
+
+    // Check if we have an output file (successful results)
+    if (batchJob.output_file_id) {
+      const fileResponse = await this.client.files.content(batchJob.output_file_id);
+      const fileContent = await fileResponse.text();
+
+      const lines = fileContent.trim().split('\n');
+      for (const line of lines) {
+        if (!line) continue;
+        try {
+          const result = JSON.parse(line);
+          const parsed = this.generator.parseResult(result);
+          results.push(parsed);
+        } catch (error) {
+          logger.error('Error parsing result line:', error);
+        }
+      }
+      logger.info(`Retrieved ${results.length} successful results from batch job`);
+    }
+
+    // Check if we have an error file (failed results)
+    if (batchJob.error_file_id) {
+      logger.warn(`Batch job has error file: ${batchJob.error_file_id}`);
+
      try {
-        const result = JSON.parse(line);
-        const parsed = this.generator.parseResult(result);
-        results.push(parsed);
+        const errorResponse = await this.client.files.content(batchJob.error_file_id);
+        const errorContent = await errorResponse.text();
+
+        // Save error file locally for debugging
+        const errorFilePath = path.join(this.outputDir, `batch_${batchJob.id}_error.jsonl`);
+        fs.writeFileSync(errorFilePath, errorContent);
+        logger.warn(`Error file saved to: ${errorFilePath}`);
+
+        // Parse errors and create default metadata for failed templates
+        const errorLines = errorContent.trim().split('\n');
+        logger.warn(`Found ${errorLines.length} failed requests in error file`);
+
+        for (const line of errorLines) {
+          if (!line) continue;
+          try {
+            const errorResult = JSON.parse(line);
+            const templateId = parseInt(errorResult.custom_id?.replace('template-', '') || '0');
+
+            if (templateId > 0) {
+              const errorMessage = errorResult.response?.body?.error?.message ||
+                                  errorResult.error?.message ||
+                                  'Unknown error';
+
+              logger.debug(`Template ${templateId} failed: ${errorMessage}`);
+
+              // Use getDefaultMetadata() from generator (it's private, so cast to any to bypass the visibility check)
+              const defaultMeta = (this.generator as any).getDefaultMetadata();
+              results.push({
+                templateId,
+                metadata: defaultMeta,
+                error: errorMessage
+              });
+            }
+          } catch (parseError) {
+            logger.error('Error parsing error line:', parseError);
+          }
+        }
      } catch (error) {
-        logger.error('Error parsing result line:', error);
+        logger.error('Failed to process error file:', error);
      }
    }
-    
-    logger.info(`Retrieved ${results.length} results from batch job`);
+
+    // If we have no results at all, something is very wrong
+    if (results.length === 0 && !batchJob.output_file_id && !batchJob.error_file_id) {
+      throw new Error('No output file or error file available for batch job');
+    }
+
+    logger.info(`Total results (successful + failed): ${results.length}`);
    return results;
  }
  
--- a/src/templates/metadata-generator.ts
+++ b/src/templates/metadata-generator.ts
@@ -34,7 +34,7 @@ export class MetadataGenerator {
  private client: OpenAI;
  private model: string;
  
-  constructor(apiKey: string, model: string = 'gpt-4o-mini') {
+  constructor(apiKey: string, model: string = 'gpt-5-mini-2025-08-07') {
    this.client = new OpenAI({ apiKey });
    this.model = model;
  }
@@ -131,8 +131,8 @@ export class MetadataGenerator {
      url: '/v1/chat/completions',
      body: {
        model: this.model,
-        temperature: 0.3,  // Lower temperature for more consistent structured outputs
-        max_completion_tokens: 1000,
+        // temperature removed - batch API only supports default (1.0) for this model
+        max_completion_tokens: 3000,
        response_format: {
          type: 'json_schema',
          json_schema: this.getJsonSchema()
@@ -288,8 +288,8 @@ export class MetadataGenerator {
    try {
      const completion = await this.client.chat.completions.create({
        model: this.model,
-        temperature: 0.3,  // Lower temperature for more consistent structured outputs
-        max_completion_tokens: 1000,
+        // temperature removed - only the default (1.0) is supported for this model
+        max_completion_tokens: 3000,
        response_format: {
          type: 'json_schema',
          json_schema: this.getJsonSchema()