feat: add incremental template updates and fix metadata generation

Template Updates:
- Add npm script for incremental template fetch (fetch:templates:update)
- Create MEMORY_TEMPLATE_UPDATE.md with comprehensive documentation
- Add 48 new templates (2598 → 2646 total)
- Latest template now from September 24, 2025

Metadata Generation Fixes:
- Update model from gpt-4o-mini to gpt-5-mini-2025-08-07
- Remove temperature parameter from batch and direct requests (batch API only supports the default 1.0 for this model)
- Increase max_completion_tokens from 1000 to 3000
- Add comprehensive error file handling to batch-processor
- Process failed requests and assign default metadata
- Save error files for debugging (temp/batch/)

Test Updates:
- Update all test files to use gpt-5-mini-2025-08-07 model
- 3 test assertions updated in metadata-generator.test.ts
- 1 test option updated in batch-processor.test.ts

Documentation:
- Add troubleshooting section for metadata generation
- Include error handling examples
- Document incremental vs full rebuild modes

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
czlonkowski
2025-09-30 09:59:42 +02:00
parent 87cc84f593
commit fff47f9f9d
8 changed files with 521 additions and 29 deletions

View File

@@ -312,31 +312,80 @@ export class BatchProcessor {
* Retrieve and parse results
*/
private async retrieveResults(batchJob: any): Promise<MetadataResult[]> {
if (!batchJob.output_file_id) {
throw new Error('No output file available for batch job');
}
// Download result file
const fileResponse = await this.client.files.content(batchJob.output_file_id);
const fileContent = await fileResponse.text();
// Parse JSONL results
const results: MetadataResult[] = [];
const lines = fileContent.trim().split('\n');
for (const line of lines) {
if (!line) continue;
// Check if we have an output file (successful results)
if (batchJob.output_file_id) {
const fileResponse = await this.client.files.content(batchJob.output_file_id);
const fileContent = await fileResponse.text();
const lines = fileContent.trim().split('\n');
for (const line of lines) {
if (!line) continue;
try {
const result = JSON.parse(line);
const parsed = this.generator.parseResult(result);
results.push(parsed);
} catch (error) {
logger.error('Error parsing result line:', error);
}
}
logger.info(`Retrieved ${results.length} successful results from batch job`);
}
// Check if we have an error file (failed results)
if (batchJob.error_file_id) {
logger.warn(`Batch job has error file: ${batchJob.error_file_id}`);
try {
const result = JSON.parse(line);
const parsed = this.generator.parseResult(result);
results.push(parsed);
const errorResponse = await this.client.files.content(batchJob.error_file_id);
const errorContent = await errorResponse.text();
// Save error file locally for debugging
const errorFilePath = path.join(this.outputDir, `batch_${batchJob.id}_error.jsonl`);
fs.writeFileSync(errorFilePath, errorContent);
logger.warn(`Error file saved to: ${errorFilePath}`);
// Parse errors and create default metadata for failed templates
const errorLines = errorContent.trim().split('\n');
logger.warn(`Found ${errorLines.length} failed requests in error file`);
for (const line of errorLines) {
if (!line) continue;
try {
const errorResult = JSON.parse(line);
const templateId = parseInt(errorResult.custom_id?.replace('template-', '') || '0');
if (templateId > 0) {
const errorMessage = errorResult.response?.body?.error?.message ||
errorResult.error?.message ||
'Unknown error';
logger.debug(`Template ${templateId} failed: ${errorMessage}`);
// Use getDefaultMetadata() from generator (it's private, so it is reached through an `as any` cast)
const defaultMeta = (this.generator as any).getDefaultMetadata();
results.push({
templateId,
metadata: defaultMeta,
error: errorMessage
});
}
} catch (parseError) {
logger.error('Error parsing error line:', parseError);
}
}
} catch (error) {
logger.error('Error parsing result line:', error);
logger.error('Failed to process error file:', error);
}
}
logger.info(`Retrieved ${results.length} results from batch job`);
// If we have no results at all, something is very wrong
if (results.length === 0 && !batchJob.output_file_id && !batchJob.error_file_id) {
throw new Error('No output file or error file available for batch job');
}
logger.info(`Total results (successful + failed): ${results.length}`);
return results;
}

View File

@@ -34,7 +34,7 @@ export class MetadataGenerator {
private client: OpenAI;
private model: string;
constructor(apiKey: string, model: string = 'gpt-4o-mini') {
constructor(apiKey: string, model: string = 'gpt-5-mini-2025-08-07') {
this.client = new OpenAI({ apiKey });
this.model = model;
}
@@ -131,8 +131,8 @@ export class MetadataGenerator {
url: '/v1/chat/completions',
body: {
model: this.model,
temperature: 0.3, // Lower temperature for more consistent structured outputs
max_completion_tokens: 1000,
// temperature removed - batch API only supports default (1.0) for this model
max_completion_tokens: 3000,
response_format: {
type: 'json_schema',
json_schema: this.getJsonSchema()
@@ -288,8 +288,8 @@ export class MetadataGenerator {
try {
const completion = await this.client.chat.completions.create({
model: this.model,
temperature: 0.3, // Lower temperature for more consistent structured outputs
max_completion_tokens: 1000,
// temperature removed - matches the batch request, where only the default (1.0) is supported for this model
max_completion_tokens: 3000,
response_format: {
type: 'json_schema',
json_schema: this.getJsonSchema()