Files
n8n-mcp/tests/unit/templates/batch-processor.test.ts
czlonkowski c18c4e7584 fix: address critical security issues in template metadata
- Fix SQL injection vulnerability in template-repository.ts
  - Use proper parameterization with SQLite concatenation operator
  - Escape JSON strings correctly for LIKE queries
  - Prevent malicious SQL through filter parameters

- Add input sanitization for OpenAI API calls
  - Sanitize template names and descriptions before sending to API
  - Remove control characters and prompt injection patterns
  - Limit input length to prevent token abuse

- Lower temperature to 0.3 for consistent structured outputs

- Add comprehensive test coverage
  - 100+ new tests for metadata functionality
  - Security-focused tests for SQL injection prevention
  - Integration tests with real database operations

Co-Authored-By: Claude <noreply@anthropic.com>
2025-09-15 00:51:41 +02:00

556 lines
19 KiB
TypeScript

import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
import * as fs from 'fs';
import * as path from 'path';
import { BatchProcessor, BatchProcessorOptions } from '../../../src/templates/batch-processor';
import { MetadataRequest } from '../../../src/templates/metadata-generator';
// Mock fs operations
vi.mock('fs');
const mockedFs = vi.mocked(fs);
// Mock OpenAI
/**
 * Shared stand-in for the OpenAI client used by the tests below.
 * Each endpoint is a bare `vi.fn()`; individual tests program the
 * resolved/rejected values they need.
 */
const mockClient = {
  files: { create: vi.fn(), content: vi.fn(), del: vi.fn() },
  batches: { create: vi.fn(), retrieve: vi.fn() },
};
// Replace the `openai` default export with a constructor that always yields
// `mockClient`.
//
// A plain class is used instead of `vi.fn().mockImplementation(...)` because
// the suite calls `vi.restoreAllMocks()` after every test; on Vitest releases
// where that resets `vi.fn()` implementations, a mock-function constructor
// would stop returning `mockClient` after the first test. A class is not a
// mock, so clear/reset/restore cannot touch it.
vi.mock('openai', () => {
  class MockOpenAI {
    constructor() {
      // Returning an object from a constructor makes `new MockOpenAI()`
      // evaluate to that object, so the client under test *is* `mockClient`.
      // `mockClient` is read here (at construction time), not when the
      // hoisted factory itself runs.
      return mockClient;
    }
  }
  return { default: MockOpenAI };
});
// Mock MetadataGenerator
/**
 * Spies that the mocked MetadataGenerator exposes as its
 * `createBatchRequest` and `parseResult` members.
 */
const mockGenerator = {
  createBatchRequest: vi.fn(),
  parseResult: vi.fn(),
};
// `vi.mock` calls are hoisted above the file's imports, and the factory below
// runs the first time the mocked module is imported. A class declared at
// module scope would still be in its temporal dead zone at that moment, so the
// replacement class is created through `vi.hoisted`, which is evaluated before
// the imports as well.
const MockMetadataGenerator = vi.hoisted(
  () =>
    class MockMetadataGenerator {
      // Field initialisers run on `new`, not when the class is created, so
      // reading `mockGenerator` here is safe even though it is declared later.
      createBatchRequest = mockGenerator.createBatchRequest;
      parseResult = mockGenerator.parseResult;
    }
);
vi.mock('../../../src/templates/metadata-generator', () => {
  return {
    MetadataGenerator: MockMetadataGenerator
  };
});
// Mock logger
vi.mock('../../../src/utils/logger', () => {
  // Replacement logger: each level is a spy with no implementation.
  const logger = {
    info: vi.fn(),
    warn: vi.fn(),
    error: vi.fn(),
    debug: vi.fn(),
  };
  return { logger };
});
describe('BatchProcessor', () => {
let processor: BatchProcessor;
let options: BatchProcessorOptions;
let mockStream: any;
beforeEach(() => {
  vi.clearAllMocks();

  // Small batch size so a handful of templates already spans several batches.
  options = {
    apiKey: 'test-api-key',
    model: 'gpt-4o-mini',
    batchSize: 3,
    outputDir: './test-temp',
  };

  // Fake write stream: a registered 'finish' listener is invoked on the next
  // timer tick; listeners for any other event are never called.
  const onStreamEvent = (event: string, callback: () => void) => {
    if (event !== 'finish') {
      return;
    }
    setTimeout(callback, 0);
  };
  mockStream = {
    write: vi.fn(),
    end: vi.fn(),
    on: vi.fn(onStreamEvent),
  };

  // File-system doubles: the output directory is reported as missing, and
  // stream creation hands back the fake stream above.
  mockedFs.existsSync = vi.fn().mockReturnValue(false);
  mockedFs.mkdirSync = vi.fn();
  mockedFs.createWriteStream = vi.fn().mockReturnValue(mockStream);
  mockedFs.createReadStream = vi.fn().mockReturnValue({});
  mockedFs.unlinkSync = vi.fn();

  // Built last so the constructor already sees the doubles installed above.
  processor = new BatchProcessor(options);
});
// Runs vi.restoreAllMocks() once each test has finished.
afterEach(function () {
  vi.restoreAllMocks();
});
describe('constructor', () => {
  it('should create output directory if it does not exist', () => {
    // `processor` was already constructed in the shared beforeEach, where
    // existsSync is stubbed to return false.
    const outputDir = './test-temp';

    expect(mockedFs.existsSync).toHaveBeenCalledWith(outputDir);
    expect(mockedFs.mkdirSync).toHaveBeenCalledWith(outputDir, { recursive: true });
  });

  it('should not create directory if it already exists', () => {
    // Fresh doubles so calls made while building the shared processor do not count.
    mockedFs.existsSync = vi.fn().mockReturnValue(true);
    mockedFs.mkdirSync = vi.fn();

    new BatchProcessor(options);

    expect(mockedFs.mkdirSync).not.toHaveBeenCalled();
  });

  it('should use default options when not provided', () => {
    // Only the API key is supplied. The original author notes the defaults are
    // batchSize 100 and outputDir './temp'; this test does not assert them.
    const proc = new BatchProcessor({ apiKey: 'test-key' });

    expect(proc).toBeDefined();
  });
});
describe('processTemplates', () => {
  // Four requests against batchSize = 3 (configured in the shared beforeEach).
  const mockTemplates: MetadataRequest[] = [
    { templateId: 1, name: 'Template 1', nodes: ['n8n-nodes-base.webhook'] },
    { templateId: 2, name: 'Template 2', nodes: ['n8n-nodes-base.slack'] },
    { templateId: 3, name: 'Template 3', nodes: ['n8n-nodes-base.httpRequest'] },
    { templateId: 4, name: 'Template 4', nodes: ['n8n-nodes-base.code'] },
  ];

  it('should process templates in batches correctly', async () => {
    // Upload succeeds and the job is reported as completed on every poll.
    const finishedJob = {
      id: 'batch-123',
      status: 'completed',
      output_file_id: 'output-file-123',
    };
    mockClient.files.create.mockResolvedValue({ id: 'file-123' });
    mockClient.batches.create.mockResolvedValue(finishedJob);
    mockClient.batches.retrieve.mockResolvedValue(finishedJob);

    // Output file with three lines; parseResult yields one entry per call for
    // the first three calls only.
    const rawOutput = ['result1', 'result2', 'result3'].join('\n');
    mockClient.files.content.mockResolvedValue({ text: async () => rawOutput });
    const parsedEntries = [
      { templateId: 1, metadata: { categories: ['automation'] } },
      { templateId: 2, metadata: { categories: ['communication'] } },
      { templateId: 3, metadata: { categories: ['integration'] } },
    ];
    for (const entry of parsedEntries) {
      mockGenerator.parseResult.mockReturnValueOnce(entry);
    }

    const onProgress = vi.fn();
    const results = await processor.processTemplates(mockTemplates, onProgress);

    // 4 templates at batchSize 3 → 2 batch jobs.
    expect(mockClient.batches.create).toHaveBeenCalledTimes(2);
    // Three parsed entries end up in the result map.
    expect(results.size).toBe(3);
    expect(onProgress).toHaveBeenCalled();
  });

  it('should handle empty templates array', async () => {
    const results = await processor.processTemplates([]);

    expect(results.size).toBe(0);
  });

  it('should handle batch submission errors gracefully', async () => {
    const uploadError = new Error('Upload failed');
    mockClient.files.create.mockRejectedValue(uploadError);

    // Expected to resolve (not reject) with nothing collected.
    const results = await processor.processTemplates([mockTemplates[0]]);

    expect(results.size).toBe(0);
  });

  it('should handle batch job failures', async () => {
    const failedJob = { id: 'batch-123', status: 'failed' };
    mockClient.files.create.mockResolvedValue({ id: 'file-123' });
    mockClient.batches.create.mockResolvedValue(failedJob);
    mockClient.batches.retrieve.mockResolvedValue(failedJob);

    const results = await processor.processTemplates([mockTemplates[0]]);

    expect(results.size).toBe(0);
  });
});
describe('createBatchFile', () => {
  it('should create JSONL file with correct format', async () => {
    const templates: MetadataRequest[] = [
      { templateId: 1, name: 'Test', nodes: ['node1'] },
      { templateId: 2, name: 'Test2', nodes: ['node2'] },
    ];
    // Every template maps to the same request object in this test.
    const batchRequest = { custom_id: 'template-1', method: 'POST' };
    mockGenerator.createBatchRequest.mockReturnValue(batchRequest);

    // createBatchFile is private, hence the `any` cast.
    const target = processor as any;
    const filename = await target.createBatchFile(templates, 'test_batch');

    // One newline-terminated JSON document per template.
    const expectedLine = JSON.stringify(batchRequest) + '\n';
    expect(mockStream.write).toHaveBeenCalledTimes(2);
    expect(mockStream.write).toHaveBeenCalledWith(expectedLine);
    expect(mockStream.end).toHaveBeenCalled();
    expect(filename).toContain('test_batch');
  });

  it('should handle stream errors', async () => {
    const templates: MetadataRequest[] = [
      { templateId: 1, name: 'Test', nodes: ['node1'] },
    ];
    // Swap the listener hook: only 'error' listeners fire, on the next tick.
    mockStream.on = vi.fn((event, callback) => {
      if (event !== 'error') {
        return;
      }
      setTimeout(() => callback(new Error('Stream error')), 0);
    });

    const pending = (processor as any).createBatchFile(templates, 'error_batch');

    await expect(pending).rejects.toThrow('Stream error');
  });
});
describe('uploadFile', () => {
  // Path handed to the private uploadFile method in both tests.
  const localPath = '/path/to/file.jsonl';

  it('should upload file to OpenAI', async () => {
    const uploaded = { id: 'uploaded-file-123' };
    mockClient.files.create.mockResolvedValue(uploaded);

    const result = await (processor as any).uploadFile(localPath);

    const expectedPayload = { file: expect.any(Object), purpose: 'batch' };
    expect(mockClient.files.create).toHaveBeenCalledWith(expectedPayload);
    expect(result).toEqual(uploaded);
  });

  it('should handle upload errors', async () => {
    mockClient.files.create.mockRejectedValue(new Error('Upload failed'));

    const pending = (processor as any).uploadFile(localPath);

    await expect(pending).rejects.toThrow('Upload failed');
  });
});
describe('createBatchJob', () => {
  // Input file id handed to the private createBatchJob method in both tests.
  const inputFileId = 'file-123';

  it('should create batch job with correct parameters', async () => {
    const createdJob = { id: 'batch-123' };
    mockClient.batches.create.mockResolvedValue(createdJob);

    const result = await (processor as any).createBatchJob(inputFileId);

    const expectedParams = {
      input_file_id: 'file-123',
      endpoint: '/v1/chat/completions',
      completion_window: '24h',
    };
    expect(mockClient.batches.create).toHaveBeenCalledWith(expectedParams);
    expect(result).toEqual(createdJob);
  });

  it('should handle batch creation errors', async () => {
    mockClient.batches.create.mockRejectedValue(new Error('Batch creation failed'));

    const pending = (processor as any).createBatchJob(inputFileId);

    await expect(pending).rejects.toThrow('Batch creation failed');
  });
});
describe('monitorBatchJob', () => {
  it('should monitor job until completion', async () => {
    const completedJob = { id: 'batch-123', status: 'completed' };
    mockClient.batches.retrieve.mockResolvedValue(completedJob);

    const result = await (processor as any).monitorBatchJob('batch-123');

    expect(mockClient.batches.retrieve).toHaveBeenCalledWith('batch-123');
    expect(result).toEqual(completedJob);
  });

  it('should handle status progression', async () => {
    const statuses = ['validating', 'in_progress', 'finalizing', 'completed'];
    let polls = 0;
    mockClient.batches.retrieve.mockImplementation(() => {
      // Clamp to the last status: an unexpected extra poll still receives a
      // well-formed "completed" job, so a regression shows up as a failed
      // call-count assertion rather than a TypeError on `undefined`.
      const status = statuses[Math.min(polls, statuses.length - 1)];
      polls += 1;
      return Promise.resolve({ id: 'batch-123', status });
    });

    // Stub the private sleep helper so the test does not wait between polls.
    const target = processor as any;
    const originalSleep = target.sleep;
    target.sleep = vi.fn().mockResolvedValue(undefined);
    try {
      const result = await target.monitorBatchJob('batch-123');

      expect(result.status).toBe('completed');
      expect(mockClient.batches.retrieve).toHaveBeenCalledTimes(4);
    } finally {
      // Put the real helper back even when an assertion above throws.
      target.sleep = originalSleep;
    }
  });

  // Each of these statuses must make monitoring reject with an error message
  // that names the status. Test titles are kept as they were.
  const terminalFailures = [
    { title: 'should throw error for failed jobs', status: 'failed' },
    { title: 'should handle expired jobs', status: 'expired' },
    { title: 'should handle cancelled jobs', status: 'cancelled' },
  ];
  for (const { title, status } of terminalFailures) {
    it(title, async () => {
      mockClient.batches.retrieve.mockResolvedValue({ id: 'batch-123', status });

      await expect(
        (processor as any).monitorBatchJob('batch-123')
      ).rejects.toThrow(`Batch job failed with status: ${status}`);
    });
  }

  it('should timeout after max attempts', async () => {
    // The job never leaves "in_progress".
    const inProgressJob = { id: 'batch-123', status: 'in_progress' };
    mockClient.batches.retrieve.mockResolvedValue(inProgressJob);
    // Stub sleep so the polling loop runs without delays.
    (processor as any).sleep = vi.fn().mockResolvedValue(undefined);

    await expect(
      (processor as any).monitorBatchJob('batch-123')
    ).rejects.toThrow('Batch job monitoring timed out');
  });
});
describe('retrieveResults', () => {
  // Makes files.content resolve to a response-like object whose text() yields `body`.
  const stubOutputFile = (body: string) => {
    mockClient.files.content.mockResolvedValue({ text: async () => body });
  };

  it('should download and parse results correctly', async () => {
    stubOutputFile('{"custom_id": "template-1"}\n{"custom_id": "template-2"}');
    const parsed = [
      { templateId: 1, metadata: { categories: ['test'] } },
      { templateId: 2, metadata: { categories: ['test2'] } },
    ];
    for (const entry of parsed) {
      mockGenerator.parseResult.mockReturnValueOnce(entry);
    }

    const results = await (processor as any).retrieveResults({ output_file_id: 'output-123' });

    expect(mockClient.files.content).toHaveBeenCalledWith('output-123');
    expect(mockGenerator.parseResult).toHaveBeenCalledTimes(2);
    expect(results).toHaveLength(2);
  });

  it('should throw error when no output file available', async () => {
    const jobWithoutOutput = { output_file_id: null };

    const pending = (processor as any).retrieveResults(jobWithoutOutput);

    await expect(pending).rejects.toThrow('No output file available for batch job');
  });

  it('should handle malformed result lines gracefully', async () => {
    // The middle line is not JSON; the two lines around it are.
    stubOutputFile('{"valid": "json"}\ninvalid json line\n{"another": "valid"}');
    mockGenerator.parseResult.mockReturnValue({
      templateId: 1,
      metadata: { categories: ['test'] },
    });

    const results = await (processor as any).retrieveResults({ output_file_id: 'output-123' });

    // Only the two well-formed lines are expected to reach parseResult.
    expect(results).toHaveLength(2);
    expect(mockGenerator.parseResult).toHaveBeenCalledTimes(2);
  });

  it('should handle file download errors', async () => {
    mockClient.files.content.mockRejectedValue(new Error('Download failed'));

    const pending = (processor as any).retrieveResults({ output_file_id: 'output-123' });

    await expect(pending).rejects.toThrow('Download failed');
  });
});
describe('cleanup', () => {
  it('should clean up all files successfully', async () => {
    await (processor as any).cleanup('local-file.jsonl', 'input-123', 'output-456');

    // The local file is unlinked and both remote files are deleted.
    expect(mockedFs.unlinkSync).toHaveBeenCalledWith('local-file.jsonl');
    for (const remoteId of ['input-123', 'output-456']) {
      expect(mockClient.files.del).toHaveBeenCalledWith(remoteId);
    }
  });

  it('should handle local file deletion errors gracefully', async () => {
    mockedFs.unlinkSync = vi.fn().mockImplementation(() => {
      throw new Error('File not found');
    });

    const pending = (processor as any).cleanup('nonexistent.jsonl', 'input-123');

    // The unlink failure must not surface to the caller.
    await expect(pending).resolves.toBeUndefined();
  });

  it('should handle OpenAI file deletion errors gracefully', async () => {
    mockClient.files.del.mockRejectedValue(new Error('Delete failed'));

    const pending = (processor as any).cleanup('local-file.jsonl', 'input-123', 'output-456');

    // The remote delete failure must not surface to the caller.
    await expect(pending).resolves.toBeUndefined();
  });

  it('should work without output file ID', async () => {
    await (processor as any).cleanup('local-file.jsonl', 'input-123');

    expect(mockedFs.unlinkSync).toHaveBeenCalledWith('local-file.jsonl');
    expect(mockClient.files.del).toHaveBeenCalledWith('input-123');
    // No output id was given, so exactly one remote delete is expected.
    expect(mockClient.files.del).toHaveBeenCalledTimes(1);
  });
});
describe('createBatches', () => {
  // Builds `count` requests with ids 1..count, names 'T1'..'T<count>' and no nodes.
  const makeTemplates = (count: number): MetadataRequest[] => {
    const templates: MetadataRequest[] = [];
    for (let id = 1; id <= count; id += 1) {
      templates.push({ templateId: id, name: `T${id}`, nodes: [] });
    }
    return templates;
  };

  it('should split templates into correct batch sizes', () => {
    const batches = (processor as any).createBatches(makeTemplates(5));

    // batchSize is 3 (shared beforeEach): five templates → chunks of 3 and 2.
    expect(batches).toHaveLength(2);
    expect(batches[0]).toHaveLength(3);
    expect(batches[1]).toHaveLength(2);
  });

  it('should handle single template correctly', () => {
    const batches = (processor as any).createBatches(makeTemplates(1));

    expect(batches).toHaveLength(1);
    expect(batches[0]).toHaveLength(1);
  });

  it('should handle empty templates array', () => {
    const batches = (processor as any).createBatches([]);

    expect(batches).toHaveLength(0);
  });
});
describe('file system security', () => {
  it('should sanitize file paths to prevent directory traversal', async () => {
    // Batch name that tries to climb out of the output directory.
    const hostileName = '../../../etc/passwd';
    const templates = [{ templateId: 1, name: 'Test', nodes: [] }];

    await (processor as any).createBatchFile(templates, hostileName);

    // Inspect the path given to the first createWriteStream call: it must
    // stay under the configured output directory and contain no '../'.
    const [firstCall] = mockedFs.createWriteStream.mock.calls;
    const writtenPath = firstCall[0];
    expect(writtenPath).toMatch(/^\.\/test-temp\//);
    expect(writtenPath).not.toContain('../');
  });

  it('should handle very long file names gracefully', async () => {
    // 300-character batch name.
    const oversizedName = 'a'.repeat(300);
    const templates = [{ templateId: 1, name: 'Test', nodes: [] }];

    const pending = (processor as any).createBatchFile(templates, oversizedName);

    await expect(pending).resolves.toBeDefined();
  });
});
describe('memory management', () => {
  it('should clean up files even on processing errors', async () => {
    const templates = [{ templateId: 1, name: 'Test', nodes: [] }];
    // The upload step rejects.
    mockClient.files.create.mockRejectedValue(new Error('Upload failed'));

    // submitBatch is private; calling it on the instance keeps `this` bound.
    const pending = (processor as any).submitBatch(templates, 'error_test');

    await expect(pending).rejects.toThrow('Upload failed');
    // The local batch file must still have been unlinked.
    expect(mockedFs.unlinkSync).toHaveBeenCalled();
  });

  it('should handle concurrent batch processing correctly', async () => {
    // Ten templates, ids 1..10.
    const templates: { templateId: number; name: string; nodes: string[] }[] = [];
    for (let id = 1; id <= 10; id += 1) {
      templates.push({ templateId: id, name: `Template ${id}`, nodes: ['node'] });
    }

    // Every step of the pipeline succeeds.
    const completedJob = {
      id: 'batch-123',
      status: 'completed',
      output_file_id: 'output-123',
    };
    mockClient.files.create.mockResolvedValue({ id: 'file-123' });
    mockClient.batches.create.mockResolvedValue(completedJob);
    mockClient.batches.retrieve.mockResolvedValue(completedJob);
    mockClient.files.content.mockResolvedValue({
      text: async () => '{"custom_id": "template-1"}',
    });
    mockGenerator.parseResult.mockReturnValue({
      templateId: 1,
      metadata: { categories: ['test'] },
    });

    const results = await processor.processTemplates(templates);

    expect(results.size).toBeGreaterThan(0);
    expect(mockClient.batches.create).toHaveBeenCalled();
  });
});
});