Mirror of https://github.com/czlonkowski/n8n-mcp.git (synced 2026-02-06 05:23:08 +00:00)
- Fix SQL injection vulnerability in template-repository.ts
  - Use proper parameterization with SQLite concatenation operator
  - Escape JSON strings correctly for LIKE queries
  - Prevent malicious SQL through filter parameters
- Add input sanitization for OpenAI API calls
  - Sanitize template names and descriptions before sending to API
  - Remove control characters and prompt injection patterns
  - Limit input length to prevent token abuse
  - Lower temperature to 0.3 for consistent structured outputs
- Add comprehensive test coverage
  - 100+ new tests for metadata functionality
  - Security-focused tests for SQL injection prevention
  - Integration tests with real database operations

Co-Authored-By: Claude <noreply@anthropic.com>
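For illustration only, here is a minimal TypeScript sketch of the two mitigations the commit describes; the helper names (escapeLikeTerm, sanitizeForPrompt) and the exact rules are assumptions made for this example, not the repository's actual implementation.

// Hypothetical helpers sketching the ideas in the commit message.
// escapeLikeTerm: escape LIKE wildcards so user input can only be matched literally;
// the value is then bound as a parameter, e.g. ... name LIKE '%' || ? || '%' ESCAPE '\'.
export function escapeLikeTerm(term: string): string {
  return term.replace(/[\\%_]/g, (ch) => '\\' + ch);
}

// sanitizeForPrompt: strip control characters, collapse whitespace, and cap length
// before text is sent to an LLM API, limiting prompt-injection surface and token usage.
export function sanitizeForPrompt(text: string, maxLength = 500): string {
  return text
    .replace(/[\u0000-\u001f\u007f]/g, ' ') // drop control characters
    .replace(/\s+/g, ' ')                   // collapse whitespace
    .slice(0, maxLength)                    // cap length to limit token abuse
    .trim();
}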
556 lines
19 KiB
TypeScript
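/**
 * Unit tests for BatchProcessor (src/templates/batch-processor.ts).
 * The OpenAI client, MetadataGenerator, fs, and logger are all mocked, so no
 * network or disk I/O happens here. Private methods are exercised through
 * `as any` casts to cover batching, job monitoring, result parsing, cleanup,
 * path sanitization, and error handling.
 */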
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
import * as fs from 'fs';
import * as path from 'path';
import { BatchProcessor, BatchProcessorOptions } from '../../../src/templates/batch-processor';
import { MetadataRequest } from '../../../src/templates/metadata-generator';

// Mock fs operations
vi.mock('fs');
const mockedFs = vi.mocked(fs);

// Mock OpenAI
const mockClient = {
  files: {
    create: vi.fn(),
    content: vi.fn(),
    del: vi.fn()
  },
  batches: {
    create: vi.fn(),
    retrieve: vi.fn()
  }
};

vi.mock('openai', () => {
  return {
    default: vi.fn().mockImplementation(() => mockClient)
  };
});

// Mock MetadataGenerator
const mockGenerator = {
  createBatchRequest: vi.fn(),
  parseResult: vi.fn()
};

class MockMetadataGenerator {
  createBatchRequest = mockGenerator.createBatchRequest;
  parseResult = mockGenerator.parseResult;
}

vi.mock('../../../src/templates/metadata-generator', () => {
  return {
    MetadataGenerator: MockMetadataGenerator
  };
});

// Mock logger
vi.mock('../../../src/utils/logger', () => ({
  logger: {
    info: vi.fn(),
    warn: vi.fn(),
    error: vi.fn(),
    debug: vi.fn()
  }
}));

describe('BatchProcessor', () => {
  let processor: BatchProcessor;
  let options: BatchProcessorOptions;
  let mockStream: any;

  beforeEach(() => {
    vi.clearAllMocks();

    options = {
      apiKey: 'test-api-key',
      model: 'gpt-4o-mini',
      batchSize: 3,
      outputDir: './test-temp'
    };

    // Mock stream for file writing
    mockStream = {
      write: vi.fn(),
      end: vi.fn(),
      on: vi.fn((event, callback) => {
        if (event === 'finish') {
          setTimeout(callback, 0);
        }
      })
    };

    // Mock fs operations
    mockedFs.existsSync = vi.fn().mockReturnValue(false);
    mockedFs.mkdirSync = vi.fn();
    mockedFs.createWriteStream = vi.fn().mockReturnValue(mockStream);
    mockedFs.createReadStream = vi.fn().mockReturnValue({});
    mockedFs.unlinkSync = vi.fn();

    processor = new BatchProcessor(options);
  });

  afterEach(() => {
    vi.restoreAllMocks();
  });

  describe('constructor', () => {
    it('should create output directory if it does not exist', () => {
      expect(mockedFs.existsSync).toHaveBeenCalledWith('./test-temp');
      expect(mockedFs.mkdirSync).toHaveBeenCalledWith('./test-temp', { recursive: true });
    });

    it('should not create directory if it already exists', () => {
      mockedFs.existsSync = vi.fn().mockReturnValue(true);
      mockedFs.mkdirSync = vi.fn();

      new BatchProcessor(options);

      expect(mockedFs.mkdirSync).not.toHaveBeenCalled();
    });

    it('should use default options when not provided', () => {
      const minimalOptions = { apiKey: 'test-key' };
      const proc = new BatchProcessor(minimalOptions);

      expect(proc).toBeDefined();
      // Default batchSize is 100, outputDir is './temp'
    });
  });

  describe('processTemplates', () => {
    const mockTemplates: MetadataRequest[] = [
      { templateId: 1, name: 'Template 1', nodes: ['n8n-nodes-base.webhook'] },
      { templateId: 2, name: 'Template 2', nodes: ['n8n-nodes-base.slack'] },
      { templateId: 3, name: 'Template 3', nodes: ['n8n-nodes-base.httpRequest'] },
      { templateId: 4, name: 'Template 4', nodes: ['n8n-nodes-base.code'] }
    ];

    it('should process templates in batches correctly', async () => {
      // Mock file operations
      const mockFile = { id: 'file-123' };
      mockClient.files.create.mockResolvedValue(mockFile);

      // Mock batch job
      const mockBatchJob = {
        id: 'batch-123',
        status: 'completed',
        output_file_id: 'output-file-123'
      };
      mockClient.batches.create.mockResolvedValue(mockBatchJob);
      mockClient.batches.retrieve.mockResolvedValue(mockBatchJob);

      // Mock results
      const mockFileContent = 'result1\nresult2\nresult3';
      mockClient.files.content.mockResolvedValue({ text: () => Promise.resolve(mockFileContent) });

      const mockParsedResults = [
        { templateId: 1, metadata: { categories: ['automation'] } },
        { templateId: 2, metadata: { categories: ['communication'] } },
        { templateId: 3, metadata: { categories: ['integration'] } }
      ];
      mockGenerator.parseResult.mockReturnValueOnce(mockParsedResults[0])
        .mockReturnValueOnce(mockParsedResults[1])
        .mockReturnValueOnce(mockParsedResults[2]);

      const progressCallback = vi.fn();
      const results = await processor.processTemplates(mockTemplates, progressCallback);

      // Should create 2 batches (batchSize = 3, templates = 4)
      expect(mockClient.batches.create).toHaveBeenCalledTimes(2);
      expect(results.size).toBe(3); // 3 successful results
      expect(progressCallback).toHaveBeenCalled();
    });

    it('should handle empty templates array', async () => {
      const results = await processor.processTemplates([]);
      expect(results.size).toBe(0);
    });

    it('should handle batch submission errors gracefully', async () => {
      mockClient.files.create.mockRejectedValue(new Error('Upload failed'));

      const results = await processor.processTemplates([mockTemplates[0]]);

      // Should not throw, should return empty results
      expect(results.size).toBe(0);
    });

    it('should handle batch job failures', async () => {
      const mockFile = { id: 'file-123' };
      mockClient.files.create.mockResolvedValue(mockFile);

      const failedBatchJob = {
        id: 'batch-123',
        status: 'failed'
      };
      mockClient.batches.create.mockResolvedValue(failedBatchJob);
      mockClient.batches.retrieve.mockResolvedValue(failedBatchJob);

      const results = await processor.processTemplates([mockTemplates[0]]);

      expect(results.size).toBe(0);
    });
  });

  describe('createBatchFile', () => {
    it('should create JSONL file with correct format', async () => {
      const templates: MetadataRequest[] = [
        { templateId: 1, name: 'Test', nodes: ['node1'] },
        { templateId: 2, name: 'Test2', nodes: ['node2'] }
      ];

      const mockRequest = { custom_id: 'template-1', method: 'POST' };
      mockGenerator.createBatchRequest.mockReturnValue(mockRequest);

      // Access private method through type assertion
      const filename = await (processor as any).createBatchFile(templates, 'test_batch');

      expect(mockStream.write).toHaveBeenCalledTimes(2);
      expect(mockStream.write).toHaveBeenCalledWith(JSON.stringify(mockRequest) + '\n');
      expect(mockStream.end).toHaveBeenCalled();
      expect(filename).toContain('test_batch');
    });

    it('should handle stream errors', async () => {
      const templates: MetadataRequest[] = [
        { templateId: 1, name: 'Test', nodes: ['node1'] }
      ];

      // Mock stream error
      mockStream.on = vi.fn((event, callback) => {
        if (event === 'error') {
          setTimeout(() => callback(new Error('Stream error')), 0);
        }
      });

      await expect(
        (processor as any).createBatchFile(templates, 'error_batch')
      ).rejects.toThrow('Stream error');
    });
  });

  describe('uploadFile', () => {
    it('should upload file to OpenAI', async () => {
      const mockFile = { id: 'uploaded-file-123' };
      mockClient.files.create.mockResolvedValue(mockFile);

      const result = await (processor as any).uploadFile('/path/to/file.jsonl');

      expect(mockClient.files.create).toHaveBeenCalledWith({
        file: expect.any(Object),
        purpose: 'batch'
      });
      expect(result).toEqual(mockFile);
    });

    it('should handle upload errors', async () => {
      mockClient.files.create.mockRejectedValue(new Error('Upload failed'));

      await expect(
        (processor as any).uploadFile('/path/to/file.jsonl')
      ).rejects.toThrow('Upload failed');
    });
  });

  describe('createBatchJob', () => {
    it('should create batch job with correct parameters', async () => {
      const mockBatchJob = { id: 'batch-123' };
      mockClient.batches.create.mockResolvedValue(mockBatchJob);

      const result = await (processor as any).createBatchJob('file-123');

      expect(mockClient.batches.create).toHaveBeenCalledWith({
        input_file_id: 'file-123',
        endpoint: '/v1/chat/completions',
        completion_window: '24h'
      });
      expect(result).toEqual(mockBatchJob);
    });

    it('should handle batch creation errors', async () => {
      mockClient.batches.create.mockRejectedValue(new Error('Batch creation failed'));

      await expect(
        (processor as any).createBatchJob('file-123')
      ).rejects.toThrow('Batch creation failed');
    });
  });

  describe('monitorBatchJob', () => {
    it('should monitor job until completion', async () => {
      const completedJob = { id: 'batch-123', status: 'completed' };
      mockClient.batches.retrieve.mockResolvedValue(completedJob);

      const result = await (processor as any).monitorBatchJob('batch-123');

      expect(mockClient.batches.retrieve).toHaveBeenCalledWith('batch-123');
      expect(result).toEqual(completedJob);
    });

    it('should handle status progression', async () => {
      const jobs = [
        { id: 'batch-123', status: 'validating' },
        { id: 'batch-123', status: 'in_progress' },
        { id: 'batch-123', status: 'finalizing' },
        { id: 'batch-123', status: 'completed' }
      ];

      mockClient.batches.retrieve.mockImplementation(() => {
        return Promise.resolve(jobs.shift() || jobs[jobs.length - 1]);
      });

      // Mock sleep to speed up test
      const originalSleep = (processor as any).sleep;
      (processor as any).sleep = vi.fn().mockResolvedValue(undefined);

      const result = await (processor as any).monitorBatchJob('batch-123');

      expect(result.status).toBe('completed');
      expect(mockClient.batches.retrieve).toHaveBeenCalledTimes(4);

      // Restore original sleep method
      (processor as any).sleep = originalSleep;
    });

    it('should throw error for failed jobs', async () => {
      const failedJob = { id: 'batch-123', status: 'failed' };
      mockClient.batches.retrieve.mockResolvedValue(failedJob);

      await expect(
        (processor as any).monitorBatchJob('batch-123')
      ).rejects.toThrow('Batch job failed with status: failed');
    });

    it('should handle expired jobs', async () => {
      const expiredJob = { id: 'batch-123', status: 'expired' };
      mockClient.batches.retrieve.mockResolvedValue(expiredJob);

      await expect(
        (processor as any).monitorBatchJob('batch-123')
      ).rejects.toThrow('Batch job failed with status: expired');
    });

    it('should handle cancelled jobs', async () => {
      const cancelledJob = { id: 'batch-123', status: 'cancelled' };
      mockClient.batches.retrieve.mockResolvedValue(cancelledJob);

      await expect(
        (processor as any).monitorBatchJob('batch-123')
      ).rejects.toThrow('Batch job failed with status: cancelled');
    });

    it('should timeout after max attempts', async () => {
      const inProgressJob = { id: 'batch-123', status: 'in_progress' };
      mockClient.batches.retrieve.mockResolvedValue(inProgressJob);

      // Mock sleep to speed up test
      (processor as any).sleep = vi.fn().mockResolvedValue(undefined);

      await expect(
        (processor as any).monitorBatchJob('batch-123')
      ).rejects.toThrow('Batch job monitoring timed out');
    });
  });

  describe('retrieveResults', () => {
    it('should download and parse results correctly', async () => {
      const batchJob = { output_file_id: 'output-123' };
      const fileContent = '{"custom_id": "template-1"}\n{"custom_id": "template-2"}';

      mockClient.files.content.mockResolvedValue({
        text: () => Promise.resolve(fileContent)
      });

      const mockResults = [
        { templateId: 1, metadata: { categories: ['test'] } },
        { templateId: 2, metadata: { categories: ['test2'] } }
      ];

      mockGenerator.parseResult.mockReturnValueOnce(mockResults[0])
        .mockReturnValueOnce(mockResults[1]);

      const results = await (processor as any).retrieveResults(batchJob);

      expect(mockClient.files.content).toHaveBeenCalledWith('output-123');
      expect(mockGenerator.parseResult).toHaveBeenCalledTimes(2);
      expect(results).toHaveLength(2);
    });

    it('should throw error when no output file available', async () => {
      const batchJob = { output_file_id: null };

      await expect(
        (processor as any).retrieveResults(batchJob)
      ).rejects.toThrow('No output file available for batch job');
    });

    it('should handle malformed result lines gracefully', async () => {
      const batchJob = { output_file_id: 'output-123' };
      const fileContent = '{"valid": "json"}\ninvalid json line\n{"another": "valid"}';

      mockClient.files.content.mockResolvedValue({
        text: () => Promise.resolve(fileContent)
      });

      const mockValidResult = { templateId: 1, metadata: { categories: ['test'] } };
      mockGenerator.parseResult.mockReturnValue(mockValidResult);

      const results = await (processor as any).retrieveResults(batchJob);

      // Should parse valid lines and skip invalid ones
      expect(results).toHaveLength(2);
      expect(mockGenerator.parseResult).toHaveBeenCalledTimes(2);
    });

    it('should handle file download errors', async () => {
      const batchJob = { output_file_id: 'output-123' };
      mockClient.files.content.mockRejectedValue(new Error('Download failed'));

      await expect(
        (processor as any).retrieveResults(batchJob)
      ).rejects.toThrow('Download failed');
    });
  });

  describe('cleanup', () => {
    it('should clean up all files successfully', async () => {
      await (processor as any).cleanup('local-file.jsonl', 'input-123', 'output-456');

      expect(mockedFs.unlinkSync).toHaveBeenCalledWith('local-file.jsonl');
      expect(mockClient.files.del).toHaveBeenCalledWith('input-123');
      expect(mockClient.files.del).toHaveBeenCalledWith('output-456');
    });

    it('should handle local file deletion errors gracefully', async () => {
      mockedFs.unlinkSync = vi.fn().mockImplementation(() => {
        throw new Error('File not found');
      });

      // Should not throw error
      await expect(
        (processor as any).cleanup('nonexistent.jsonl', 'input-123')
      ).resolves.toBeUndefined();
    });

    it('should handle OpenAI file deletion errors gracefully', async () => {
      mockClient.files.del.mockRejectedValue(new Error('Delete failed'));

      // Should not throw error
      await expect(
        (processor as any).cleanup('local-file.jsonl', 'input-123', 'output-456')
      ).resolves.toBeUndefined();
    });

    it('should work without output file ID', async () => {
      await (processor as any).cleanup('local-file.jsonl', 'input-123');

      expect(mockedFs.unlinkSync).toHaveBeenCalledWith('local-file.jsonl');
      expect(mockClient.files.del).toHaveBeenCalledWith('input-123');
      expect(mockClient.files.del).toHaveBeenCalledTimes(1); // Only input file
    });
  });

  describe('createBatches', () => {
    it('should split templates into correct batch sizes', () => {
      const templates: MetadataRequest[] = [
        { templateId: 1, name: 'T1', nodes: [] },
        { templateId: 2, name: 'T2', nodes: [] },
        { templateId: 3, name: 'T3', nodes: [] },
        { templateId: 4, name: 'T4', nodes: [] },
        { templateId: 5, name: 'T5', nodes: [] }
      ];

      const batches = (processor as any).createBatches(templates);

      expect(batches).toHaveLength(2); // 3 + 2 templates
      expect(batches[0]).toHaveLength(3);
      expect(batches[1]).toHaveLength(2);
    });

    it('should handle single template correctly', () => {
      const templates = [{ templateId: 1, name: 'T1', nodes: [] }];
      const batches = (processor as any).createBatches(templates);

      expect(batches).toHaveLength(1);
      expect(batches[0]).toHaveLength(1);
    });

    it('should handle empty templates array', () => {
      const batches = (processor as any).createBatches([]);
      expect(batches).toHaveLength(0);
    });
  });

  describe('file system security', () => {
    it('should sanitize file paths to prevent directory traversal', async () => {
      // Test with malicious batch name
      const maliciousBatchName = '../../../etc/passwd';
      const templates = [{ templateId: 1, name: 'Test', nodes: [] }];

      await (processor as any).createBatchFile(templates, maliciousBatchName);

      // Should create file in the designated output directory, not escape it
      const writtenPath = mockedFs.createWriteStream.mock.calls[0][0];
      expect(writtenPath).toMatch(/^\.\/test-temp\//);
      expect(writtenPath).not.toContain('../');
    });

    it('should handle very long file names gracefully', async () => {
      const longBatchName = 'a'.repeat(300); // Very long name
      const templates = [{ templateId: 1, name: 'Test', nodes: [] }];

      await expect(
        (processor as any).createBatchFile(templates, longBatchName)
      ).resolves.toBeDefined();
    });
  });

  describe('memory management', () => {
    it('should clean up files even on processing errors', async () => {
      const templates = [{ templateId: 1, name: 'Test', nodes: [] }];

      // Mock file upload to fail
      mockClient.files.create.mockRejectedValue(new Error('Upload failed'));

      const submitBatch = (processor as any).submitBatch.bind(processor);

      await expect(
        submitBatch(templates, 'error_test')
      ).rejects.toThrow('Upload failed');

      // File should still be cleaned up
      expect(mockedFs.unlinkSync).toHaveBeenCalled();
    });

    it('should handle concurrent batch processing correctly', async () => {
      const templates = Array.from({ length: 10 }, (_, i) => ({
        templateId: i + 1,
        name: `Template ${i + 1}`,
        nodes: ['node']
      }));

      // Mock successful processing
      mockClient.files.create.mockResolvedValue({ id: 'file-123' });
      const completedJob = {
        id: 'batch-123',
        status: 'completed',
        output_file_id: 'output-123'
      };
      mockClient.batches.create.mockResolvedValue(completedJob);
      mockClient.batches.retrieve.mockResolvedValue(completedJob);
      mockClient.files.content.mockResolvedValue({
        text: () => Promise.resolve('{"custom_id": "template-1"}')
      });
      mockGenerator.parseResult.mockReturnValue({
        templateId: 1,
        metadata: { categories: ['test'] }
      });

      const results = await processor.processTemplates(templates);

      expect(results.size).toBeGreaterThan(0);
      expect(mockClient.batches.create).toHaveBeenCalled();
    });
  });
});