Merge remote-tracking branch 'origin/v0.14.0rc' into feature/v0.14.0rc-1768981415660-tt2v

# Conflicts: # apps/ui/src/components/views/project-settings-view/config/navigation.ts # apps/ui/src/components/views/project-settings-view/hooks/use-project-settings-view.ts
2026-03-17 22:13:08 +00:00 · 2026-01-21 17:46:22 +01:00
parent 662f854203 f2860d9366
commit a45ee59b7d
61 changed files with 4752 additions and 213 deletions
--- a/apps/server/src/index.ts
+++ b/apps/server/src/index.ts
@@ -83,6 +83,7 @@ import { createNotificationsRoutes } from './routes/notifications/index.js';
 import { getNotificationService } from './services/notification-service.js';
 import { createEventHistoryRoutes } from './routes/event-history/index.js';
 import { getEventHistoryService } from './services/event-history-service.js';
+import { getTestRunnerService } from './services/test-runner-service.js';

 // Load environment variables
 dotenv.config();
@@ -248,6 +249,10 @@ notificationService.setEventEmitter(events);
 // Initialize Event History Service
 const eventHistoryService = getEventHistoryService();

+// Initialize Test Runner Service with event emitter for real-time test output streaming
+const testRunnerService = getTestRunnerService();
+testRunnerService.setEventEmitter(events);
+
 // Initialize Event Hook Service for custom event triggers (with history storage)
 eventHookService.initialize(events, settingsService, eventHistoryService, featureLoader);

--- a/apps/server/src/providers/index.ts
+++ b/apps/server/src/providers/index.ts
@@ -16,6 +16,16 @@ export type {
  ProviderMessage,
  InstallationStatus,
  ModelDefinition,
+  AgentDefinition,
+  ReasoningEffort,
+  SystemPromptPreset,
+  ConversationMessage,
+  ContentBlock,
+  ValidationResult,
+  McpServerConfig,
+  McpStdioServerConfig,
+  McpSSEServerConfig,
+  McpHttpServerConfig,
 } from './types.js';

 // Claude provider
--- a/apps/server/src/providers/types.ts
+++ b/apps/server/src/providers/types.ts
@@ -19,4 +19,7 @@ export type {
  InstallationStatus,
  ValidationResult,
  ModelDefinition,
+  AgentDefinition,
+  ReasoningEffort,
+  SystemPromptPreset,
 } from '@automaker/types';
--- a/apps/server/src/routes/worktree/index.ts
+++ b/apps/server/src/routes/worktree/index.ts
@@ -42,6 +42,9 @@ import { createStartDevHandler } from './routes/start-dev.js';
 import { createStopDevHandler } from './routes/stop-dev.js';
 import { createListDevServersHandler } from './routes/list-dev-servers.js';
 import { createGetDevServerLogsHandler } from './routes/dev-server-logs.js';
+import { createStartTestsHandler } from './routes/start-tests.js';
+import { createStopTestsHandler } from './routes/stop-tests.js';
+import { createGetTestLogsHandler } from './routes/test-logs.js';
 import {
  createGetInitScriptHandler,
  createPutInitScriptHandler,
@@ -140,6 +143,15 @@ export function createWorktreeRoutes(
    createGetDevServerLogsHandler()
  );

+  // Test runner routes
+  router.post(
+    '/start-tests',
+    validatePathParams('worktreePath', 'projectPath?'),
+    createStartTestsHandler(settingsService)
+  );
+  router.post('/stop-tests', createStopTestsHandler());
+  router.get('/test-logs', validatePathParams('worktreePath?'), createGetTestLogsHandler());
+
  // Init script routes
  router.get('/init-script', createGetInitScriptHandler());
  router.put('/init-script', validatePathParams('projectPath'), createPutInitScriptHandler());
--- a/apps/server/src/routes/worktree/routes/start-tests.ts
+++ b/apps/server/src/routes/worktree/routes/start-tests.ts
@@ -0,0 +1,92 @@
+/**
+ * POST /start-tests endpoint - Start tests for a worktree
+ *
+ * Runs the test command configured in project settings.
+ * If no testCommand is configured, returns an error.
+ */
+
+import type { Request, Response } from 'express';
+import type { SettingsService } from '../../../services/settings-service.js';
+import { getTestRunnerService } from '../../../services/test-runner-service.js';
+import { getErrorMessage, logError } from '../common.js';
+
+/**
+ * Build the Express handler for POST /start-tests.
+ *
+ * The handler reads `worktreePath` (required), `projectPath` and `testFile`
+ * (both optional) from the JSON body, looks up the project's configured
+ * `testCommand`, and asks the test runner service to start a run.
+ *
+ * @param settingsService - Source of project settings; when absent the handler answers 500.
+ * @returns An async Express request handler.
+ */
+export function createStartTestsHandler(settingsService?: SettingsService) {
+  return async (req: Request, res: Response): Promise<void> => {
+    // Every rejection shares the same `{ success: false, error }` envelope.
+    const reject = (statusCode: number, error: string): void => {
+      res.status(statusCode).json({ success: false, error });
+    };
+
+    // Body fields that are not strings are treated as if they were absent.
+    const stringOrUndefined = (value: unknown): string | undefined =>
+      typeof value === 'string' ? value : undefined;
+
+    try {
+      const payload = req.body;
+      if (!payload || typeof payload !== 'object') {
+        reject(400, 'Request body must be an object');
+        return;
+      }
+
+      const worktreePath = stringOrUndefined(payload.worktreePath);
+      if (!worktreePath) {
+        reject(400, 'worktreePath is required and must be a string');
+        return;
+      }
+
+      if (!settingsService) {
+        reject(500, 'Settings service not available');
+        return;
+      }
+
+      // Settings are looked up under projectPath when given, else under the worktree itself.
+      const settingsPath = stringOrUndefined(payload.projectPath) || worktreePath;
+      const projectSettings = await settingsService.getProjectSettings(settingsPath);
+      const testCommand = projectSettings?.testCommand;
+      if (!testCommand) {
+        reject(
+          400,
+          'No test command configured. Please configure a test command in Project Settings > Testing Configuration.'
+        );
+        return;
+      }
+
+      const outcome = await getTestRunnerService().startTests(worktreePath, {
+        command: testCommand,
+        testFile: stringOrUndefined(payload.testFile),
+      });
+
+      if (!outcome.success || !outcome.result) {
+        reject(400, outcome.error || 'Failed to start tests');
+        return;
+      }
+
+      // Echo back only the documented fields of the started session.
+      const started = outcome.result;
+      res.json({
+        success: true,
+        result: {
+          sessionId: started.sessionId,
+          worktreePath: started.worktreePath,
+          command: started.command,
+          status: started.status,
+          testFile: started.testFile,
+          message: started.message,
+        },
+      });
+    } catch (error) {
+      logError(error, 'Start tests failed');
+      res.status(500).json({ success: false, error: getErrorMessage(error) });
+    }
+  };
+}
--- a/apps/server/src/routes/worktree/routes/stop-tests.ts
+++ b/apps/server/src/routes/worktree/routes/stop-tests.ts
@@ -0,0 +1,58 @@
+/**
+ * POST /stop-tests endpoint - Stop a running test session
+ *
+ * Stops the test runner process for a specific session,
+ * cancelling any ongoing tests and freeing up resources.
+ */
+
+import type { Request, Response } from 'express';
+import { getTestRunnerService } from '../../../services/test-runner-service.js';
+import { getErrorMessage, logError } from '../common.js';
+
+/**
+ * Build the Express handler for POST /stop-tests.
+ *
+ * The handler requires a string `sessionId` in the JSON body and forwards it
+ * to the test runner service's `stopTests`.
+ *
+ * @returns An async Express request handler.
+ */
+export function createStopTestsHandler() {
+  return async (req: Request, res: Response): Promise<void> => {
+    // Every rejection shares the same `{ success: false, error }` envelope.
+    const reject = (statusCode: number, error: string): void => {
+      res.status(statusCode).json({ success: false, error });
+    };
+
+    try {
+      const payload = req.body;
+      if (!payload || typeof payload !== 'object') {
+        reject(400, 'Request body must be an object');
+        return;
+      }
+
+      // A non-string sessionId is treated the same as a missing one.
+      const sessionId = typeof payload.sessionId === 'string' ? payload.sessionId : undefined;
+      if (!sessionId) {
+        reject(400, 'sessionId is required and must be a string');
+        return;
+      }
+
+      const outcome = await getTestRunnerService().stopTests(sessionId);
+      if (!outcome.success || !outcome.result) {
+        reject(400, outcome.error || 'Failed to stop tests');
+        return;
+      }
+
+      const stopped = outcome.result;
+      res.json({
+        success: true,
+        result: {
+          sessionId: stopped.sessionId,
+          message: stopped.message,
+        },
+      });
+    } catch (error) {
+      logError(error, 'Stop tests failed');
+      res.status(500).json({ success: false, error: getErrorMessage(error) });
+    }
+  };
+}
--- a/apps/server/src/routes/worktree/routes/test-logs.ts
+++ b/apps/server/src/routes/worktree/routes/test-logs.ts
@@ -0,0 +1,160 @@
+/**
+ * GET /test-logs endpoint - Get buffered logs for a test runner session
+ *
+ * Returns the scrollback buffer containing historical log output for a test run.
+ * Used by clients to populate the log panel on initial connection
+ * before subscribing to real-time updates via WebSocket.
+ *
+ * Query parameters:
+ * - worktreePath: Path to the worktree (optional if sessionId provided)
+ * - sessionId: Specific test session ID (optional, uses active session if not provided)
+ */
+
+import type { Request, Response } from 'express';
+import { getTestRunnerService } from '../../../services/test-runner-service.js';
+import { getErrorMessage, logError } from '../common.js';
+
+/**
+ * Descriptive fields of a test session that are copied into a logs response.
+ * Everything except the id is optional because the session lookup may come back empty.
+ */
+interface SessionInfo {
+  /** Identifier of the test session the logs belong to. */
+  sessionId: string;
+  /** Worktree the session ran in, when known. */
+  worktreePath?: string;
+  /** Command line the session executed, when known. */
+  command?: string;
+  /** Test file the run was restricted to, if any. */
+  testFile?: string;
+  /** Process exit code; reported as `null` in the response when absent. */
+  exitCode?: number | null;
+}
+
+/**
+ * Output-related fields of a test session, as consumed by `buildLogsResponse`.
+ */
+interface OutputResult {
+  /** Identifier of the session the output belongs to. */
+  sessionId: string;
+  /** Status string reported for the session. */
+  status: string;
+  /** Buffered log text; exposed to clients as `logs`. */
+  output: string;
+  /** Start time of the run, as a string. */
+  startedAt: string;
+  /** Finish time of the run, when there is one. */
+  finishedAt?: string | null;
+}
+
+/**
+ * Assemble the success payload returned by GET /test-logs.
+ *
+ * @param session - Descriptive fields of the session (path, command, file, exit code).
+ * @param output - Status, timestamps and buffered text for the same session.
+ * @returns A `{ success: true, result }` envelope; `exitCode` is `null` when the session has none.
+ */
+function buildLogsResponse(session: SessionInfo, output: OutputResult) {
+  const { sessionId, worktreePath, command, testFile } = session;
+  const { status, output: logs, startedAt, finishedAt } = output;
+
+  return {
+    success: true,
+    result: {
+      sessionId,
+      worktreePath,
+      command,
+      status,
+      testFile,
+      logs,
+      startedAt,
+      finishedAt,
+      exitCode: session.exitCode ?? null,
+    },
+  };
+}
+
+/**
+ * Build the Express handler for GET /test-logs.
+ *
+ * Resolution order:
+ * 1. `sessionId` query parameter - logs of exactly that session.
+ * 2. `worktreePath` query parameter - logs of the worktree's active session,
+ *    or of its most recently started session when none is active.
+ * 3. Neither - 400.
+ *
+ * Lookups that find nothing answer 404; unexpected exceptions answer 500.
+ *
+ * @returns An async Express request handler.
+ */
+export function createGetTestLogsHandler() {
+  return async (req: Request, res: Response): Promise<void> => {
+    const notFound = (error: string): void => {
+      res.status(404).json({ success: false, error });
+    };
+
+    try {
+      // Express turns repeated or bracketed query keys into arrays/objects, so
+      // narrow at runtime instead of asserting the type; non-strings count as absent.
+      const { worktreePath: rawWorktreePath, sessionId: rawSessionId } = req.query;
+      const worktreePath = typeof rawWorktreePath === 'string' ? rawWorktreePath : undefined;
+      const sessionId = typeof rawSessionId === 'string' ? rawSessionId : undefined;
+
+      const testRunnerService = getTestRunnerService();
+
+      // 1. An explicit session id wins over everything else.
+      if (sessionId) {
+        const lookup = testRunnerService.getSessionOutput(sessionId);
+        if (!lookup.success || !lookup.result) {
+          notFound(lookup.error || 'Failed to get test logs');
+          return;
+        }
+
+        const session = testRunnerService.getSession(sessionId);
+        res.json(
+          buildLogsResponse(
+            {
+              sessionId: lookup.result.sessionId,
+              worktreePath: session?.worktreePath,
+              command: session?.command,
+              testFile: session?.testFile,
+              exitCode: session?.exitCode,
+            },
+            lookup.result
+          )
+        );
+        return;
+      }
+
+      // 3. Nothing to identify a session by.
+      if (!worktreePath) {
+        res.status(400).json({
+          success: false,
+          error: 'Either worktreePath or sessionId query parameter is required',
+        });
+        return;
+      }
+
+      // 2a. Prefer the session that is currently active for the worktree.
+      const activeSession = testRunnerService.getActiveSession(worktreePath);
+      if (activeSession) {
+        const lookup = testRunnerService.getSessionOutput(activeSession.id);
+        if (!lookup.success || !lookup.result) {
+          notFound(lookup.error || 'Failed to get test logs');
+          return;
+        }
+
+        res.json(
+          buildLogsResponse(
+            {
+              sessionId: activeSession.id,
+              worktreePath: activeSession.worktreePath,
+              command: activeSession.command,
+              testFile: activeSession.testFile,
+              exitCode: activeSession.exitCode,
+            },
+            lookup.result
+          )
+        );
+        return;
+      }
+
+      // 2b. No active session - fall back to the most recently started one.
+      const sessions = testRunnerService.listSessions(worktreePath);
+      if (sessions.result.sessions.length > 0) {
+        // The list is not sorted, so pick the latest `startedAt` by hand.
+        const mostRecent = sessions.result.sessions.reduce((latest, current) => {
+          const latestTime = new Date(latest.startedAt).getTime();
+          const currentTime = new Date(current.startedAt).getTime();
+          return currentTime > latestTime ? current : latest;
+        });
+
+        const lookup = testRunnerService.getSessionOutput(mostRecent.sessionId);
+        if (lookup.success && lookup.result) {
+          res.json(
+            buildLogsResponse(
+              {
+                sessionId: mostRecent.sessionId,
+                worktreePath: mostRecent.worktreePath,
+                command: mostRecent.command,
+                testFile: mostRecent.testFile,
+                exitCode: mostRecent.exitCode,
+              },
+              lookup.result
+            )
+          );
+          return;
+        }
+      }
+
+      // Either the worktree has no sessions or the latest one has no retrievable output.
+      notFound('No test sessions found for this worktree');
+    } catch (error) {
+      logError(error, 'Get test logs failed');
+      res.status(500).json({ success: false, error: getErrorMessage(error) });
+    }
+  };
+}
--- a/apps/server/src/services/auto-mode-service.ts
+++ b/apps/server/src/services/auto-mode-service.ts
@@ -1281,7 +1281,11 @@ export class AutoModeService {

      // Check for pipeline steps and execute them
      const pipelineConfig = await pipelineService.getPipelineConfig(projectPath);
-      const sortedSteps = [...(pipelineConfig?.steps || [])].sort((a, b) => a.order - b.order);
+      // Filter out excluded pipeline steps and sort by order
+      const excludedStepIds = new Set(feature.excludedPipelineSteps || []);
+      const sortedSteps = [...(pipelineConfig?.steps || [])]
+        .sort((a, b) => a.order - b.order)
+        .filter((step) => !excludedStepIds.has(step.id));

      if (sortedSteps.length > 0) {
        // Execute pipeline steps sequentially
@@ -1743,15 +1747,76 @@ Complete the pipeline step instructions above. Review the previous work and appl
  ): Promise<void> {
    const featureId = feature.id;

-    const sortedSteps = [...pipelineConfig.steps].sort((a, b) => a.order - b.order);
+    // Sort all steps first
+    const allSortedSteps = [...pipelineConfig.steps].sort((a, b) => a.order - b.order);

-    // Validate step index
-    if (startFromStepIndex < 0 || startFromStepIndex >= sortedSteps.length) {
+    // Get the current step we're resuming from (using the index from unfiltered list)
+    if (startFromStepIndex < 0 || startFromStepIndex >= allSortedSteps.length) {
      throw new Error(`Invalid step index: ${startFromStepIndex}`);
    }
+    const currentStep = allSortedSteps[startFromStepIndex];

-    // Get steps to execute (from startFromStepIndex onwards)
-    const stepsToExecute = sortedSteps.slice(startFromStepIndex);
+    // Filter out excluded pipeline steps
+    const excludedStepIds = new Set(feature.excludedPipelineSteps || []);
+
+    // Check if the current step is excluded
+    // If so, use getNextStatus to find the appropriate next step
+    if (excludedStepIds.has(currentStep.id)) {
+      console.log(
+        `[AutoMode] Current step ${currentStep.id} is excluded for feature ${featureId}, finding next valid step`
+      );
+      const nextStatus = pipelineService.getNextStatus(
+        `pipeline_${currentStep.id}`,
+        pipelineConfig,
+        feature.skipTests ?? false,
+        feature.excludedPipelineSteps
+      );
+
+      // If next status is not a pipeline step, feature is done
+      if (!pipelineService.isPipelineStatus(nextStatus)) {
+        await this.updateFeatureStatus(projectPath, featureId, nextStatus);
+        this.emitAutoModeEvent('auto_mode_feature_complete', {
+          featureId,
+          featureName: feature.title,
+          branchName: feature.branchName ?? null,
+          passes: true,
+          message: 'Pipeline completed (remaining steps excluded)',
+          projectPath,
+        });
+        return;
+      }
+
+      // Find the next step and update the start index
+      const nextStepId = pipelineService.getStepIdFromStatus(nextStatus);
+      const nextStepIndex = allSortedSteps.findIndex((s) => s.id === nextStepId);
+      if (nextStepIndex === -1) {
+        throw new Error(`Next step ${nextStepId} not found in pipeline config`);
+      }
+      startFromStepIndex = nextStepIndex;
+    }
+
+    // Get steps to execute (from startFromStepIndex onwards, excluding excluded steps)
+    const stepsToExecute = allSortedSteps
+      .slice(startFromStepIndex)
+      .filter((step) => !excludedStepIds.has(step.id));
+
+    // If no steps left to execute, complete the feature
+    if (stepsToExecute.length === 0) {
+      const finalStatus = feature.skipTests ? 'waiting_approval' : 'verified';
+      await this.updateFeatureStatus(projectPath, featureId, finalStatus);
+      this.emitAutoModeEvent('auto_mode_feature_complete', {
+        featureId,
+        featureName: feature.title,
+        branchName: feature.branchName ?? null,
+        passes: true,
+        message: 'Pipeline completed (all remaining steps excluded)',
+        projectPath,
+      });
+      return;
+    }
+
+    // Use the filtered steps for counting
+    const sortedSteps = allSortedSteps.filter((step) => !excludedStepIds.has(step.id));

    console.log(
      `[AutoMode] Resuming pipeline for feature ${featureId} from step ${startFromStepIndex + 1}/${sortedSteps.length}`
--- a/apps/server/src/services/pipeline-service.ts
+++ b/apps/server/src/services/pipeline-service.ts
@@ -234,51 +234,75 @@ export class PipelineService {
   *
   * Determines what status a feature should transition to based on current status.
   * Flow: in_progress -> pipeline_step_0 -> pipeline_step_1 -> ... -> final status
+   * Steps in the excludedStepIds array will be skipped.
   *
   * @param currentStatus - Current feature status
   * @param config - Pipeline configuration (or null if no pipeline)
   * @param skipTests - Whether to skip tests (affects final status)
+   * @param excludedStepIds - Optional array of step IDs to skip
   * @returns The next status in the pipeline flow
   */
  getNextStatus(
    currentStatus: FeatureStatusWithPipeline,
    config: PipelineConfig | null,
-    skipTests: boolean
+    skipTests: boolean,
+    excludedStepIds?: string[]
  ): FeatureStatusWithPipeline {
    const steps = config?.steps || [];
+    const exclusions = new Set(excludedStepIds || []);

-    // Sort steps by order
-    const sortedSteps = [...steps].sort((a, b) => a.order - b.order);
+    // Sort steps by order and filter out excluded steps
+    const sortedSteps = [...steps]
+      .sort((a, b) => a.order - b.order)
+      .filter((step) => !exclusions.has(step.id));

-    // If no pipeline steps, use original logic
+    // If no pipeline steps (or all excluded), use original logic
    if (sortedSteps.length === 0) {
-      if (currentStatus === 'in_progress') {
+      // If coming from in_progress or already in a pipeline step, go to final status
+      if (currentStatus === 'in_progress' || currentStatus.startsWith('pipeline_')) {
        return skipTests ? 'waiting_approval' : 'verified';
      }
      return currentStatus;
    }

-    // Coming from in_progress -> go to first pipeline step
+    // Coming from in_progress -> go to first non-excluded pipeline step
    if (currentStatus === 'in_progress') {
      return `pipeline_${sortedSteps[0].id}`;
    }

-    // Coming from a pipeline step -> go to next step or final status
+    // Coming from a pipeline step -> go to next non-excluded step or final status
    if (currentStatus.startsWith('pipeline_')) {
      const currentStepId = currentStatus.replace('pipeline_', '');
      const currentIndex = sortedSteps.findIndex((s) => s.id === currentStepId);

      if (currentIndex === -1) {
-        // Step not found, go to final status
+        // Current step not found in filtered list (might be excluded or invalid)
+        // Find next valid step after this one from the original sorted list
+        const allSortedSteps = [...steps].sort((a, b) => a.order - b.order);
+        const originalIndex = allSortedSteps.findIndex((s) => s.id === currentStepId);
+
+        if (originalIndex === -1) {
+          // Step truly doesn't exist, go to final status
+          return skipTests ? 'waiting_approval' : 'verified';
+        }
+
+        // Find the next non-excluded step after the current one
+        for (let i = originalIndex + 1; i < allSortedSteps.length; i++) {
+          if (!exclusions.has(allSortedSteps[i].id)) {
+            return `pipeline_${allSortedSteps[i].id}`;
+          }
+        }
+
+        // No more non-excluded steps, go to final status
        return skipTests ? 'waiting_approval' : 'verified';
      }

      if (currentIndex < sortedSteps.length - 1) {
-        // Go to next step
+        // Go to next non-excluded step
        return `pipeline_${sortedSteps[currentIndex + 1].id}`;
      }

-      // Last step completed, go to final status
+      // Last non-excluded step completed, go to final status
      return skipTests ? 'waiting_approval' : 'verified';
    }

--- a/apps/server/src/services/settings-service.ts
+++ b/apps/server/src/services/settings-service.ts
@@ -827,6 +827,16 @@ export class SettingsService {
      delete updated.phaseModelOverrides;
    }

+    // Handle defaultFeatureModel special cases:
+    // - "__CLEAR__" marker means delete the key (use global setting)
+    // - object means project-specific override
+    if (
+      'defaultFeatureModel' in updates &&
+      (updates as Record<string, unknown>).defaultFeatureModel === '__CLEAR__'
+    ) {
+      delete updated.defaultFeatureModel;
+    }
+
    await writeSettingsJson(settingsPath, updated);
    logger.info(`Project settings updated for ${projectPath}`);

--- a/apps/server/src/services/test-runner-service.ts
+++ b/apps/server/src/services/test-runner-service.ts
@@ -0,0 +1,682 @@
+/**
+ * Test Runner Service
+ *
+ * Manages test execution processes for git worktrees.
+ * Runs user-configured test commands with output streaming.
+ *
+ * Features:
+ * - Process management with graceful shutdown
+ * - Output buffering and throttling for WebSocket streaming
+ * - Support for running all tests or specific files
+ * - Cross-platform process cleanup (Windows/Unix)
+ */
+
+import { spawn, execSync, type ChildProcess } from 'child_process';
+import * as secureFs from '../lib/secure-fs.js';
+import { createLogger } from '@automaker/utils';
+import type { EventEmitter } from '../lib/events.js';
+
+const logger = createLogger('TestRunnerService');
+
+/** Upper bound, in characters, on the log history kept per test run (~50KB). */
+const MAX_SCROLLBACK_SIZE = 50_000;
+
+/**
+ * Delay in milliseconds between batched output emissions (~10fps), so the
+ * WebSocket is not overwhelmed under heavy load.
+ * Note: throttling that is too aggressive (< 50ms) can cause memory issues and
+ * UI crashes due to rapid React state updates and string concatenation overhead.
+ */
+const OUTPUT_THROTTLE_MS = 100;
+
+/** Maximum characters sent per output event; a larger batch reduces event frequency. */
+const OUTPUT_BATCH_SIZE = 8 * 1024;
+
+/**
+ * Lifecycle states a test run can be in.
+ */
+export type TestRunStatus =
+  | 'pending'
+  | 'running'
+  | 'passed'
+  | 'failed'
+  | 'cancelled'
+  | 'error';
+
+/**
+ * Book-keeping record the service keeps for one test run.
+ */
+export interface TestRunSession {
+  // --- Identity ---
+
+  /** Unique identifier for this test run */
+  id: string;
+  /** Path to the worktree where tests are running */
+  worktreePath: string;
+  /** The command being run */
+  command: string;
+  /** Specific test file being run (optional) */
+  testFile?: string;
+
+  // --- Process state ---
+
+  /** The spawned child process, or null when there is none */
+  process: ChildProcess | null;
+  /** Current status of the test run */
+  status: TestRunStatus;
+  /** Exit code from the process (null until known) */
+  exitCode: number | null;
+  /** Set while the session is stopping; output handling and flushing are skipped when true */
+  stopping: boolean;
+
+  // --- Timing ---
+
+  /** When the test run started */
+  startedAt: Date;
+  /** When the test run finished (null while unfinished) */
+  finishedAt: Date | null;
+
+  // --- Output buffering ---
+
+  /** Log history kept for replay on reconnect, capped at MAX_SCROLLBACK_SIZE characters */
+  scrollbackBuffer: string;
+  /** Output received but not yet emitted to subscribers */
+  outputBuffer: string;
+  /** Handle of the pending throttled flush, or null when none is scheduled */
+  flushTimeout: NodeJS.Timeout | null;
+}
+
+/**
+ * Outcome envelope returned by test run operations.
+ * On success `result` describes the session; on failure `error` carries the reason.
+ */
+export interface TestRunResult {
+  /** Whether the operation succeeded */
+  success: boolean;
+  /** Failure reason, set when the operation did not succeed */
+  error?: string;
+  /** Session details, set when the operation succeeded */
+  result?: {
+    /** Identifier of the affected session */
+    sessionId: string;
+    /** Worktree the session runs in */
+    worktreePath: string;
+    /** Command line the session executes */
+    command: string;
+    /** Status of the run at the time of the response */
+    status: TestRunStatus;
+    /** Test file the run is restricted to, if any */
+    testFile?: string;
+    /** Human-readable summary of what happened */
+    message: string;
+  };
+}
+
+/**
+ * Test Runner Service class
+ * Manages test execution processes across worktrees
+ */
+class TestRunnerService {
+  private sessions: Map<string, TestRunSession> = new Map();
+  private emitter: EventEmitter | null = null;
+
+  /**
+   * Register the event emitter used to publish test runner events
+   * (for example `test-runner:output`).
+   * Called during service initialization with the global event emitter.
+   * Until an emitter is set, output is still buffered on the session but
+   * no events are emitted.
+   *
+   * @param emitter - Emitter that receives the service's events; replaces any previous one.
+   */
+  setEventEmitter(emitter: EventEmitter): void {
+    this.emitter = emitter;
+  }
+
+  /**
+   * Report whether `filePath` can be accessed through secureFs.
+   *
+   * @param filePath - Path to probe.
+   * @returns `true` when `secureFs.access` succeeds, `false` when it throws or rejects.
+   */
+  private async fileExists(filePath: string): Promise<boolean> {
+    try {
+      await secureFs.access(filePath);
+    } catch {
+      // Any access failure is reported as "does not exist".
+      return false;
+    }
+    return true;
+  }
+
+  /**
+   * Add `data` to the session's scrollback history, keeping only the newest
+   * MAX_SCROLLBACK_SIZE characters.
+   *
+   * @param session - Session whose scrollback buffer is updated in place.
+   * @param data - Text to append.
+   */
+  private appendToScrollback(session: TestRunSession, data: string): void {
+    const combined = session.scrollbackBuffer + data;
+    // A negative slice start keeps the tail, i.e. drops the oldest characters.
+    session.scrollbackBuffer =
+      combined.length > MAX_SCROLLBACK_SIZE ? combined.slice(-MAX_SCROLLBACK_SIZE) : combined;
+  }
+
+  /**
+   * Emit the session's pending output as a `test-runner:output` event.
+   *
+   * At most OUTPUT_BATCH_SIZE characters are sent per call; when more is
+   * pending, the remainder stays buffered and another flush is scheduled
+   * after OUTPUT_THROTTLE_MS. Does nothing (apart from clearing the timer
+   * handle) when the session is stopping or has nothing buffered.
+   *
+   * @param session - Session whose output buffer is drained.
+   */
+  private flushOutput(session: TestRunSession): void {
+    const pending = session.outputBuffer;
+
+    if (session.stopping || pending.length === 0) {
+      session.flushTimeout = null;
+      return;
+    }
+
+    const overflows = pending.length > OUTPUT_BATCH_SIZE;
+    const chunk = overflows ? pending.slice(0, OUTPUT_BATCH_SIZE) : pending;
+
+    // Keep whatever did not fit and, if anything is left, arrange the next flush.
+    session.outputBuffer = overflows ? pending.slice(OUTPUT_BATCH_SIZE) : '';
+    session.flushTimeout = overflows
+      ? setTimeout(() => this.flushOutput(session), OUTPUT_THROTTLE_MS)
+      : null;
+
+    // Without an emitter the chunk is simply dropped from the live stream.
+    this.emitter?.emit('test-runner:output', {
+      sessionId: session.id,
+      worktreePath: session.worktreePath,
+      content: chunk,
+      timestamp: new Date().toISOString(),
+    });
+  }
+
+  /**
+   * Record a chunk of process output on the session.
+   *
+   * The text is added to the scrollback history and to the pending output
+   * buffer, and a throttled flush is scheduled unless one is already pending.
+   * Chunks that arrive while the session is stopping are ignored.
+   *
+   * @param session - Session the chunk belongs to.
+   * @param data - Raw chunk read from the process.
+   */
+  private handleProcessOutput(session: TestRunSession, data: Buffer): void {
+    if (session.stopping) {
+      return;
+    }
+
+    // NOTE(review): each chunk is decoded on its own; a multi-byte character split
+    // across two chunks would presumably decode incorrectly - confirm whether that matters.
+    const text = data.toString();
+
+    this.appendToScrollback(session, text);
+    session.outputBuffer += text;
+
+    const flushAlreadyScheduled = Boolean(session.flushTimeout);
+    if (!flushAlreadyScheduled) {
+      session.flushTimeout = setTimeout(() => this.flushOutput(session), OUTPUT_THROTTLE_MS);
+    }
+
+    // Mirror the chunk to the debug log.
+    logger.debug(`[${session.id}] ${text.trim()}`);
+  }
+
+  /**
+   * Best-effort termination of a process and its children.
+   *
+   * On Windows this runs `taskkill /F /T`. Elsewhere it sends SIGTERM to the
+   * process group (negative pid) and, if that throws, to the single process.
+   * Failures are logged at debug level and never propagate.
+   *
+   * @param pid - Id of the process to terminate.
+   */
+  private killProcessTree(pid: number): void {
+    try {
+      if (process.platform === 'win32') {
+        execSync(`taskkill /F /T /PID ${pid}`, { stdio: 'ignore' });
+        return;
+      }
+
+      try {
+        // A negative pid addresses the whole process group.
+        process.kill(-pid, 'SIGTERM');
+      } catch {
+        // Group signal failed; signal only the process itself.
+        process.kill(pid, 'SIGTERM');
+      }
+    } catch (error) {
+      logger.debug(`Error killing process ${pid}:`, error);
+    }
+  }
+
+  /**
+   * Create an identifier for a new test run.
+   *
+   * @returns A string of the form `test-<epoch millis>-<random base-36 suffix>`.
+   */
+  private generateSessionId(): string {
+    const timestamp = Date.now();
+    // Drop the leading "0." of the base-36 fraction and keep up to nine characters.
+    const randomSuffix = Math.random().toString(36).slice(2, 11);
+    return ['test', timestamp, randomSuffix].join('-');
+  }
+
+  /**
+   * Sanitize a test file path before it is appended to the shell command.
+   *
+   * Every character other than ASCII letters, digits, dots, slashes,
+   * backslashes, underscores, hyphens and colons (for Windows paths) is
+   * removed, which strips shell metacharacters and whitespace. A result that
+   * begins with a hyphen is additionally prefixed with `./` so the test
+   * runner receives it as a path rather than as a command-line option.
+   *
+   * @param testFile - File path as supplied by the caller (untrusted).
+   * @returns The sanitized path; may be empty if nothing safe remains.
+   */
+  private sanitizeTestFile(testFile: string): string {
+    const stripped = testFile.replace(/[^a-zA-Z0-9.\\/_\-:]/g, '');
+
+    // Hyphens are legal inside file names, but a leading one would let the
+    // caller smuggle an option (e.g. "--someFlag") into the test command.
+    return stripped.startsWith('-') ? `./${stripped}` : stripped;
+  }
+
+  /**
+   * Start tests in a worktree using the provided command
+   *
+   * @param worktreePath - Path to the worktree where tests should run
+   * @param options - Configuration for the test run
+   * @returns TestRunResult with session info or error
+   */
+  async startTests(
+    worktreePath: string,
+    options: {
+      command: string;
+      testFile?: string;
+    }
+  ): Promise<TestRunResult> {
+    const { command, testFile } = options;
+
+    // Produces the "already running" rejection for this worktree, or undefined when it is free
+    const rejectIfRunning = (): TestRunResult | undefined => {
+      const activeSession = this.getActiveSession(worktreePath);
+      if (!activeSession) {
+        return undefined;
+      }
+      return {
+        success: false,
+        error: `Tests are already running for this worktree (session: ${activeSession.id})`,
+      };
+    };
+
+    // Check if already running
+    const rejectedEarly = rejectIfRunning();
+    if (rejectedEarly) {
+      return rejectedEarly;
+    }
+
+    // Verify the worktree exists
+    if (!(await this.fileExists(worktreePath))) {
+      return {
+        success: false,
+        error: `Worktree path does not exist: ${worktreePath}`,
+      };
+    }
+
+    // Re-check after the await above: a concurrent startTests() call for the same worktree
+    // may have registered its session while this call was suspended. Everything from here
+    // until the session is stored runs synchronously, so check + register cannot interleave.
+    const rejectedAfterAwait = rejectIfRunning();
+    if (rejectedAfterAwait) {
+      return rejectedAfterAwait;
+    }
+
+    if (!command) {
+      return {
+        success: false,
+        error: 'No test command provided',
+      };
+    }
+
+    // Build the final command (append test file if specified)
+    let finalCommand = command;
+    if (testFile) {
+      // Sanitize test file path to prevent command injection
+      const sanitizedFile = this.sanitizeTestFile(testFile);
+      // Append the test file to the command
+      // Most test runners support: command -- file or command file
+      finalCommand = `${command} -- ${sanitizedFile}`;
+    }
+
+    // The command string is handed to the shell as-is (shell: true) so that complex
+    // commands like "npm run test:server" work without being split into cmd/args here
+    logger.info(`Starting tests in ${worktreePath}`);
+    logger.info(`Command: ${finalCommand}`);
+
+    // Create session
+    const sessionId = this.generateSessionId();
+    const session: TestRunSession = {
+      id: sessionId,
+      worktreePath,
+      command: finalCommand,
+      process: null,
+      startedAt: new Date(),
+      finishedAt: null,
+      status: 'pending',
+      exitCode: null,
+      testFile,
+      scrollbackBuffer: '',
+      outputBuffer: '',
+      flushTimeout: null,
+      stopping: false,
+    };
+
+    // Spawn the test process using shell
+    const env = {
+      ...process.env,
+      FORCE_COLOR: '1',
+      COLORTERM: 'truecolor',
+      TERM: 'xterm-256color',
+      CI: 'true', // Helps some test runners format output better
+    };
+
+    const testProcess = spawn(finalCommand, [], {
+      cwd: worktreePath,
+      env,
+      shell: true,
+      stdio: ['ignore', 'pipe', 'pipe'],
+      detached: process.platform !== 'win32', // Use process groups on Unix for cleanup
+    });
+
+    session.process = testProcess;
+    session.status = 'running';
+
+    // Track if process failed early
+    const status = { error: null as string | null, exited: false };
+
+    // Node may deliver both 'error' and 'exit' for the same child; only the first one
+    // is allowed to finalise the session so 'test-runner:completed' is emitted once
+    let finished = false;
+
+    // Helper to clean up resources and emit events
+    const cleanupAndFinish = (
+      exitCode: number | null,
+      finalStatus: TestRunStatus,
+      errorMessage?: string
+    ) => {
+      if (finished) {
+        return;
+      }
+      finished = true;
+
+      session.finishedAt = new Date();
+      session.exitCode = exitCode;
+      session.status = finalStatus;
+
+      if (session.flushTimeout) {
+        clearTimeout(session.flushTimeout);
+        session.flushTimeout = null;
+      }
+
+      // Flush any remaining output
+      if (session.outputBuffer.length > 0 && this.emitter && !session.stopping) {
+        this.emitter.emit('test-runner:output', {
+          sessionId: session.id,
+          worktreePath: session.worktreePath,
+          content: session.outputBuffer,
+          timestamp: new Date().toISOString(),
+        });
+        session.outputBuffer = '';
+      }
+
+      // Emit completed event (stopTests emits its own when the run is being cancelled)
+      if (this.emitter && !session.stopping) {
+        this.emitter.emit('test-runner:completed', {
+          sessionId: session.id,
+          worktreePath: session.worktreePath,
+          command: session.command,
+          status: finalStatus,
+          exitCode,
+          error: errorMessage,
+          duration: session.finishedAt.getTime() - session.startedAt.getTime(),
+          timestamp: new Date().toISOString(),
+        });
+      }
+    };
+
+    // Capture stdout
+    if (testProcess.stdout) {
+      testProcess.stdout.on('data', (data: Buffer) => {
+        this.handleProcessOutput(session, data);
+      });
+    }
+
+    // Capture stderr
+    if (testProcess.stderr) {
+      testProcess.stderr.on('data', (data: Buffer) => {
+        this.handleProcessOutput(session, data);
+      });
+    }
+
+    testProcess.on('error', (error) => {
+      logger.error(`Process error for ${sessionId}:`, error);
+      status.error = error.message;
+      cleanupAndFinish(null, 'error', error.message);
+    });
+
+    testProcess.on('exit', (code) => {
+      logger.info(`Test process for ${worktreePath} exited with code ${code}`);
+      status.exited = true;
+
+      // Determine final status based on exit code
+      let finalStatus: TestRunStatus;
+      if (session.stopping) {
+        finalStatus = 'cancelled';
+      } else if (code === 0) {
+        finalStatus = 'passed';
+      } else {
+        finalStatus = 'failed';
+      }
+
+      cleanupAndFinish(code, finalStatus);
+    });
+
+    // Store session
+    this.sessions.set(sessionId, session);
+
+    // Emit started event before yielding to the event loop. Child-process events can only
+    // be delivered once this function awaits, so listeners always observe 'started' ahead
+    // of any 'output' / 'completed' event, even when the process exits within the grace
+    // period below.
+    if (this.emitter) {
+      this.emitter.emit('test-runner:started', {
+        sessionId,
+        worktreePath,
+        command: finalCommand,
+        testFile,
+        timestamp: new Date().toISOString(),
+      });
+    }
+
+    // Wait a moment to see if the process fails immediately
+    await new Promise((resolve) => setTimeout(resolve, 200));
+
+    if (status.error) {
+      // Listeners have already received 'completed' with status 'error' for this session
+      return {
+        success: false,
+        error: `Failed to start tests: ${status.error}`,
+      };
+    }
+
+    if (status.exited) {
+      // Process already exited - check if it was immediate failure
+      const exitedSession = this.sessions.get(sessionId);
+      if (exitedSession && exitedSession.status === 'error') {
+        return {
+          success: false,
+          error: `Test process exited immediately. Check output for details.`,
+        };
+      }
+    }
+
+    return {
+      success: true,
+      result: {
+        sessionId,
+        worktreePath,
+        command: finalCommand,
+        status: 'running',
+        testFile,
+        message: `Tests started: ${finalCommand}`,
+      },
+    };
+  }
+
+  /**
+   * Stop a running test session
+   *
+   * @param sessionId - The ID of the test session to stop
+   * @returns Result with success status and message
+   */
+  async stopTests(sessionId: string): Promise<{
+    success: boolean;
+    result?: { sessionId: string; message: string };
+    error?: string;
+  }> {
+    const target = this.sessions.get(sessionId);
+
+    // Unknown session id: nothing to stop
+    if (target === undefined) {
+      return {
+        success: false,
+        error: `Test session not found: ${sessionId}`,
+      };
+    }
+
+    // Only a running session can be cancelled; anything else is reported as a no-op success
+    const alreadyDone = target.status !== 'running';
+    if (alreadyDone) {
+      const message = `Tests already finished (status: ${target.status})`;
+      return { success: true, result: { sessionId, message } };
+    }
+
+    logger.info(`Cancelling test session ${sessionId}`);
+
+    // Flag the session first so the process handlers stop emitting output/completed events
+    target.stopping = true;
+
+    // Drop any pending buffered-output flush
+    const pendingFlush = target.flushTimeout;
+    if (pendingFlush) {
+      clearTimeout(pendingFlush);
+      target.flushTimeout = null;
+    }
+
+    // Terminate the child (and whatever killProcessTree reaches) if it is still alive
+    const child = target.process;
+    if (child && !child.killed && child.pid) {
+      this.killProcessTree(child.pid);
+    }
+
+    // Record the cancellation on the session itself
+    target.status = 'cancelled';
+    const finishedAt = new Date();
+    target.finishedAt = finishedAt;
+
+    // Tell listeners the run ended as cancelled
+    const emitter = this.emitter;
+    if (emitter) {
+      emitter.emit('test-runner:completed', {
+        sessionId,
+        worktreePath: target.worktreePath,
+        command: target.command,
+        status: 'cancelled',
+        exitCode: null,
+        duration: finishedAt.getTime() - target.startedAt.getTime(),
+        timestamp: new Date().toISOString(),
+      });
+    }
+
+    return { success: true, result: { sessionId, message: 'Test run cancelled' } };
+  }
+
+  /**
+   * Get the active test session for a worktree
+   */
+  getActiveSession(worktreePath: string): TestRunSession | undefined {
+    // First tracked session (in insertion order) that is running in this worktree, if any
+    return Array.from(this.sessions.values()).find(
+      (candidate) => candidate.worktreePath === worktreePath && candidate.status === 'running'
+    );
+  }
+
+  /**
+   * Get a test session by ID
+   */
+  getSession(sessionId: string): TestRunSession | undefined {
+    // Plain lookup in the session registry; undefined when the id is not tracked
+    const found = this.sessions.get(sessionId);
+    return found;
+  }
+
+  /**
+   * Get buffered output for a test session
+   */
+  getSessionOutput(sessionId: string): {
+    success: boolean;
+    result?: {
+      sessionId: string;
+      output: string;
+      status: TestRunStatus;
+      startedAt: string;
+      finishedAt: string | null;
+    };
+    error?: string;
+  } {
+    const found = this.sessions.get(sessionId);
+
+    if (found === undefined) {
+      return { success: false, error: `Test session not found: ${sessionId}` };
+    }
+
+    // finishedAt stays null until the run has ended
+    const finishedAt = found.finishedAt ? found.finishedAt.toISOString() : null;
+
+    // The scrollback buffer is what gets exposed as the session's output
+    const result = {
+      sessionId,
+      output: found.scrollbackBuffer,
+      status: found.status,
+      startedAt: found.startedAt.toISOString(),
+      finishedAt,
+    };
+
+    return { success: true, result };
+  }
+
+  /**
+   * List all test sessions (optionally filter by worktree)
+   */
+  listSessions(worktreePath?: string): {
+    success: boolean;
+    result: {
+      sessions: Array<{
+        sessionId: string;
+        worktreePath: string;
+        command: string;
+        status: TestRunStatus;
+        testFile?: string;
+        startedAt: string;
+        finishedAt: string | null;
+        exitCode: number | null;
+      }>;
+    };
+  } {
+    const everySession = Array.from(this.sessions.values());
+
+    // An omitted (or empty) worktreePath means "no filter"
+    const selected = worktreePath
+      ? everySession.filter((entry) => entry.worktreePath === worktreePath)
+      : everySession;
+
+    // Project each session down to its serialisable summary (dates as ISO strings)
+    const summaries = selected.map((entry) => {
+      const finishedAt = entry.finishedAt ? entry.finishedAt.toISOString() : null;
+      return {
+        sessionId: entry.id,
+        worktreePath: entry.worktreePath,
+        command: entry.command,
+        status: entry.status,
+        testFile: entry.testFile,
+        startedAt: entry.startedAt.toISOString(),
+        finishedAt,
+        exitCode: entry.exitCode,
+      };
+    });
+
+    return { success: true, result: { sessions: summaries } };
+  }
+
+  /**
+   * Check if a worktree has an active test run
+   */
+  isRunning(worktreePath: string): boolean {
+    // A worktree counts as running exactly when it has an active session
+    const active = this.getActiveSession(worktreePath);
+    return active !== undefined;
+  }
+
+  /**
+   * Clean up old completed sessions (keep only recent ones)
+   */
+  cleanupOldSessions(maxAgeMs: number = 30 * 60 * 1000): void {
+    const now = Date.now();
+
+    for (const [sessionId, session] of this.sessions.entries()) {
+      // Running sessions, and sessions without a finish time, are never evicted
+      if (session.status === 'running' || !session.finishedAt) {
+        continue;
+      }
+
+      // Keep anything that finished within the retention window
+      const ageMs = now - session.finishedAt.getTime();
+      if (!(ageMs > maxAgeMs)) {
+        continue;
+      }
+
+      this.sessions.delete(sessionId);
+      logger.debug(`Cleaned up old test session: ${sessionId}`);
+    }
+  }
+
+  /**
+   * Cancel all running test sessions (for cleanup)
+   */
+  async cancelAll(): Promise<void> {
+    // Snapshot only the sessions that are actually running, so the logged count matches
+    // the number of sessions that will be cancelled (finished sessions are left alone)
+    const running = Array.from(this.sessions.values()).filter(
+      (session) => session.status === 'running'
+    );
+
+    logger.info(`Cancelling all ${running.length} running test sessions`);
+
+    for (const session of running) {
+      // stopTests reports a no-op success if the session finished in the meantime
+      await this.stopTests(session.id);
+    }
+  }
+
+  /**
+   * Cleanup service resources
+   */
+  async cleanup(): Promise<void> {
+    // Stop every running session first...
+    await this.cancelAll();
+    // ...then drop all session records, finished ones included
+    this.sessions.clear();
+  }
+}
+
+// Lazily created, module-wide TestRunnerService instance (null until first requested)
+let testRunnerServiceInstance: TestRunnerService | null = null;
+
+export function getTestRunnerService(): TestRunnerService {
+  const existing = testRunnerServiceInstance;
+  if (existing !== null) {
+    return existing;
+  }
+
+  // First call: build the service and remember it for every later caller
+  const created = new TestRunnerService();
+  testRunnerServiceInstance = created;
+  return created;
+}
+
+// Cleanup on process exit: cancel running test sessions when a termination signal arrives.
+// NOTE(review): per the Node.js docs, installing SIGTERM/SIGINT listeners removes the default
+// "exit on signal" behaviour. These handlers only clean up and never exit, so presumably
+// another handler elsewhere in the server terminates the process — confirm.
+for (const signal of ['SIGTERM', 'SIGINT'] as const) {
+  process.on(signal, () => {
+    const service = testRunnerServiceInstance;
+    // Nothing to clean up if the singleton was never created
+    if (service === null) {
+      return;
+    }
+
+    service.cleanup().catch((err) => {
+      logger.error(`Cleanup failed on ${signal}:`, err);
+    });
+  });
+}
+
+// Export the class itself (alongside the getTestRunnerService singleton accessor) for
+// testing purposes
+export { TestRunnerService };