Fix agent output validation to prevent false verified status (#807)

* Changes from fix/cursor-fix * feat: Enhance provider error messages with diagnostic context, address test failure, fix port change, move playwright tests to different port * Update apps/ui/src/components/views/board-view/dialogs/add-feature-dialog.tsx Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> * ci: Update test server port from 3008 to 3108 and add environment configuration * fix: Correct typo in health endpoint URL and standardize port env vars --------- Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
2026-03-17 22:13:08 +00:00 · 2026-02-24 20:18:40 -08:00
parent 0330c70261
commit 51e9a23ba1
36 changed files with 1610 additions and 104 deletions
--- a/apps/server/tests/unit/services/agent-executor.test.ts
+++ b/apps/server/tests/unit/services/agent-executor.test.ts
@@ -685,6 +685,309 @@ describe('AgentExecutor', () => {
      await expect(executor.execute(options, callbacks)).rejects.toThrow('API rate limit exceeded');
    });

+    it('should throw "Unknown error" when provider stream yields error with empty message', async () => { // Case: the only streamed event is an 'error' whose text is empty.
+      const executor = new AgentExecutor(
+        mockEventBus,
+        mockFeatureStateManager,
+        mockPlanApprovalService,
+        mockSettingsService
+      );
+
+      const mockProvider = {
+        getName: () => 'mock',
+        executeQuery: vi.fn().mockImplementation(function* () {
+          yield {
+            type: 'error',
+            error: '', // deliberately empty: the provider supplies no message text
+            session_id: 'sess-123',
+          };
+        }),
+      } as unknown as BaseProvider; // double cast because the stub only defines getName and executeQuery
+
+      const options: AgentExecutionOptions = {
+        workDir: '/test',
+        featureId: 'test-feature',
+        prompt: 'Test prompt',
+        projectPath: '/project',
+        abortController: new AbortController(),
+        provider: mockProvider,
+        effectiveBareModel: 'claude-sonnet-4-6',
+        planningMode: 'skip',
+      };
+
+      const callbacks = {
+        waitForApproval: vi.fn().mockResolvedValue({ approved: true }),
+        saveFeatureSummary: vi.fn(),
+        updateFeatureSummary: vi.fn(),
+        buildTaskPrompt: vi.fn().mockReturnValue('task prompt'),
+      };
+
+      await expect(executor.execute(options, callbacks)).rejects.toThrow('Unknown error'); // string argument is matched as a substring of the rejection message
+    });
+
+    it('should throw with sanitized error when provider yields ANSI-decorated error', async () => { // Case: error text wrapped in ANSI escapes and carrying an "Error: " prefix.
+      const executor = new AgentExecutor(
+        mockEventBus,
+        mockFeatureStateManager,
+        mockPlanApprovalService,
+        mockSettingsService
+      );
+
+      const mockProvider = {
+        getName: () => 'mock',
+        executeQuery: vi.fn().mockImplementation(function* () {
+          yield {
+            type: 'error',
+            // ANSI color codes + "Error: " prefix that should be stripped
+            error: '\x1b[31mError: Connection refused\x1b[0m', // ESC[31m = red foreground, ESC[0m = reset
+          };
+        }),
+      } as unknown as BaseProvider; // double cast because the stub only defines getName and executeQuery
+
+      const options: AgentExecutionOptions = {
+        workDir: '/test',
+        featureId: 'test-feature',
+        prompt: 'Test prompt',
+        projectPath: '/project',
+        abortController: new AbortController(),
+        provider: mockProvider,
+        effectiveBareModel: 'claude-sonnet-4-6',
+        planningMode: 'skip',
+      };
+
+      const callbacks = {
+        waitForApproval: vi.fn().mockResolvedValue({ approved: true }),
+        saveFeatureSummary: vi.fn(),
+        updateFeatureSummary: vi.fn(),
+        buildTaskPrompt: vi.fn().mockReturnValue('task prompt'),
+      };
+
+      // Should strip ANSI codes and "Error: " prefix. NOTE(review): toThrow(string) is a substring match, so the raw input above would also satisfy it; a regex or an extra negative check on '\x1b[' would be needed to prove the stripping.
+      await expect(executor.execute(options, callbacks)).rejects.toThrow('Connection refused');
+    });
+
+    it('should throw when result subtype is error_max_turns', async () => { // Case: assistant text is streamed first, then a 'result' event with an error subtype.
+      const executor = new AgentExecutor(
+        mockEventBus,
+        mockFeatureStateManager,
+        mockPlanApprovalService,
+        mockSettingsService
+      );
+
+      const mockProvider = {
+        getName: () => 'mock',
+        executeQuery: vi.fn().mockImplementation(function* () {
+          yield {
+            type: 'assistant',
+            message: {
+              content: [{ type: 'text', text: 'Working on it...' }],
+            },
+          }; // partial output before the failure: execute() is still expected to reject
+          yield {
+            type: 'result',
+            subtype: 'error_max_turns',
+            session_id: 'sess-456',
+          };
+        }),
+      } as unknown as BaseProvider; // double cast because the stub only defines getName and executeQuery
+
+      const options: AgentExecutionOptions = {
+        workDir: '/test',
+        featureId: 'test-feature',
+        prompt: 'Test prompt',
+        projectPath: '/project',
+        abortController: new AbortController(),
+        provider: mockProvider,
+        effectiveBareModel: 'claude-sonnet-4-6',
+        planningMode: 'skip',
+      };
+
+      const callbacks = {
+        waitForApproval: vi.fn().mockResolvedValue({ approved: true }),
+        saveFeatureSummary: vi.fn(),
+        updateFeatureSummary: vi.fn(),
+        buildTaskPrompt: vi.fn().mockReturnValue('task prompt'),
+      };
+
+      await expect(executor.execute(options, callbacks)).rejects.toThrow(
+        'Agent execution ended with: error_max_turns' // expected message embeds the subtype string verbatim
+      );
+    });
+
+    it('should throw when result subtype is error_during_execution', async () => { // Case: the stream consists of a single 'result' event with an error subtype.
+      const executor = new AgentExecutor(
+        mockEventBus,
+        mockFeatureStateManager,
+        mockPlanApprovalService,
+        mockSettingsService
+      );
+
+      const mockProvider = {
+        getName: () => 'mock',
+        executeQuery: vi.fn().mockImplementation(function* () {
+          yield {
+            type: 'result',
+            subtype: 'error_during_execution',
+            session_id: 'sess-789',
+          };
+        }),
+      } as unknown as BaseProvider; // double cast because the stub only defines getName and executeQuery
+
+      const options: AgentExecutionOptions = {
+        workDir: '/test',
+        featureId: 'test-feature',
+        prompt: 'Test prompt',
+        projectPath: '/project',
+        abortController: new AbortController(),
+        provider: mockProvider,
+        effectiveBareModel: 'claude-sonnet-4-6',
+        planningMode: 'skip',
+      };
+
+      const callbacks = {
+        waitForApproval: vi.fn().mockResolvedValue({ approved: true }),
+        saveFeatureSummary: vi.fn(),
+        updateFeatureSummary: vi.fn(),
+        buildTaskPrompt: vi.fn().mockReturnValue('task prompt'),
+      };
+
+      await expect(executor.execute(options, callbacks)).rejects.toThrow(
+        'Agent execution ended with: error_during_execution' // expected message embeds the subtype string verbatim
+      );
+    });
+
+    it('should throw when result subtype is error_max_structured_output_retries', async () => { // Case: the stream consists of a single 'result' event with an error subtype.
+      const executor = new AgentExecutor(
+        mockEventBus,
+        mockFeatureStateManager,
+        mockPlanApprovalService,
+        mockSettingsService
+      );
+
+      const mockProvider = {
+        getName: () => 'mock',
+        executeQuery: vi.fn().mockImplementation(function* () {
+          yield {
+            type: 'result',
+            subtype: 'error_max_structured_output_retries',
+          }; // this event carries no session_id field
+        }),
+      } as unknown as BaseProvider; // double cast because the stub only defines getName and executeQuery
+
+      const options: AgentExecutionOptions = {
+        workDir: '/test',
+        featureId: 'test-feature',
+        prompt: 'Test prompt',
+        projectPath: '/project',
+        abortController: new AbortController(),
+        provider: mockProvider,
+        effectiveBareModel: 'claude-sonnet-4-6',
+        planningMode: 'skip',
+      };
+
+      const callbacks = {
+        waitForApproval: vi.fn().mockResolvedValue({ approved: true }),
+        saveFeatureSummary: vi.fn(),
+        updateFeatureSummary: vi.fn(),
+        buildTaskPrompt: vi.fn().mockReturnValue('task prompt'),
+      };
+
+      await expect(executor.execute(options, callbacks)).rejects.toThrow(
+        'Agent execution ended with: error_max_structured_output_retries' // expected message embeds the subtype string verbatim
+      );
+    });
+
+    it('should throw when result subtype is error_max_budget_usd', async () => { // Case: the stream consists of a single 'result' event with an error subtype.
+      const executor = new AgentExecutor(
+        mockEventBus,
+        mockFeatureStateManager,
+        mockPlanApprovalService,
+        mockSettingsService
+      );
+
+      const mockProvider = {
+        getName: () => 'mock',
+        executeQuery: vi.fn().mockImplementation(function* () {
+          yield {
+            type: 'result',
+            subtype: 'error_max_budget_usd',
+            session_id: 'sess-budget',
+          };
+        }),
+      } as unknown as BaseProvider; // double cast because the stub only defines getName and executeQuery
+
+      const options: AgentExecutionOptions = {
+        workDir: '/test',
+        featureId: 'test-feature',
+        prompt: 'Test prompt',
+        projectPath: '/project',
+        abortController: new AbortController(),
+        provider: mockProvider,
+        effectiveBareModel: 'claude-sonnet-4-6',
+        planningMode: 'skip',
+      };
+
+      const callbacks = {
+        waitForApproval: vi.fn().mockResolvedValue({ approved: true }),
+        saveFeatureSummary: vi.fn(),
+        updateFeatureSummary: vi.fn(),
+        buildTaskPrompt: vi.fn().mockReturnValue('task prompt'),
+      };
+
+      await expect(executor.execute(options, callbacks)).rejects.toThrow(
+        'Agent execution ended with: error_max_budget_usd' // expected message embeds the subtype string verbatim
+      );
+    });
+
+    it('should NOT throw when result subtype is success', async () => { // Control case: assistant text followed by a 'success' result must resolve.
+      const executor = new AgentExecutor(
+        mockEventBus,
+        mockFeatureStateManager,
+        mockPlanApprovalService,
+        mockSettingsService
+      );
+
+      const mockProvider = {
+        getName: () => 'mock',
+        executeQuery: vi.fn().mockImplementation(function* () {
+          yield {
+            type: 'assistant',
+            message: {
+              content: [{ type: 'text', text: 'Done!' }],
+            },
+          };
+          yield {
+            type: 'result',
+            subtype: 'success',
+            session_id: 'sess-ok',
+          };
+        }),
+      } as unknown as BaseProvider; // double cast because the stub only defines getName and executeQuery
+
+      const options: AgentExecutionOptions = {
+        workDir: '/test',
+        featureId: 'test-feature',
+        prompt: 'Test prompt',
+        projectPath: '/project',
+        abortController: new AbortController(),
+        provider: mockProvider,
+        effectiveBareModel: 'claude-sonnet-4-6',
+        planningMode: 'skip',
+      };
+
+      const callbacks = {
+        waitForApproval: vi.fn().mockResolvedValue({ approved: true }),
+        saveFeatureSummary: vi.fn(),
+        updateFeatureSummary: vi.fn(),
+        buildTaskPrompt: vi.fn().mockReturnValue('task prompt'),
+      };
+
+      // Should resolve without throwing
+      const result = await executor.execute(options, callbacks); // a rejection here fails the test, since the await is not wrapped in expect(...).rejects
+      expect(result.aborted).toBe(false);
+      expect(result.responseText).toContain('Done!'); // the streamed assistant text is expected to appear in responseText
+    });
+
    it('should throw error when authentication fails in response', async () => {
      const executor = new AgentExecutor(
        mockEventBus,