Fix agent output summary for pipeline steps (#812)

* Changes from fix/agent-output-summary-for-pipeline-steps * feat: Optimize pipeline summary extraction and fix regex vulnerability * fix: Use fallback summary for pipeline steps when extraction fails * fix: Strip follow-up session scaffold from pipeline step fallback summaries
2026-03-19 22:53:08 +00:00 · 2026-02-25 22:13:38 -08:00
parent 70c9fd77f6
commit 9747faf1b9
37 changed files with 7164 additions and 163 deletions
--- a/apps/server/tests/unit/services/spec-parser.test.ts
+++ b/apps/server/tests/unit/services/spec-parser.test.ts
@@ -207,12 +207,21 @@ Let me begin by...

  describe('detectTaskCompleteMarker', () => {
    it('should detect task complete marker and return task ID', () => {
-      expect(detectTaskCompleteMarker('[TASK_COMPLETE] T001')).toBe('T001');
-      expect(detectTaskCompleteMarker('[TASK_COMPLETE] T042')).toBe('T042');
+      expect(detectTaskCompleteMarker('[TASK_COMPLETE] T001')).toEqual({
+        id: 'T001',
+        summary: undefined,
+      });
+      expect(detectTaskCompleteMarker('[TASK_COMPLETE] T042')).toEqual({
+        id: 'T042',
+        summary: undefined,
+      });
    });

    it('should handle marker with summary', () => {
-      expect(detectTaskCompleteMarker('[TASK_COMPLETE] T001: User model created')).toBe('T001');
+      expect(detectTaskCompleteMarker('[TASK_COMPLETE] T001: User model created')).toEqual({
+        id: 'T001',
+        summary: 'User model created',
+      });
    });

    it('should return null when no marker present', () => {
@@ -229,7 +238,28 @@ Done with the implementation:

 Moving on to...
 `;
-      expect(detectTaskCompleteMarker(accumulated)).toBe('T003');
+      expect(detectTaskCompleteMarker(accumulated)).toEqual({
+        id: 'T003',
+        summary: 'Database setup complete',
+      });
+    });
+
+    it('should find marker in the middle of a stream with trailing text', () => {
+      const streamText =
+        'The implementation is complete! [TASK_COMPLETE] T001: Added user model and tests. Now let me check the next task...';
+      expect(detectTaskCompleteMarker(streamText)).toEqual({
+        id: 'T001',
+        summary: 'Added user model and tests. Now let me check the next task...',
+      });
+    });
+
+    it('should find marker in the middle of a stream with multiple tasks and return the FIRST match', () => {
+      const streamText =
+        '[TASK_COMPLETE] T001: Task one done. Continuing... [TASK_COMPLETE] T002: Task two done. Moving on...';
+      expect(detectTaskCompleteMarker(streamText)).toEqual({
+        id: 'T001',
+        summary: 'Task one done. Continuing...',
+      });
    });

    it('should not confuse with TASK_START marker', () => {
@@ -240,6 +270,44 @@ Moving on to...
      expect(detectTaskCompleteMarker('[TASK_COMPLETE] TASK1')).toBeNull();
      expect(detectTaskCompleteMarker('[TASK_COMPLETE] T1')).toBeNull();
    });
+
+    it('should allow brackets in summary text', () => {
+      // Regression test: summaries containing array[index] syntax should not be truncated
+      expect(
+        detectTaskCompleteMarker('[TASK_COMPLETE] T001: Supports array[index] access syntax')
+      ).toEqual({
+        id: 'T001',
+        summary: 'Supports array[index] access syntax',
+      });
+    });
+
+    it('should handle summary with multiple brackets', () => {
+      expect(
+        detectTaskCompleteMarker('[TASK_COMPLETE] T042: Fixed bug in data[0].items[key] mapping')
+      ).toEqual({
+        id: 'T042',
+        summary: 'Fixed bug in data[0].items[key] mapping',
+      });
+    });
+
+    it('should stop at newline in summary', () => {
+      const result = detectTaskCompleteMarker(
+        '[TASK_COMPLETE] T001: First line\nSecond line without marker'
+      );
+      expect(result).toEqual({
+        id: 'T001',
+        summary: 'First line',
+      });
+    });
+
+    it('should stop at next TASK_START marker', () => {
+      expect(
+        detectTaskCompleteMarker('[TASK_COMPLETE] T001: Summary text[TASK_START] T002')
+      ).toEqual({
+        id: 'T001',
+        summary: 'Summary text',
+      });
+    });
  });

  describe('detectPhaseCompleteMarker', () => {
@@ -637,5 +705,85 @@ Second paragraph of summary.
        expect(extractSummary(text)).toBe('First paragraph of summary.');
      });
    });
+
+    describe('pipeline accumulated output (multiple <summary> tags)', () => {
+      it('should return only the LAST summary tag from accumulated pipeline output', () => {
+        // Documents WHY the UI needs server-side feature.summary:
+        // When pipeline steps accumulate raw output in agent-output.md, each step
+        // writes its own <summary> tag. extractSummary takes only the LAST match,
+        // losing all previous steps' summaries.
+        const accumulatedOutput = `
+## Step 1: Code Review
+
+Some review output...
+
+<summary>
+## Code Review Summary
+- Found 3 issues
+- Suggested 2 improvements
+</summary>
+
+---
+
+## Follow-up Session
+
+## Step 2: Testing
+
+Running tests...
+
+<summary>
+## Testing Summary
+- All 15 tests pass
+- Coverage at 92%
+</summary>
+`;
+        const result = extractSummary(accumulatedOutput);
+        // Only the LAST summary tag is returned - the Code Review summary is lost
+        expect(result).toBe('## Testing Summary\n- All 15 tests pass\n- Coverage at 92%');
+        expect(result).not.toContain('Code Review');
+      });
+
+      it('should return only the LAST summary from three pipeline steps', () => {
+        const accumulatedOutput = `
+<summary>Step 1: Implementation complete</summary>
+
+---
+
+## Follow-up Session
+
+<summary>Step 2: Code review findings</summary>
+
+---
+
+## Follow-up Session
+
+<summary>Step 3: All tests passing</summary>
+`;
+        const result = extractSummary(accumulatedOutput);
+        expect(result).toBe('Step 3: All tests passing');
+        expect(result).not.toContain('Step 1');
+        expect(result).not.toContain('Step 2');
+      });
+
+      it('should handle accumulated output where only one step has a summary tag', () => {
+        const accumulatedOutput = `
+## Step 1: Implementation
+Some raw output without summary tags...
+
+---
+
+## Follow-up Session
+
+## Step 2: Testing
+
+<summary>
+## Test Results
+- All tests pass
+</summary>
+`;
+        const result = extractSummary(accumulatedOutput);
+        expect(result).toBe('## Test Results\n- All tests pass');
+      });
+    });
  });
 });