Fix agent output summary for pipeline steps (#812)

* Changes from fix/agent-output-summary-for-pipeline-steps

* feat: Optimize pipeline summary extraction and fix regex vulnerability

* fix: Use fallback summary for pipeline steps when extraction fails

* fix: Strip follow-up session scaffold from pipeline step fallback summaries
This commit is contained in:
gsxdsm
2026-02-25 22:13:38 -08:00
committed by GitHub
parent 70c9fd77f6
commit 9747faf1b9
37 changed files with 7164 additions and 163 deletions

View File

@@ -207,12 +207,21 @@ Let me begin by...
describe('detectTaskCompleteMarker', () => {
it('should detect task complete marker and return task ID', () => {
expect(detectTaskCompleteMarker('[TASK_COMPLETE] T001')).toBe('T001');
expect(detectTaskCompleteMarker('[TASK_COMPLETE] T042')).toBe('T042');
expect(detectTaskCompleteMarker('[TASK_COMPLETE] T001')).toEqual({
id: 'T001',
summary: undefined,
});
expect(detectTaskCompleteMarker('[TASK_COMPLETE] T042')).toEqual({
id: 'T042',
summary: undefined,
});
});
it('should handle marker with summary', () => {
expect(detectTaskCompleteMarker('[TASK_COMPLETE] T001: User model created')).toBe('T001');
expect(detectTaskCompleteMarker('[TASK_COMPLETE] T001: User model created')).toEqual({
id: 'T001',
summary: 'User model created',
});
});
it('should return null when no marker present', () => {
@@ -229,7 +238,28 @@ Done with the implementation:
Moving on to...
`;
expect(detectTaskCompleteMarker(accumulated)).toBe('T003');
expect(detectTaskCompleteMarker(accumulated)).toEqual({
id: 'T003',
summary: 'Database setup complete',
});
});
it('should find marker in the middle of a stream with trailing text', () => {
const streamText =
'The implementation is complete! [TASK_COMPLETE] T001: Added user model and tests. Now let me check the next task...';
expect(detectTaskCompleteMarker(streamText)).toEqual({
id: 'T001',
summary: 'Added user model and tests. Now let me check the next task...',
});
});
it('should find marker in the middle of a stream with multiple tasks and return the FIRST match', () => {
const streamText =
'[TASK_COMPLETE] T001: Task one done. Continuing... [TASK_COMPLETE] T002: Task two done. Moving on...';
expect(detectTaskCompleteMarker(streamText)).toEqual({
id: 'T001',
summary: 'Task one done. Continuing...',
});
});
it('should not confuse with TASK_START marker', () => {
@@ -240,6 +270,44 @@ Moving on to...
expect(detectTaskCompleteMarker('[TASK_COMPLETE] TASK1')).toBeNull();
expect(detectTaskCompleteMarker('[TASK_COMPLETE] T1')).toBeNull();
});
it('should allow brackets in summary text', () => {
// Regression test: summaries containing array[index] syntax should not be truncated
expect(
detectTaskCompleteMarker('[TASK_COMPLETE] T001: Supports array[index] access syntax')
).toEqual({
id: 'T001',
summary: 'Supports array[index] access syntax',
});
});
it('should handle summary with multiple brackets', () => {
expect(
detectTaskCompleteMarker('[TASK_COMPLETE] T042: Fixed bug in data[0].items[key] mapping')
).toEqual({
id: 'T042',
summary: 'Fixed bug in data[0].items[key] mapping',
});
});
it('should stop at newline in summary', () => {
const result = detectTaskCompleteMarker(
'[TASK_COMPLETE] T001: First line\nSecond line without marker'
);
expect(result).toEqual({
id: 'T001',
summary: 'First line',
});
});
it('should stop at next TASK_START marker', () => {
expect(
detectTaskCompleteMarker('[TASK_COMPLETE] T001: Summary text[TASK_START] T002')
).toEqual({
id: 'T001',
summary: 'Summary text',
});
});
});
describe('detectPhaseCompleteMarker', () => {
@@ -637,5 +705,85 @@ Second paragraph of summary.
expect(extractSummary(text)).toBe('First paragraph of summary.');
});
});
describe('pipeline accumulated output (multiple <summary> tags)', () => {
it('should return only the LAST summary tag from accumulated pipeline output', () => {
// Documents WHY the UI needs server-side feature.summary:
// When pipeline steps accumulate raw output in agent-output.md, each step
// writes its own <summary> tag. extractSummary takes only the LAST match,
// losing all previous steps' summaries.
const accumulatedOutput = `
## Step 1: Code Review
Some review output...
<summary>
## Code Review Summary
- Found 3 issues
- Suggested 2 improvements
</summary>
---
## Follow-up Session
## Step 2: Testing
Running tests...
<summary>
## Testing Summary
- All 15 tests pass
- Coverage at 92%
</summary>
`;
const result = extractSummary(accumulatedOutput);
// Only the LAST summary tag is returned - the Code Review summary is lost
expect(result).toBe('## Testing Summary\n- All 15 tests pass\n- Coverage at 92%');
expect(result).not.toContain('Code Review');
});
it('should return only the LAST summary from three pipeline steps', () => {
const accumulatedOutput = `
<summary>Step 1: Implementation complete</summary>
---
## Follow-up Session
<summary>Step 2: Code review findings</summary>
---
## Follow-up Session
<summary>Step 3: All tests passing</summary>
`;
const result = extractSummary(accumulatedOutput);
expect(result).toBe('Step 3: All tests passing');
expect(result).not.toContain('Step 1');
expect(result).not.toContain('Step 2');
});
it('should handle accumulated output where only one step has a summary tag', () => {
const accumulatedOutput = `
## Step 1: Implementation
Some raw output without summary tags...
---
## Follow-up Session
## Step 2: Testing
<summary>
## Test Results
- All tests pass
</summary>
`;
const result = extractSummary(accumulatedOutput);
expect(result).toBe('## Test Results\n- All tests pass');
});
});
});
});