Files
automaker/apps/server/tests/unit/ui/log-parser-summary.test.ts
gsxdsm 9747faf1b9 Fix agent output summary for pipeline steps (#812)
* Changes from fix/agent-output-summary-for-pipeline-steps

* feat: Optimize pipeline summary extraction and fix regex vulnerability

* fix: Use fallback summary for pipeline steps when extraction fails

* fix: Strip follow-up session scaffold from pipeline step fallback summaries
2026-02-25 22:13:38 -08:00

454 lines
12 KiB
TypeScript

/**
* Unit tests for the UI's log-parser extractSummary() function.
*
* These tests document the behavior of extractSummary() which is used as a
* CLIENT-SIDE FALLBACK when feature.summary (server-accumulated) is not available.
*
* IMPORTANT: extractSummary() returns only the LAST <summary> tag from raw output.
* For pipeline features with multiple steps, the server-side FeatureStateManager
* accumulates all step summaries into feature.summary, which the UI prefers.
*
* The tests below verify that extractSummary() correctly:
* - Returns only the LAST summary when multiple exist (unlike the server, which accumulates every pipeline step)
* - Handles various summary formats (<summary> tags, markdown headers)
* - Returns null when no summary is found
* - Handles edge cases like empty input and malformed tags
*/
import { describe, it, expect } from 'vitest';
// Recreate the extractSummary logic from apps/ui/src/lib/log-parser.ts
// We can't import directly because it's a UI file, so we mirror the logic here
/**
 * Repairs text that arrived split across lines by a streaming provider.
 *
 * Three passes are applied in order:
 *  1. a letter, one or more newlines, then a letter -> the newlines are dropped;
 *  2. newlines inside an opening tag (`<name ... >`) are dropped;
 *  3. newlines inside a closing tag (`</name ... >`) are dropped.
 *
 * Note that pass 1 is purely character-based: any line ending in a letter that
 * is followed by a line starting with a letter is glued to it.
 *
 * @param content - Text that may contain spurious newlines.
 * @returns The text with those newlines removed.
 */
function cleanFragmentedText(content: string): string {
  // Pass 1: rejoin words that were broken in the middle.
  const rejoinedWords = content.replace(/([a-zA-Z])\n+([a-zA-Z])/g, '$1$2');

  // Passes 2 and 3: rebuild opening tags first, then closing tags.
  return rejoinedWords
    .replace(/<([a-zA-Z]+)\n*([a-zA-Z]*)\n*>/g, '<$1$2>')
    .replace(/<\/([a-zA-Z]+)\n*([a-zA-Z]*)\n*>/g, '</$1$2>');
}
/**
 * Pulls a summary out of raw agent log output.
 *
 * The input is first normalised with cleanFragmentedText(). Patterns are then
 * tried from most to least structured: <summary> tags, a "## Summary" header,
 * a "## Feature|Changes|Implementation" header, an "All tasks completed"
 * sentence, and finally an "I've / I have ... completed|finished|implemented"
 * sentence. The first pattern with any match wins, and within that pattern the
 * LAST occurrence is used.
 *
 * @param rawOutput - Unprocessed agent output.
 * @returns The cleaned, trimmed text of the last match (possibly an empty
 *   string), or null when the input is empty/whitespace-only or nothing matches.
 */
function extractSummary(rawOutput: string): string | null {
  const hasContent = Boolean(rawOutput) && rawOutput.trim().length > 0;
  if (!hasContent) {
    return null;
  }

  type SummaryPattern = {
    pattern: RegExp;
    render: (m: RegExpMatchArray) => string;
  };

  // Order matters: earlier entries take priority over later ones.
  const candidates: SummaryPattern[] = [
    {
      pattern: /<summary>([\s\S]*?)<\/summary>/gi,
      render: (m) => m[1],
    },
    {
      pattern: /^##\s+Summary[^\n]*\n([\s\S]*?)(?=\n##\s+[^#]|\n🔧|$)/gm,
      render: (m) => m[1],
    },
    {
      pattern: /^##\s+(Feature|Changes|Implementation)[^\n]*\n([\s\S]*?)(?=\n##\s+[^#]|\n🔧|$)/gm,
      // Re-emit the header so the reader can tell which section was captured.
      render: (m) => `## ${m[1]}\n${m[2]}`,
    },
    {
      pattern: /(^|\n)(All tasks completed[\s\S]*?)(?=\n🔧|\n📋|\n⚡|\n❌|$)/g,
      render: (m) => m[2],
    },
    {
      pattern:
        /(^|\n)((I've|I have) (successfully |now )?(completed|finished|implemented)[\s\S]*?)(?=\n🔧|\n📋|\n⚡|\n❌|$)/g,
      render: (m) => m[2],
    },
  ];

  const normalized = cleanFragmentedText(rawOutput);

  for (const candidate of candidates) {
    // Walk every match and remember only the final one.
    let finalHit: RegExpMatchArray | undefined;
    for (const hit of normalized.matchAll(candidate.pattern)) {
      finalHit = hit;
    }

    if (finalHit !== undefined) {
      const rendered = candidate.render(finalHit);
      return cleanFragmentedText(rendered).trim();
    }
  }

  return null;
}
/**
 * Behavioural spec for the extractSummary() mirror defined earlier in this file.
 *
 * Groups: basic extraction, multiple summaries, pattern priority, edge cases,
 * fragmented streaming text, completion phrases, and pipeline-style outputs.
 * The fixtures are template literals, so their exact line breaks are part of
 * the input under test.
 */
describe('log-parser extractSummary (UI fallback)', () => {
// Single well-formed summary somewhere in the output.
describe('basic summary extraction', () => {
it('should extract summary from <summary> tags', () => {
const output = `
Some agent output...
<summary>
## Changes Made
- Fixed the bug in parser.ts
- Added error handling
</summary>
More output...
`;
const result = extractSummary(output);
// Text outside the tags is dropped and the captured body is trimmed.
expect(result).toBe('## Changes Made\n- Fixed the bug in parser.ts\n- Added error handling');
});
it('should prefer <summary> tags over markdown headers', () => {
const output = `
## Summary
Markdown summary here.
<summary>
XML summary here.
</summary>
`;
const result = extractSummary(output);
// The <summary> pattern is tried before the "## Summary" header pattern.
expect(result).toBe('XML summary here.');
});
});
// Several summaries in one output: only the final match of the winning pattern is returned.
describe('multiple summaries (pipeline accumulation scenario)', () => {
it('should return ONLY the LAST summary tag when multiple exist', () => {
// This is the key behavior for pipeline features:
// extractSummary returns only the LAST, which is why server-side
// accumulation is needed for multi-step pipelines
const output = `
## Step 1: Code Review
<summary>
- Found 3 issues
- Approved with changes
</summary>
---
## Step 2: Testing
<summary>
- All tests pass
- Coverage 95%
</summary>
`;
const result = extractSummary(output);
expect(result).toBe('- All tests pass\n- Coverage 95%');
expect(result).not.toContain('Code Review');
expect(result).not.toContain('Found 3 issues');
});
it('should return ONLY the LAST summary from three pipeline steps', () => {
const output = `
<summary>Step 1 complete</summary>
---
<summary>Step 2 complete</summary>
---
<summary>Step 3 complete - all done!</summary>
`;
const result = extractSummary(output);
expect(result).toBe('Step 3 complete - all done!');
expect(result).not.toContain('Step 1');
expect(result).not.toContain('Step 2');
});
it('should handle mixed summary formats across pipeline steps', () => {
const output = `
## Step 1
<summary>
Implementation done
</summary>
---
## Step 2
## Summary
Review complete
---
## Step 3
<summary>
All tests passing
</summary>
`;
const result = extractSummary(output);
// The <summary> tag format takes priority, and returns the LAST match
expect(result).toBe('All tests passing');
});
});
// Patterns are tried in a fixed order; the first one that matches anything wins.
describe('priority order of summary patterns', () => {
it('should try patterns in priority order: <summary> first, then markdown headers', () => {
// When both <summary> tags and markdown headers exist,
// <summary> tags should take priority
const output = `
## Summary
This markdown summary should be ignored.
<summary>
This XML summary should be used.
</summary>
`;
const result = extractSummary(output);
expect(result).toBe('This XML summary should be used.');
expect(result).not.toContain('ignored');
});
it('should fall back to Feature/Changes/Implementation headers when no <summary> tag', () => {
// The header patterns are anchored with ^ under the m flag, so "## Feature"
// only has to begin a line; the preceding line is just filler output.
const output = `
Agent output here...
## Feature
New authentication system with OAuth support.
## Next
`;
const result = extractSummary(output);
// Only the header's presence is asserted here.
// Note: cleanFragmentedText() joins letter-newline-letter sequences, so the
// description line directly under the header is merged into the header line
// before matching; the exact captured body is not pinned by this test.
expect(result).toContain('## Feature');
});
it('should fall back to completion phrases when no structured summary found', () => {
const output = `
Working on the feature...
Making progress...
All tasks completed successfully. The feature is ready.
🔧 Tool: Bash
`;
const result = extractSummary(output);
// No tags or "##" headers are present, so the "All tasks completed" pattern is reached.
expect(result).toContain('All tasks completed');
});
});
// Inputs that are empty, unmatched, malformed, or contain unusual content.
describe('edge cases', () => {
it('should return null for empty string', () => {
// Rejected by the empty-input guard at the top of extractSummary().
expect(extractSummary('')).toBeNull();
});
it('should return null for whitespace-only string', () => {
// trim() leaves an empty string, so the same guard applies.
expect(extractSummary(' \n\n ')).toBeNull();
});
it('should return null when no summary pattern found', () => {
expect(extractSummary('Random agent output without any summary patterns')).toBeNull();
});
it('should handle malformed <summary> tags gracefully', () => {
const output = `
<summary>
This summary is never closed...
`;
// Without closing tag, the regex won't match
expect(extractSummary(output)).toBeNull();
});
it('should handle empty <summary> tags', () => {
const output = `
<summary></summary>
`;
const result = extractSummary(output);
// A match with an empty body yields '' rather than null (null means "no match").
expect(result).toBe(''); // Empty string is valid
});
it('should handle <summary> tags with only whitespace', () => {
const output = `
<summary>
</summary>
`;
const result = extractSummary(output);
expect(result).toBe(''); // Trimmed to empty string
});
it('should handle summary with markdown code blocks', () => {
const output = `
<summary>
## Changes
\`\`\`typescript
const x = 1;
\`\`\`
Done!
</summary>
`;
const result = extractSummary(output);
expect(result).toContain('```typescript');
expect(result).toContain('const x = 1;');
});
it('should handle summary with special characters', () => {
const output = `
<summary>
Fixed bug in parser.ts: "quotes" and 'apostrophes'
Special chars: <>&$@#%^*
</summary>
`;
const result = extractSummary(output);
// Only the literal </summary> closes the capture; stray < and > in the body do not.
expect(result).toContain('"quotes"');
expect(result).toContain('<>&$@#%^*');
});
});
// Input where the stream broke tags or words across lines.
describe('fragmented streaming text handling', () => {
it('should handle fragmented <summary> tags from streaming', () => {
// Sometimes streaming providers split text like "<sum\n\nmary>"
const output = `
<sum
mary>
Fixed the issue
</sum
mary>
`;
const result = extractSummary(output);
// The cleanFragmentedText function should normalize this
expect(result).toBe('Fixed the issue');
});
it('should handle fragmented text within summary content', () => {
const output = `
<summary>
Fixed the bug in par
ser.ts
</summary>
`;
const result = extractSummary(output);
// cleanFragmentedText should join "par\n\nser" into "parser"
expect(result).toBe('Fixed the bug in parser.ts');
});
});
// Free-text fallbacks used when no tags or "##" headers are present.
describe('completion phrase detection', () => {
it('should extract "All tasks completed" summaries', () => {
const output = `
Some output...
All tasks completed successfully. The feature is ready for review.
🔧 Tool: Bash
`;
const result = extractSummary(output);
// The capture stops before the line that starts with the 🔧 marker.
expect(result).toContain('All tasks completed');
});
it("should extract I've completed summaries", () => {
const output = `
Working on feature...
I've successfully implemented the feature with all requirements met.
🔧 Tool: Read
`;
const result = extractSummary(output);
expect(result).toContain("I've successfully implemented");
});
it('should extract "I have finished" summaries', () => {
const output = `
Implementation phase...
I have finished the implementation.
📋 Planning
`;
const result = extractSummary(output);
// Here the capture is ended by the 📋 marker line instead of 🔧.
expect(result).toContain('I have finished');
});
});
// Longer fixtures shaped like actual agent logs, with tool markers and follow-up sessions.
describe('real-world pipeline scenarios', () => {
it('should handle typical multi-step pipeline output (returns last only)', () => {
// This test documents WHY server-side accumulation is essential:
// extractSummary only returns the last step's summary
const output = `
📋 Planning Mode: Full
🔧 Tool: Read
Input: {"file_path": "src/parser.ts"}
<summary>
## Code Review
- Analyzed parser.ts
- Found potential improvements
</summary>
---
## Follow-up Session
🔧 Tool: Edit
Input: {"file_path": "src/parser.ts"}
<summary>
## Implementation
- Applied suggested improvements
- Updated tests
</summary>
---
## Follow-up Session
🔧 Tool: Bash
Input: {"command": "npm test"}
<summary>
## Testing
- All 42 tests pass
- No regressions detected
</summary>
`;
const result = extractSummary(output);
// Only the LAST summary is returned
expect(result).toBe('## Testing\n- All 42 tests pass\n- No regressions detected');
// Earlier summaries are lost
expect(result).not.toContain('Code Review');
expect(result).not.toContain('Implementation');
});
it('should handle single-step non-pipeline output', () => {
// For non-pipeline features, extractSummary works correctly
const output = `
Working on feature...
<summary>
## Implementation Complete
- Created new component
- Added unit tests
- Updated documentation
</summary>
`;
const result = extractSummary(output);
expect(result).toContain('Implementation Complete');
expect(result).toContain('Created new component');
});
});
});
/**
* These tests verify the UI fallback behavior for summary extraction.
*
* KEY INSIGHT: The extractSummary() function returns only the LAST summary,
* which is why the server-side FeatureStateManager.saveFeatureSummary() method
* accumulates all step summaries into feature.summary.
*
* The UI's AgentOutputModal component uses this priority:
* 1. feature.summary (server-accumulated, contains all steps)
* 2. extractSummary(output) (client-side fallback, last summary only)
*
* For pipeline features, this ensures all step summaries are displayed.
*/