mirror of
https://github.com/AutoMaker-Org/automaker.git
synced 2026-03-17 10:03:08 +00:00
* Changes from fix/agent-output-summary-for-pipeline-steps * feat: Optimize pipeline summary extraction and fix regex vulnerability * fix: Use fallback summary for pipeline steps when extraction fails * fix: Strip follow-up session scaffold from pipeline step fallback summaries
454 lines
12 KiB
TypeScript
454 lines
12 KiB
TypeScript
/**
|
|
* Unit tests for the UI's log-parser extractSummary() function.
|
|
*
|
|
* These tests document the behavior of extractSummary() which is used as a
|
|
* CLIENT-SIDE FALLBACK when feature.summary (server-accumulated) is not available.
|
|
*
|
|
* IMPORTANT: extractSummary() returns only the LAST <summary> tag from raw output.
|
|
* For pipeline features with multiple steps, the server-side FeatureStateManager
|
|
* accumulates all step summaries into feature.summary, which the UI prefers.
|
|
*
|
|
* The tests below verify that extractSummary() correctly:
|
|
* - Returns only the LAST summary when multiple exist (as in multi-step pipeline output)
|
|
* - Handles various summary formats (<summary> tags, markdown headers)
|
|
* - Returns null when no summary is found
|
|
* - Handles edge cases like empty input and malformed tags
|
|
*/
|
|
|
|
import { describe, it, expect } from 'vitest';
|
|
|
|
// Recreate the extractSummary logic from apps/ui/src/lib/log-parser.ts
|
|
// We can't import directly because it's a UI file, so we mirror the logic here
|
|
|
|
/**
 * Repairs text that a streaming provider split across lines.
 *
 * Three passes are applied, in order:
 *  1. Every run of newlines sitting directly between two ASCII letters is
 *     removed, re-joining fragments such as "par\nser" -> "parser".
 *  2. Newlines left inside an opening tag (e.g. "<summary\n>") are removed.
 *  3. The same for closing tags (e.g. "</summary\n>").
 *
 * Pass 1 cannot tell a fragmented word from a genuine line break between a
 * line ending in a letter and a line starting with one; both are joined.
 *
 * NOTE(review): the file header says this logic is recreated from
 * apps/ui/src/lib/log-parser.ts; confirm that copy handles consecutive
 * one-letter fragments the same way.
 *
 * @param content - Text that may contain spurious newlines.
 * @returns The text with those newlines removed.
 */
function cleanFragmentedText(content: string): string {
  // The right-hand letter is matched with a lookahead so it is NOT consumed and
  // can act as the left-hand letter of the next match. Consuming it would leave
  // every second break in a run of one-letter fragments ("a\nb\nc" -> "ab\nc").
  let cleaned = content.replace(/([a-zA-Z])\n+(?=[a-zA-Z])/g, '$1');
  cleaned = cleaned.replace(/<([a-zA-Z]+)\n*([a-zA-Z]*)\n*>/g, '<$1$2>');
  cleaned = cleaned.replace(/<\/([a-zA-Z]+)\n*([a-zA-Z]*)\n*>/g, '</$1$2>');
  return cleaned;
}
|
|
|
|
/**
 * Extracts the summary text from raw agent log output.
 *
 * The output is first normalized with cleanFragmentedText(), then the patterns
 * below are tried in priority order. The first pattern that matches anywhere
 * wins, and of its matches only the LAST one is returned:
 *  1. `<summary>...</summary>` tags (case-insensitive)
 *  2. a `## Summary` markdown section (body only)
 *  3. a `## Feature`, `## Changes` or `## Implementation` markdown section,
 *     re-emitted as `## <Name>` followed by the section body
 *  4. text starting a line with "All tasks completed"
 *  5. text starting a line with "I've" / "I have", optionally "successfully" /
 *     "now", then "completed" / "finished" / "implemented"
 *
 * Markdown sections (2, 3) run until the next `## ` header, a line starting
 * with the wrench emoji, or the end of the input. Completion phrases (4, 5)
 * run until a line starting with one of the wrench / clipboard / lightning /
 * cross-mark emoji, or the end of the input.
 *
 * NOTE(review): the file header says this logic is recreated from
 * apps/ui/src/lib/log-parser.ts; confirm that copy uses the same end-of-input
 * handling for the markdown patterns.
 *
 * @param rawOutput - Raw log text produced by the agent.
 * @returns The last matching summary, cleaned and trimmed (this may be an
 *   empty string), or null when the input is empty / whitespace-only or no
 *   pattern matches.
 */
function extractSummary(rawOutput: string): string | null {
  if (!rawOutput || !rawOutput.trim()) {
    return null;
  }

  const cleanedOutput = cleanFragmentedText(rawOutput);

  // The markdown patterns need the `m` flag so that `^` anchors the header at
  // the start of any line. Under `m`, however, `$` matches at EVERY line end,
  // which makes the lazy body capture stop after the first line (or capture
  // nothing when a blank line follows the header). `(?![\s\S])` asserts the
  // real end of the input regardless of flags, so the body runs to the next
  // terminator as intended.
  const regexesToTry: Array<{
    regex: RegExp;
    processor: (m: RegExpMatchArray) => string;
  }> = [
    { regex: /<summary>([\s\S]*?)<\/summary>/gi, processor: (m) => m[1] },
    {
      regex: /^##\s+Summary[^\n]*\n([\s\S]*?)(?=\n##\s+[^#]|\n🔧|(?![\s\S]))/gm,
      processor: (m) => m[1],
    },
    {
      regex:
        /^##\s+(Feature|Changes|Implementation)[^\n]*\n([\s\S]*?)(?=\n##\s+[^#]|\n🔧|(?![\s\S]))/gm,
      processor: (m) => `## ${m[1]}\n${m[2]}`,
    },
    {
      // No `m` flag here, so `$` already means end of input.
      regex: /(^|\n)(All tasks completed[\s\S]*?)(?=\n🔧|\n📋|\n⚡|\n❌|$)/g,
      processor: (m) => m[2],
    },
    {
      regex:
        /(^|\n)((I've|I have) (successfully |now )?(completed|finished|implemented)[\s\S]*?)(?=\n🔧|\n📋|\n⚡|\n❌|$)/g,
      processor: (m) => m[2],
    },
  ];

  for (const { regex, processor } of regexesToTry) {
    const matches = [...cleanedOutput.matchAll(regex)];
    if (matches.length > 0) {
      // Later output supersedes earlier output, so keep only the final match.
      const lastMatch = matches[matches.length - 1];
      return cleanFragmentedText(processor(lastMatch)).trim();
    }
  }

  return null;
}
|
|
|
|
describe('log-parser extractSummary (UI fallback)', () => {
  describe('basic summary extraction', () => {
    it('should extract summary from <summary> tags', () => {
      const agentLog = `
Some agent output...

<summary>
## Changes Made
- Fixed the bug in parser.ts
- Added error handling
</summary>

More output...
`;
      const summary = extractSummary(agentLog);
      expect(summary).toBe('## Changes Made\n- Fixed the bug in parser.ts\n- Added error handling');
    });

    it('should prefer <summary> tags over markdown headers', () => {
      const agentLog = `
## Summary

Markdown summary here.

<summary>
XML summary here.
</summary>
`;
      const summary = extractSummary(agentLog);
      expect(summary).toBe('XML summary here.');
    });
  });

  describe('multiple summaries (pipeline accumulation scenario)', () => {
    it('should return ONLY the LAST summary tag when multiple exist', () => {
      // Core pipeline behavior: every step emits its own <summary>, but only
      // the final one comes back. Earlier steps have to be accumulated on the
      // server side.
      const agentLog = `
## Step 1: Code Review

<summary>
- Found 3 issues
- Approved with changes
</summary>

---

## Step 2: Testing

<summary>
- All tests pass
- Coverage 95%
</summary>
`;
      const summary = extractSummary(agentLog);
      expect(summary).toBe('- All tests pass\n- Coverage 95%');
      expect(summary).not.toContain('Code Review');
      expect(summary).not.toContain('Found 3 issues');
    });

    it('should return ONLY the LAST summary from three pipeline steps', () => {
      const agentLog = `
<summary>Step 1 complete</summary>

---

<summary>Step 2 complete</summary>

---

<summary>Step 3 complete - all done!</summary>
`;
      const summary = extractSummary(agentLog);
      expect(summary).toBe('Step 3 complete - all done!');
      expect(summary).not.toContain('Step 1');
      expect(summary).not.toContain('Step 2');
    });

    it('should handle mixed summary formats across pipeline steps', () => {
      const agentLog = `
## Step 1

<summary>
Implementation done
</summary>

---

## Step 2

## Summary
Review complete

---

## Step 3

<summary>
All tests passing
</summary>
`;
      const summary = extractSummary(agentLog);
      // <summary> tags outrank the markdown "## Summary" section, and among
      // the tags the last one wins.
      expect(summary).toBe('All tests passing');
    });
  });

  describe('priority order of summary patterns', () => {
    it('should try patterns in priority order: <summary> first, then markdown headers', () => {
      // Both formats are present; the tag pattern is tried first, so the
      // markdown section must not appear in the result.
      const agentLog = `
## Summary

This markdown summary should be ignored.

<summary>
This XML summary should be used.
</summary>
`;
      const summary = extractSummary(agentLog);
      expect(summary).toBe('This XML summary should be used.');
      expect(summary).not.toContain('ignored');
    });

    it('should fall back to Feature/Changes/Implementation headers when no <summary> tag', () => {
      // No <summary> tag and no "## Summary" section here, so the
      // Feature/Changes/Implementation header pattern is the first to match.
      const agentLog = `
Agent output here...

## Feature

New authentication system with OAuth support.

## Next
`;
      const summary = extractSummary(agentLog);
      // Only the presence of the re-emitted header is asserted, not the body.
      expect(summary).toContain('## Feature');
    });

    it('should fall back to completion phrases when no structured summary found', () => {
      const agentLog = `
Working on the feature...
Making progress...

All tasks completed successfully. The feature is ready.

🔧 Tool: Bash
`;
      const summary = extractSummary(agentLog);
      expect(summary).toContain('All tasks completed');
    });
  });

  describe('edge cases', () => {
    it('should return null for empty string', () => {
      const summary = extractSummary('');
      expect(summary).toBeNull();
    });

    it('should return null for whitespace-only string', () => {
      const summary = extractSummary(' \n\n ');
      expect(summary).toBeNull();
    });

    it('should return null when no summary pattern found', () => {
      const summary = extractSummary('Random agent output without any summary patterns');
      expect(summary).toBeNull();
    });

    it('should handle malformed <summary> tags gracefully', () => {
      const agentLog = `
<summary>
This summary is never closed...
`;
      // The tag pattern needs a closing </summary>, and no other pattern
      // applies to this text.
      const summary = extractSummary(agentLog);
      expect(summary).toBeNull();
    });

    it('should handle empty <summary> tags', () => {
      const agentLog = `
<summary></summary>
`;
      const summary = extractSummary(agentLog);
      // A matched but empty tag yields '' rather than null.
      expect(summary).toBe('');
    });

    it('should handle <summary> tags with only whitespace', () => {
      const agentLog = `
<summary>

</summary>
`;
      const summary = extractSummary(agentLog);
      // The captured whitespace is trimmed away, leaving ''.
      expect(summary).toBe('');
    });

    it('should handle summary with markdown code blocks', () => {
      const agentLog = `
<summary>
## Changes

\`\`\`typescript
const x = 1;
\`\`\`

Done!
</summary>
`;
      const summary = extractSummary(agentLog);
      expect(summary).toContain('```typescript');
      expect(summary).toContain('const x = 1;');
    });

    it('should handle summary with special characters', () => {
      const agentLog = `
<summary>
Fixed bug in parser.ts: "quotes" and 'apostrophes'
Special chars: <>&$@#%^*
</summary>
`;
      const summary = extractSummary(agentLog);
      expect(summary).toContain('"quotes"');
      expect(summary).toContain('<>&$@#%^*');
    });
  });

  describe('fragmented streaming text handling', () => {
    it('should handle fragmented <summary> tags from streaming', () => {
      // A streaming provider may break a tag apart, e.g. "<sum\n\nmary>".
      const agentLog = `
<sum

mary>
Fixed the issue
</sum

mary>
`;
      const summary = extractSummary(agentLog);
      // cleanFragmentedText() re-joins the tag before the patterns run.
      expect(summary).toBe('Fixed the issue');
    });

    it('should handle fragmented text within summary content', () => {
      const agentLog = `
<summary>
Fixed the bug in par
ser.ts
</summary>
`;
      const summary = extractSummary(agentLog);
      // The newline between "par" and "ser" sits between two letters, so it
      // is removed and the word becomes "parser".
      expect(summary).toBe('Fixed the bug in parser.ts');
    });
  });

  describe('completion phrase detection', () => {
    it('should extract "All tasks completed" summaries', () => {
      const agentLog = `
Some output...

All tasks completed successfully. The feature is ready for review.

🔧 Tool: Bash
`;
      const summary = extractSummary(agentLog);
      expect(summary).toContain('All tasks completed');
    });

    it("should extract I've completed summaries", () => {
      const agentLog = `
Working on feature...

I've successfully implemented the feature with all requirements met.

🔧 Tool: Read
`;
      const summary = extractSummary(agentLog);
      expect(summary).toContain("I've successfully implemented");
    });

    it('should extract "I have finished" summaries', () => {
      const agentLog = `
Implementation phase...

I have finished the implementation.

📋 Planning
`;
      const summary = extractSummary(agentLog);
      expect(summary).toContain('I have finished');
    });
  });

  describe('real-world pipeline scenarios', () => {
    it('should handle typical multi-step pipeline output (returns last only)', () => {
      // Shows why the server has to accumulate step summaries: from a
      // three-step log the client-side extraction keeps just the final one.
      const agentLog = `
📋 Planning Mode: Full

🔧 Tool: Read
Input: {"file_path": "src/parser.ts"}

<summary>
## Code Review
- Analyzed parser.ts
- Found potential improvements
</summary>

---

## Follow-up Session

🔧 Tool: Edit
Input: {"file_path": "src/parser.ts"}

<summary>
## Implementation
- Applied suggested improvements
- Updated tests
</summary>

---

## Follow-up Session

🔧 Tool: Bash
Input: {"command": "npm test"}

<summary>
## Testing
- All 42 tests pass
- No regressions detected
</summary>
`;
      const summary = extractSummary(agentLog);
      // Exactly the last step's summary...
      expect(summary).toBe('## Testing\n- All 42 tests pass\n- No regressions detected');
      // ...and nothing from the two earlier steps.
      expect(summary).not.toContain('Code Review');
      expect(summary).not.toContain('Implementation');
    });

    it('should handle single-step non-pipeline output', () => {
      // With a single <summary> there is nothing to lose.
      const agentLog = `
Working on feature...

<summary>
## Implementation Complete
- Created new component
- Added unit tests
- Updated documentation
</summary>
`;
      const summary = extractSummary(agentLog);
      expect(summary).toContain('Implementation Complete');
      expect(summary).toContain('Created new component');
    });
  });
});
|
|
|
|
/**
|
|
* These tests verify the UI fallback behavior for summary extraction.
|
|
*
|
|
* KEY INSIGHT: The extractSummary() function returns only the LAST summary,
|
|
* which is why the server-side FeatureStateManager.saveFeatureSummary() method
|
|
* accumulates all step summaries into feature.summary.
|
|
*
|
|
* The UI's AgentOutputModal component uses this priority:
|
|
* 1. feature.summary (server-accumulated, contains all steps)
|
|
* 2. extractSummary(output) (client-side fallback, last summary only)
|
|
*
|
|
* For pipeline features, this ensures all step summaries are displayed.
|
|
*/
|