feat: Add raw output logging and endpoint for debugging

- Introduced a new environment variable `AUTOMAKER_DEBUG_RAW_OUTPUT` to enable raw output logging for agent streams.
- Added a new endpoint `/raw-output` to retrieve raw JSONL output for debugging (usage sketch below).
- Implemented functionality in `AutoModeService` to log raw output events and save them to `raw-output.jsonl`.
- Enhanced `FeatureLoader` to provide access to raw output files.
- Updated UI components to clean fragmented streaming text for better log parsing.
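
A minimal usage sketch for the new flag and endpoint (not part of this commit; the `/api/features` prefix, host, and port are assumptions that depend on how the features router is mounted):

```ts
// Enable raw output logging on the server first, e.g. in .env:
//   AUTOMAKER_DEBUG_RAW_OUTPUT=true
// Then fetch the raw JSONL for a feature. The URL, project path, and feature ID
// below are placeholders, not values from this commit.
const res = await fetch('http://localhost:3000/api/features/raw-output', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({
    projectPath: '/path/to/project', // placeholder
    featureId: 'feature-123', // placeholder
  }),
});
const { success, content } = (await res.json()) as { success: boolean; content: string | null };
// `content` is the raw-output.jsonl text, or null if no raw output has been recorded yet.
```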
Shirone
2025-12-28 02:34:10 +01:00
parent 52b1dc98b8
commit e404262cb0
7 changed files with 250 additions and 21 deletions

View File

@@ -48,3 +48,15 @@ TERMINAL_ENABLED=true
TERMINAL_PASSWORD=
ENABLE_REQUEST_LOGGING=false
# ============================================
# OPTIONAL - Debugging
# ============================================
# Enable raw output logging for agent streams (default: false)
# When enabled, saves unprocessed stream events to raw-output.jsonl
# in each feature's directory (.automaker/features/{id}/raw-output.jsonl)
# Useful for debugging provider streaming issues, improving log parsing,
# or analyzing how different providers (Claude, Cursor) stream responses
# Note: This adds disk I/O overhead, only enable when debugging
AUTOMAKER_DEBUG_RAW_OUTPUT=false

View File

@@ -10,7 +10,7 @@ import { createGetHandler } from './routes/get.js';
import { createCreateHandler } from './routes/create.js';
import { createUpdateHandler } from './routes/update.js';
import { createDeleteHandler } from './routes/delete.js';
import { createAgentOutputHandler, createRawOutputHandler } from './routes/agent-output.js';
import { createGenerateTitleHandler } from './routes/generate-title.js';
export function createFeaturesRoutes(featureLoader: FeatureLoader): Router {
@@ -22,6 +22,7 @@ export function createFeaturesRoutes(featureLoader: FeatureLoader): Router {
router.post('/update', validatePathParams('projectPath'), createUpdateHandler(featureLoader));
router.post('/delete', validatePathParams('projectPath'), createDeleteHandler(featureLoader));
router.post('/agent-output', createAgentOutputHandler(featureLoader));
router.post('/raw-output', createRawOutputHandler(featureLoader));
router.post('/generate-title', createGenerateTitleHandler());
return router;

View File

@@ -1,5 +1,6 @@
/**
* POST /agent-output endpoint - Get agent output for a feature
* POST /raw-output endpoint - Get raw JSONL output for debugging
*/
import type { Request, Response } from 'express';
@@ -30,3 +31,31 @@ export function createAgentOutputHandler(featureLoader: FeatureLoader) {
}
};
}
/**
* Handler for getting raw JSONL output for debugging
*/
export function createRawOutputHandler(featureLoader: FeatureLoader) {
return async (req: Request, res: Response): Promise<void> => {
try {
const { projectPath, featureId } = req.body as {
projectPath: string;
featureId: string;
};
if (!projectPath || !featureId) {
res.status(400).json({
success: false,
error: 'projectPath and featureId are required',
});
return;
}
const content = await featureLoader.getRawOutput(projectPath, featureId);
res.json({ success: true, content });
} catch (error) {
logError(error, 'Get raw output failed');
res.status(500).json({ success: false, error: getErrorMessage(error) });
}
};
}

View File

@@ -1917,11 +1917,49 @@ This mock response was generated because AUTOMAKER_MOCK_AGENT=true was set.
// Note: We use projectPath here, not workDir, because workDir might be a worktree path
const featureDirForOutput = getFeatureDir(projectPath, featureId);
const outputPath = path.join(featureDirForOutput, 'agent-output.md');
const rawOutputPath = path.join(featureDirForOutput, 'raw-output.jsonl');
// Raw output logging is configurable via environment variable
// Set AUTOMAKER_DEBUG_RAW_OUTPUT=true to enable raw stream event logging
const enableRawOutput =
process.env.AUTOMAKER_DEBUG_RAW_OUTPUT === 'true' ||
process.env.AUTOMAKER_DEBUG_RAW_OUTPUT === '1';
// Incremental file writing state
let writeTimeout: ReturnType<typeof setTimeout> | null = null;
const WRITE_DEBOUNCE_MS = 500; // Batch writes every 500ms
// Raw output accumulator for debugging (NDJSON format)
let rawOutputLines: string[] = [];
let rawWriteTimeout: ReturnType<typeof setTimeout> | null = null;
// Helper to append raw stream event for debugging (only when enabled)
const appendRawEvent = (event: unknown): void => {
if (!enableRawOutput) return;
try {
const timestamp = new Date().toISOString();
const rawLine = JSON.stringify({ timestamp, event }, null, 4); // Pretty print for readability
rawOutputLines.push(rawLine);
// Debounced write of raw output
if (rawWriteTimeout) {
clearTimeout(rawWriteTimeout);
}
rawWriteTimeout = setTimeout(async () => {
try {
await secureFs.mkdir(path.dirname(rawOutputPath), { recursive: true });
await secureFs.appendFile(rawOutputPath, rawOutputLines.join('\n') + '\n');
rawOutputLines = []; // Clear after writing
} catch (error) {
console.error(`[AutoMode] Failed to write raw output for ${featureId}:`, error);
}
}, WRITE_DEBOUNCE_MS);
} catch {
// Ignore serialization errors
}
};
// Helper to write current responseText to file
const writeToFile = async (): Promise<void> => {
try {
@@ -1943,19 +1981,65 @@ This mock response was generated because AUTOMAKER_MOCK_AGENT=true was set.
}, WRITE_DEBOUNCE_MS);
};
// Track last text block for deduplication (Cursor sends duplicates)
let lastTextBlock = '';
streamLoop: for await (const msg of stream) {
// Log raw stream event for debugging
appendRawEvent(msg);
if (msg.type === 'assistant' && msg.message?.content) {
for (const block of msg.message.content) {
if (block.type === 'text') {
const newText = block.text || '';

// Skip empty text
if (!newText) continue;

// Cursor-specific: Skip duplicate consecutive text blocks
// Cursor often sends the same text twice in a row
if (newText === lastTextBlock) {
continue;
}

// Cursor-specific: Skip final accumulated text block
// At the end, Cursor sends one large block containing ALL previous text
// Detect by checking if this block contains most of responseText
if (
responseText.length > 100 &&
newText.length > responseText.length * 0.8 &&
responseText.trim().length > 0
) {
// Check if this looks like accumulated text (contains our existing content)
const normalizedResponse = responseText.replace(/\s+/g, ' ').trim();
const normalizedNew = newText.replace(/\s+/g, ' ').trim();
if (normalizedNew.includes(normalizedResponse.slice(0, 100))) {
// This is the final accumulated block, skip it
continue;
}
}

lastTextBlock = newText;

// Only add separator when we're at a natural paragraph break:
// - Previous text ends with sentence terminator AND new text starts a new thought
// - Don't add separators mid-word or mid-sentence (for streaming providers like Cursor)
if (responseText.length > 0 && newText.length > 0) {
const lastChar = responseText.slice(-1);
const endsWithSentence = /[.!?:]\s*$/.test(responseText);
const endsWithNewline = /\n\s*$/.test(responseText);
const startsNewParagraph = /^[\n#\-*>]/.test(newText);
// Add paragraph break only at natural boundaries
if (
!endsWithNewline &&
(endsWithSentence || startsNewParagraph) &&
!/[a-zA-Z0-9]/.test(lastChar) // Not mid-word
) {
responseText += '\n\n';
}
}
responseText += newText;
// Check for authentication errors in the response
if (
@@ -2431,6 +2515,21 @@ Implement all the changes described in the plan above.`;
}
// Final write - ensure all accumulated content is saved
await writeToFile();
// Flush remaining raw output (only if enabled)
if (enableRawOutput) {
if (rawWriteTimeout) {
clearTimeout(rawWriteTimeout);
}
if (rawOutputLines.length > 0) {
try {
await secureFs.mkdir(path.dirname(rawOutputPath), { recursive: true });
await secureFs.appendFile(rawOutputPath, rawOutputLines.join('\n') + '\n');
} catch (error) {
console.error(`[AutoMode] Failed to write final raw output for ${featureId}:`, error);
}
}
}
}
private async executeFeatureWithContext(

View File

@@ -158,6 +158,13 @@ export class FeatureLoader {
return path.join(this.getFeatureDir(projectPath, featureId), 'agent-output.md');
}
/**
* Get the path to a feature's raw-output.jsonl file
*/
getRawOutputPath(projectPath: string, featureId: string): string {
return path.join(this.getFeatureDir(projectPath, featureId), 'raw-output.jsonl');
}
/**
* Generate a new feature ID
*/
@@ -357,6 +364,23 @@ export class FeatureLoader {
}
}
/**
* Get raw output for a feature (JSONL format for debugging)
*/
async getRawOutput(projectPath: string, featureId: string): Promise<string | null> {
try {
const rawOutputPath = this.getRawOutputPath(projectPath, featureId);
const content = (await secureFs.readFile(rawOutputPath, 'utf-8')) as string;
return content;
} catch (error) {
if ((error as NodeJS.ErrnoException).code === 'ENOENT') {
return null;
}
logger.error(`[FeatureLoader] Failed to get raw output for ${featureId}:`, error);
throw error;
}
}
/**
* Save agent output for a feature
*/

View File

@@ -130,38 +130,60 @@ function getCurrentPhase(content: string): 'planning' | 'action' | 'verification
return 'planning';
}
/**
* Cleans up fragmented streaming text by removing spurious newlines
* This handles cases where streaming providers send partial text chunks
* that got separated by newlines during accumulation
*/
function cleanFragmentedText(content: string): string {
// Remove newlines that break up words (newline between letters)
// e.g., "sum\n\nmary" -> "summary"
let cleaned = content.replace(/([a-zA-Z])\n+([a-zA-Z])/g, '$1$2');
// Also clean up fragmented XML-like tags
// e.g., "<sum\n\nmary>" -> "<summary>"
cleaned = cleaned.replace(/<([a-zA-Z]+)\n*([a-zA-Z]*)\n*>/g, '<$1$2>');
cleaned = cleaned.replace(/<\/([a-zA-Z]+)\n*([a-zA-Z]*)\n*>/g, '</$1$2>');
return cleaned;
}
/**
* Extracts a summary from completed feature context
* Looks for content between <summary> and </summary> tags
*/
function extractSummary(content: string): string | undefined {
// First, clean up any fragmented text from streaming
const cleanedContent = cleanFragmentedText(content);
// Look for <summary> tags - capture everything between opening and closing tags
const summaryTagMatch = cleanedContent.match(/<summary>([\s\S]*?)<\/summary>/i);
if (summaryTagMatch) {
// Clean up the extracted summary content as well
return cleanFragmentedText(summaryTagMatch[1]).trim();
}
// Fallback: Look for summary sections - capture everything including subsections (###)
// Stop at same-level ## sections (but not ###), or tool markers, or end
const summaryMatch = cleanedContent.match(/## Summary[^\n]*\n([\s\S]*?)(?=\n## [^#]|\n🔧|$)/i);
if (summaryMatch) {
return cleanFragmentedText(summaryMatch[1]).trim();
}
// Look for completion markers and extract surrounding text
const completionMatch = cleanedContent.match(
/✓ (?:Feature|Verification|Task) (?:successfully|completed|verified)[^\n]*(?:\n[^\n]{1,200})?/i
);
if (completionMatch) {
return cleanFragmentedText(completionMatch[0]).trim();
}
// Look for "What was done" type sections // Look for "What was done" type sections
const whatWasDoneMatch = content.match( const whatWasDoneMatch = cleanedContent.match(
/(?:What was done|Changes made|Implemented)[^\n]*\n([\s\S]*?)(?=\n## [^#]|\n🔧|$)/i /(?:What was done|Changes made|Implemented)[^\n]*\n([\s\S]*?)(?=\n## [^#]|\n🔧|$)/i
); );
if (whatWasDoneMatch) { if (whatWasDoneMatch) {
return whatWasDoneMatch[1].trim(); return cleanFragmentedText(whatWasDoneMatch[1]).trim();
} }
return undefined; return undefined;

View File

@@ -11,6 +11,24 @@ import type {
CursorResultEvent,
} from '@automaker/types';
/**
* Cleans up fragmented streaming text by removing spurious newlines
* This handles cases where streaming providers send partial text chunks
* that got separated by newlines during accumulation
*/
function cleanFragmentedText(content: string): string {
// Remove newlines that break up words (newline between letters)
// e.g., "sum\n\nmary" -> "summary"
let cleaned = content.replace(/([a-zA-Z])\n+([a-zA-Z])/g, '$1$2');
// Also clean up fragmented XML-like tags
// e.g., "<sum\n\nmary>" -> "<summary>"
cleaned = cleaned.replace(/<([a-zA-Z]+)\n*([a-zA-Z]*)\n*>/g, '<$1$2>');
cleaned = cleaned.replace(/<\/([a-zA-Z]+)\n*([a-zA-Z]*)\n*>/g, '</$1$2>');
return cleaned;
}
export type LogEntryType =
| 'prompt'
| 'tool_call'
@@ -100,6 +118,8 @@ const generateDeterministicId = (content: string, lineIndex: number): string =>
*/
function detectEntryType(content: string): LogEntryType {
const trimmed = content.trim();
// Clean fragmented text for pattern matching
const cleaned = cleanFragmentedText(trimmed);
// Tool calls
if (trimmed.startsWith('🔧 Tool:') || trimmed.match(/^Tool:\s*/)) {
@@ -142,14 +162,17 @@ function detectEntryType(content: string): LogEntryType {
}
// Success messages and summary sections
// Check both raw and cleaned content for summary tags (handles fragmented streaming)
if (
trimmed.startsWith('✅') ||
trimmed.toLowerCase().includes('success') ||
trimmed.toLowerCase().includes('completed') ||
// Summary tags (preferred format from agent) - check both raw and cleaned
trimmed.startsWith('<summary>') ||
cleaned.startsWith('<summary>') ||
// Markdown summary headers (fallback)
trimmed.match(/^##\s+(Summary|Feature|Changes|Implementation)/i) ||
cleaned.match(/^##\s+(Summary|Feature|Changes|Implementation)/i) ||
trimmed.match(/^(I've|I have) (successfully |now )?(completed|finished|implemented)/i)
) {
return 'success';
@@ -585,6 +608,9 @@ export function shouldCollapseByDefault(entry: LogEntry): boolean {
* Generates a title for a log entry
*/
function generateTitle(type: LogEntryType, content: string): string {
// Clean content for pattern matching
const cleaned = cleanFragmentedText(content);
switch (type) {
case 'tool_call': {
const toolName = extractToolName(content);
@@ -607,11 +633,19 @@ function generateTitle(type: LogEntryType, content: string): string {
case 'error':
return 'Error';
case 'success': {
// Check if it's a summary section (check both raw and cleaned)
if (
content.startsWith('<summary>') ||
content.includes('<summary>') ||
cleaned.startsWith('<summary>') ||
cleaned.includes('<summary>')
) {
return 'Summary';
}
if (
content.match(/^##\s+(Summary|Feature|Changes|Implementation)/i) ||
cleaned.match(/^##\s+(Summary|Feature|Changes|Implementation)/i)
) {
return 'Summary';
}
if (
@@ -803,10 +837,12 @@ export function parseLogOutput(rawOutput: string): LogEntry[] {
trimmedLine.match(/\[Status\]/i) ||
trimmedLine.toLowerCase().includes('ultrathink preparation') ||
trimmedLine.match(/thinking level[:\s]*(low|medium|high|none|\d)/i) ||
// Summary tags (preferred format from agent) - check both raw and cleaned for fragmented streaming
trimmedLine.startsWith('<summary>') ||
cleanFragmentedText(trimmedLine).startsWith('<summary>') ||
// Agent summary sections (markdown headers - fallback)
trimmedLine.match(/^##\s+(Summary|Feature|Changes|Implementation)/i) ||
cleanFragmentedText(trimmedLine).match(/^##\s+(Summary|Feature|Changes|Implementation)/i) ||
// Summary introduction lines
trimmedLine.match(/^All tasks completed/i) ||
trimmedLine.match(/^(I've|I have) (successfully |now )?(completed|finished|implemented)/i);
@@ -834,7 +870,13 @@ export function parseLogOutput(rawOutput: string): LogEntry[] {
currentContent.push(trimmedLine);
// If this is a <summary> tag, start summary accumulation mode
// Check both raw and cleaned for fragmented streaming
const cleanedTrimmed = cleanFragmentedText(trimmedLine);
if (
(trimmedLine.startsWith('<summary>') || cleanedTrimmed.startsWith('<summary>')) &&
!trimmedLine.includes('</summary>') &&
!cleanedTrimmed.includes('</summary>')
) {
inSummaryAccumulation = true;
}
} else if (isInputLine && currentEntry) {