feat: Add raw output logging and endpoint for debugging

- Introduced a new environment variable `AUTOMAKER_DEBUG_RAW_OUTPUT` to enable raw output logging for agent streams.
- Added a new endpoint `/raw-output` to retrieve raw JSONL output for debugging purposes.
- Implemented functionality in `AutoModeService` to log raw output events and save them to `raw-output.jsonl`.
- Enhanced `FeatureLoader` to provide access to raw output files.
- Updated UI components to clean fragmented streaming text for better log parsing.
Shirone
2025-12-28 02:34:10 +01:00
parent 52b1dc98b8
commit e404262cb0
7 changed files with 250 additions and 21 deletions

View File

@@ -48,3 +48,15 @@ TERMINAL_ENABLED=true
TERMINAL_PASSWORD=
ENABLE_REQUEST_LOGGING=false
# ============================================
# OPTIONAL - Debugging
# ============================================
# Enable raw output logging for agent streams (default: false)
# When enabled, saves unprocessed stream events to raw-output.jsonl
# in each feature's directory (.automaker/features/{id}/raw-output.jsonl)
# Useful for debugging provider streaming issues, improving log parsing,
# or analyzing how different providers (Claude, Cursor) stream responses
# Note: This adds disk I/O overhead; only enable it when debugging
AUTOMAKER_DEBUG_RAW_OUTPUT=false
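For reference, a minimal sketch of inspecting the resulting file from Node; the feature-directory layout is taken from the comment above, and it assumes each line is a standalone JSON object:

```ts
import { readFile } from 'node:fs/promises';
import path from 'node:path';

// Hypothetical debugging helper; not part of this commit.
async function dumpRawEvents(projectPath: string, featureId: string): Promise<void> {
  const rawPath = path.join(projectPath, '.automaker', 'features', featureId, 'raw-output.jsonl');
  const content = await readFile(rawPath, 'utf-8');
  for (const line of content.split('\n').filter(Boolean)) {
    // Each line is `{ "timestamp": "...", "event": { ... } }` as written by AutoModeService
    const { timestamp, event } = JSON.parse(line) as { timestamp: string; event: { type?: string } };
    console.log(timestamp, event.type ?? 'unknown event type');
  }
}
```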

View File

@@ -10,7 +10,7 @@ import { createGetHandler } from './routes/get.js';
import { createCreateHandler } from './routes/create.js';
import { createUpdateHandler } from './routes/update.js';
import { createDeleteHandler } from './routes/delete.js';
import { createAgentOutputHandler } from './routes/agent-output.js';
import { createAgentOutputHandler, createRawOutputHandler } from './routes/agent-output.js';
import { createGenerateTitleHandler } from './routes/generate-title.js';
export function createFeaturesRoutes(featureLoader: FeatureLoader): Router {
@@ -22,6 +22,7 @@ export function createFeaturesRoutes(featureLoader: FeatureLoader): Router {
router.post('/update', validatePathParams('projectPath'), createUpdateHandler(featureLoader));
router.post('/delete', validatePathParams('projectPath'), createDeleteHandler(featureLoader));
router.post('/agent-output', createAgentOutputHandler(featureLoader));
router.post('/raw-output', createRawOutputHandler(featureLoader));
router.post('/generate-title', createGenerateTitleHandler());
return router;

View File

@@ -1,5 +1,6 @@
/**
* POST /agent-output endpoint - Get agent output for a feature
* POST /raw-output endpoint - Get raw JSONL output for debugging
*/
import type { Request, Response } from 'express';
@@ -30,3 +31,31 @@ export function createAgentOutputHandler(featureLoader: FeatureLoader) {
}
};
}
/**
* Handler for getting raw JSONL output for debugging
*/
export function createRawOutputHandler(featureLoader: FeatureLoader) {
return async (req: Request, res: Response): Promise<void> => {
try {
const { projectPath, featureId } = req.body as {
projectPath: string;
featureId: string;
};
if (!projectPath || !featureId) {
res.status(400).json({
success: false,
error: 'projectPath and featureId are required',
});
return;
}
const content = await featureLoader.getRawOutput(projectPath, featureId);
res.json({ success: true, content });
} catch (error) {
logError(error, 'Get raw output failed');
res.status(500).json({ success: false, error: getErrorMessage(error) });
}
};
}
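A minimal client-side sketch of calling the new endpoint; only the `/raw-output` path and the request body shape come from the handler above, while the `/api/features` mount prefix is an assumption:

```ts
// Hypothetical call; adjust the base path to wherever createFeaturesRoutes is mounted.
async function fetchRawOutput(projectPath: string, featureId: string): Promise<string | null> {
  const res = await fetch('/api/features/raw-output', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ projectPath, featureId }),
  });
  const data = (await res.json()) as { success: boolean; content?: string | null; error?: string };
  if (!data.success) throw new Error(data.error ?? 'Failed to fetch raw output');
  // content is null when raw-output.jsonl does not exist yet (logging disabled or no run)
  return data.content ?? null;
}
```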

View File

@@ -1917,11 +1917,49 @@ This mock response was generated because AUTOMAKER_MOCK_AGENT=true was set.
// Note: We use projectPath here, not workDir, because workDir might be a worktree path
const featureDirForOutput = getFeatureDir(projectPath, featureId);
const outputPath = path.join(featureDirForOutput, 'agent-output.md');
const rawOutputPath = path.join(featureDirForOutput, 'raw-output.jsonl');
// Raw output logging is configurable via environment variable
// Set AUTOMAKER_DEBUG_RAW_OUTPUT=true to enable raw stream event logging
const enableRawOutput =
process.env.AUTOMAKER_DEBUG_RAW_OUTPUT === 'true' ||
process.env.AUTOMAKER_DEBUG_RAW_OUTPUT === '1';
// Incremental file writing state
let writeTimeout: ReturnType<typeof setTimeout> | null = null;
const WRITE_DEBOUNCE_MS = 500; // Batch writes every 500ms
// Raw output accumulator for debugging (NDJSON format)
let rawOutputLines: string[] = [];
let rawWriteTimeout: ReturnType<typeof setTimeout> | null = null;
// Helper to append raw stream event for debugging (only when enabled)
const appendRawEvent = (event: unknown): void => {
if (!enableRawOutput) return;
try {
const timestamp = new Date().toISOString();
const rawLine = JSON.stringify({ timestamp, event }); // One JSON object per line keeps the file valid JSONL
rawOutputLines.push(rawLine);
// Debounced write of raw output
if (rawWriteTimeout) {
clearTimeout(rawWriteTimeout);
}
rawWriteTimeout = setTimeout(async () => {
try {
await secureFs.mkdir(path.dirname(rawOutputPath), { recursive: true });
await secureFs.appendFile(rawOutputPath, rawOutputLines.join('\n') + '\n');
rawOutputLines = []; // Clear after writing
} catch (error) {
console.error(`[AutoMode] Failed to write raw output for ${featureId}:`, error);
}
}, WRITE_DEBOUNCE_MS);
} catch {
// Ignore serialization errors
}
};
// Helper to write current responseText to file
const writeToFile = async (): Promise<void> => {
try {
@@ -1943,19 +1981,65 @@ This mock response was generated because AUTOMAKER_MOCK_AGENT=true was set.
}, WRITE_DEBOUNCE_MS);
};
// Track last text block for deduplication (Cursor sends duplicates)
let lastTextBlock = '';
streamLoop: for await (const msg of stream) {
// Log raw stream event for debugging
appendRawEvent(msg);
if (msg.type === 'assistant' && msg.message?.content) {
for (const block of msg.message.content) {
if (block.type === 'text') {
// Add separator before new text if we already have content and it doesn't end with newlines
if (responseText.length > 0 && !responseText.endsWith('\n\n')) {
if (responseText.endsWith('\n')) {
responseText += '\n';
} else {
const newText = block.text || '';
// Skip empty text
if (!newText) continue;
// Cursor-specific: Skip duplicate consecutive text blocks
// Cursor often sends the same text twice in a row
if (newText === lastTextBlock) {
continue;
}
// Cursor-specific: Skip final accumulated text block
// At the end, Cursor sends one large block containing ALL previous text
// Detect by checking if this block contains most of responseText
if (
responseText.length > 100 &&
newText.length > responseText.length * 0.8 &&
responseText.trim().length > 0
) {
// Check if this looks like accumulated text (contains our existing content)
const normalizedResponse = responseText.replace(/\s+/g, ' ').trim();
const normalizedNew = newText.replace(/\s+/g, ' ').trim();
if (normalizedNew.includes(normalizedResponse.slice(0, 100))) {
// This is the final accumulated block, skip it
continue;
}
}
lastTextBlock = newText;
// Only add separator when we're at a natural paragraph break:
// - Previous text ends with sentence terminator AND new text starts a new thought
// - Don't add separators mid-word or mid-sentence (for streaming providers like Cursor)
if (responseText.length > 0 && newText.length > 0) {
const lastChar = responseText.slice(-1);
const endsWithSentence = /[.!?:]\s*$/.test(responseText);
const endsWithNewline = /\n\s*$/.test(responseText);
const startsNewParagraph = /^[\n#\-*>]/.test(newText);
// Add paragraph break only at natural boundaries
if (
!endsWithNewline &&
(endsWithSentence || startsNewParagraph) &&
!/[a-zA-Z0-9]/.test(lastChar) // Not mid-word
) {
responseText += '\n\n';
}
}
responseText += block.text || '';
responseText += newText;
// Check for authentication errors in the response
if (
@@ -2431,6 +2515,21 @@ Implement all the changes described in the plan above.`;
}
// Final write - ensure all accumulated content is saved
await writeToFile();
// Flush remaining raw output (only if enabled)
if (enableRawOutput) {
if (rawWriteTimeout) {
clearTimeout(rawWriteTimeout);
}
if (rawOutputLines.length > 0) {
try {
await secureFs.mkdir(path.dirname(rawOutputPath), { recursive: true });
await secureFs.appendFile(rawOutputPath, rawOutputLines.join('\n') + '\n');
} catch (error) {
console.error(`[AutoMode] Failed to write final raw output for ${featureId}:`, error);
}
}
}
}
private async executeFeatureWithContext(
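The Cursor-specific skip logic in the stream loop above boils down to two checks; a standalone restatement with hypothetical helper names, using the same thresholds as the loop:

```ts
// Skip a text block that exactly repeats the previous one (Cursor sends duplicates).
function isDuplicateBlock(newText: string, lastTextBlock: string): boolean {
  return newText.length > 0 && newText === lastTextBlock;
}

// Skip Cursor's final accumulated block: a large block that contains the start of
// everything already collected in responseText.
function looksLikeAccumulatedBlock(responseText: string, newText: string): boolean {
  if (responseText.length <= 100 || responseText.trim().length === 0) return false;
  if (newText.length <= responseText.length * 0.8) return false;
  const normalizedResponse = responseText.replace(/\s+/g, ' ').trim();
  const normalizedNew = newText.replace(/\s+/g, ' ').trim();
  return normalizedNew.includes(normalizedResponse.slice(0, 100));
}
```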

View File

@@ -158,6 +158,13 @@ export class FeatureLoader {
return path.join(this.getFeatureDir(projectPath, featureId), 'agent-output.md');
}
/**
* Get the path to a feature's raw-output.jsonl file
*/
getRawOutputPath(projectPath: string, featureId: string): string {
return path.join(this.getFeatureDir(projectPath, featureId), 'raw-output.jsonl');
}
/**
* Generate a new feature ID
*/
@@ -357,6 +364,23 @@ export class FeatureLoader {
}
}
/**
* Get raw output for a feature (JSONL format for debugging)
*/
async getRawOutput(projectPath: string, featureId: string): Promise<string | null> {
try {
const rawOutputPath = this.getRawOutputPath(projectPath, featureId);
const content = (await secureFs.readFile(rawOutputPath, 'utf-8')) as string;
return content;
} catch (error) {
if ((error as NodeJS.ErrnoException).code === 'ENOENT') {
return null;
}
logger.error(`[FeatureLoader] Failed to get raw output for ${featureId}:`, error);
throw error;
}
}
/**
* Save agent output for a feature
*/

View File

@@ -130,38 +130,60 @@ function getCurrentPhase(content: string): 'planning' | 'action' | 'verification
return 'planning';
}
/**
* Cleans up fragmented streaming text by removing spurious newlines
* This handles cases where streaming providers send partial text chunks
* that got separated by newlines during accumulation
*/
function cleanFragmentedText(content: string): string {
// Remove newlines that break up words (newline between letters)
// e.g., "sum\n\nmary" -> "summary"
let cleaned = content.replace(/([a-zA-Z])\n+([a-zA-Z])/g, '$1$2');
// Also clean up fragmented XML-like tags
// e.g., "<sum\n\nmary>" -> "<summary>"
cleaned = cleaned.replace(/<([a-zA-Z]+)\n*([a-zA-Z]*)\n*>/g, '<$1$2>');
cleaned = cleaned.replace(/<\/([a-zA-Z]+)\n*([a-zA-Z]*)\n*>/g, '</$1$2>');
return cleaned;
}
/**
* Extracts a summary from completed feature context
* Looks for content between <summary> and </summary> tags
*/
function extractSummary(content: string): string | undefined {
// First, clean up any fragmented text from streaming
const cleanedContent = cleanFragmentedText(content);
// Look for <summary> tags - capture everything between opening and closing tags
const summaryTagMatch = content.match(/<summary>([\s\S]*?)<\/summary>/i);
const summaryTagMatch = cleanedContent.match(/<summary>([\s\S]*?)<\/summary>/i);
if (summaryTagMatch) {
return summaryTagMatch[1].trim();
// Clean up the extracted summary content as well
return cleanFragmentedText(summaryTagMatch[1]).trim();
}
// Fallback: Look for summary sections - capture everything including subsections (###)
// Stop at same-level ## sections (but not ###), or tool markers, or end
const summaryMatch = content.match(/## Summary[^\n]*\n([\s\S]*?)(?=\n## [^#]|\n🔧|$)/i);
const summaryMatch = cleanedContent.match(/## Summary[^\n]*\n([\s\S]*?)(?=\n## [^#]|\n🔧|$)/i);
if (summaryMatch) {
return summaryMatch[1].trim();
return cleanFragmentedText(summaryMatch[1]).trim();
}
// Look for completion markers and extract surrounding text
const completionMatch = content.match(
const completionMatch = cleanedContent.match(
/✓ (?:Feature|Verification|Task) (?:successfully|completed|verified)[^\n]*(?:\n[^\n]{1,200})?/i
);
if (completionMatch) {
return completionMatch[0].trim();
return cleanFragmentedText(completionMatch[0]).trim();
}
// Look for "What was done" type sections
const whatWasDoneMatch = content.match(
const whatWasDoneMatch = cleanedContent.match(
/(?:What was done|Changes made|Implemented)[^\n]*\n([\s\S]*?)(?=\n## [^#]|\n🔧|$)/i
);
if (whatWasDoneMatch) {
return whatWasDoneMatch[1].trim();
return cleanFragmentedText(whatWasDoneMatch[1]).trim();
}
return undefined;
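As a quick illustration of what the cleanup recovers, a self-contained sketch using a copy of the helper above on a hypothetical fragmented stream:

```ts
// Standalone copy of cleanFragmentedText, for illustration only.
function cleanFragmentedText(content: string): string {
  let cleaned = content.replace(/([a-zA-Z])\n+([a-zA-Z])/g, '$1$2');
  cleaned = cleaned.replace(/<([a-zA-Z]+)\n*([a-zA-Z]*)\n*>/g, '<$1$2>');
  cleaned = cleaned.replace(/<\/([a-zA-Z]+)\n*([a-zA-Z]*)\n*>/g, '</$1$2>');
  return cleaned;
}

// Fragmented text as it might arrive from a streaming provider.
const fragmented = '<sum\n\nmary>\nAdded raw out\n\nput logging.\n</sum\nmary>';

// -> '<summary>\nAdded raw output logging.\n</summary>'
console.log(cleanFragmentedText(fragmented));
```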

View File

@@ -11,6 +11,24 @@ import type {
CursorResultEvent,
} from '@automaker/types';
/**
* Cleans up fragmented streaming text by removing spurious newlines
* This handles cases where streaming providers send partial text chunks
* that got separated by newlines during accumulation
*/
function cleanFragmentedText(content: string): string {
// Remove newlines that break up words (newline between letters)
// e.g., "sum\n\nmary" -> "summary"
let cleaned = content.replace(/([a-zA-Z])\n+([a-zA-Z])/g, '$1$2');
// Also clean up fragmented XML-like tags
// e.g., "<sum\n\nmary>" -> "<summary>"
cleaned = cleaned.replace(/<([a-zA-Z]+)\n*([a-zA-Z]*)\n*>/g, '<$1$2>');
cleaned = cleaned.replace(/<\/([a-zA-Z]+)\n*([a-zA-Z]*)\n*>/g, '</$1$2>');
return cleaned;
}
export type LogEntryType =
| 'prompt'
| 'tool_call'
@@ -100,6 +118,8 @@ const generateDeterministicId = (content: string, lineIndex: number): string =>
*/
function detectEntryType(content: string): LogEntryType {
const trimmed = content.trim();
// Clean fragmented text for pattern matching
const cleaned = cleanFragmentedText(trimmed);
// Tool calls
if (trimmed.startsWith('🔧 Tool:') || trimmed.match(/^Tool:\s*/)) {
@@ -142,14 +162,17 @@ function detectEntryType(content: string): LogEntryType {
}
// Success messages and summary sections
// Check both raw and cleaned content for summary tags (handles fragmented streaming)
if (
trimmed.startsWith('✅') ||
trimmed.toLowerCase().includes('success') ||
trimmed.toLowerCase().includes('completed') ||
// Summary tags (preferred format from agent)
// Summary tags (preferred format from agent) - check both raw and cleaned
trimmed.startsWith('<summary>') ||
cleaned.startsWith('<summary>') ||
// Markdown summary headers (fallback)
trimmed.match(/^##\s+(Summary|Feature|Changes|Implementation)/i) ||
cleaned.match(/^##\s+(Summary|Feature|Changes|Implementation)/i) ||
trimmed.match(/^(I've|I have) (successfully |now )?(completed|finished|implemented)/i)
) {
return 'success';
@@ -585,6 +608,9 @@ export function shouldCollapseByDefault(entry: LogEntry): boolean {
* Generates a title for a log entry
*/
function generateTitle(type: LogEntryType, content: string): string {
// Clean content for pattern matching
const cleaned = cleanFragmentedText(content);
switch (type) {
case 'tool_call': {
const toolName = extractToolName(content);
@@ -607,11 +633,19 @@ function generateTitle(type: LogEntryType, content: string): string {
case 'error':
return 'Error';
case 'success': {
// Check if it's a summary section
if (content.startsWith('<summary>') || content.includes('<summary>')) {
// Check if it's a summary section (check both raw and cleaned)
if (
content.startsWith('<summary>') ||
content.includes('<summary>') ||
cleaned.startsWith('<summary>') ||
cleaned.includes('<summary>')
) {
return 'Summary';
}
if (content.match(/^##\s+(Summary|Feature|Changes|Implementation)/i)) {
if (
content.match(/^##\s+(Summary|Feature|Changes|Implementation)/i) ||
cleaned.match(/^##\s+(Summary|Feature|Changes|Implementation)/i)
) {
return 'Summary';
}
if (
@@ -803,10 +837,12 @@ export function parseLogOutput(rawOutput: string): LogEntry[] {
trimmedLine.match(/\[Status\]/i) ||
trimmedLine.toLowerCase().includes('ultrathink preparation') ||
trimmedLine.match(/thinking level[:\s]*(low|medium|high|none|\d)/i) ||
// Summary tags (preferred format from agent)
// Summary tags (preferred format from agent) - check both raw and cleaned for fragmented streaming
trimmedLine.startsWith('<summary>') ||
cleanFragmentedText(trimmedLine).startsWith('<summary>') ||
// Agent summary sections (markdown headers - fallback)
trimmedLine.match(/^##\s+(Summary|Feature|Changes|Implementation)/i) ||
cleanFragmentedText(trimmedLine).match(/^##\s+(Summary|Feature|Changes|Implementation)/i) ||
// Summary introduction lines
trimmedLine.match(/^All tasks completed/i) ||
trimmedLine.match(/^(I've|I have) (successfully |now )?(completed|finished|implemented)/i);
@@ -834,7 +870,13 @@ export function parseLogOutput(rawOutput: string): LogEntry[] {
currentContent.push(trimmedLine);
// If this is a <summary> tag, start summary accumulation mode
if (trimmedLine.startsWith('<summary>') && !trimmedLine.includes('</summary>')) {
// Check both raw and cleaned for fragmented streaming
const cleanedTrimmed = cleanFragmentedText(trimmedLine);
if (
(trimmedLine.startsWith('<summary>') || cleanedTrimmed.startsWith('<summary>')) &&
!trimmedLine.includes('</summary>') &&
!cleanedTrimmed.includes('</summary>')
) {
inSummaryAccumulation = true;
}
} else if (isInputLine && currentEntry) {