mirror of
https://github.com/AutoMaker-Org/automaker.git
synced 2026-01-30 14:22:02 +00:00
feat: Add raw output logging and endpoint for debugging
- Introduced a new environment variable `AUTOMAKER_DEBUG_RAW_OUTPUT` to enable raw output logging for agent streams.
- Added a new endpoint `/raw-output` to retrieve raw JSONL output for debugging purposes.
- Implemented functionality in `AutoModeService` to log raw output events and save them to `raw-output.jsonl`.
- Enhanced `FeatureLoader` to provide access to raw output files.
- Updated UI components to clean fragmented streaming text for better log parsing.
This commit is contained in:
@@ -48,3 +48,15 @@ TERMINAL_ENABLED=true
|
||||
TERMINAL_PASSWORD=
|
||||
|
||||
ENABLE_REQUEST_LOGGING=false
|
||||
|
||||
# ============================================
|
||||
# OPTIONAL - Debugging
|
||||
# ============================================
|
||||
|
||||
# Enable raw output logging for agent streams (default: false; set to "true" or "1" to enable)
|
||||
# When enabled, saves unprocessed stream events to raw-output.jsonl
|
||||
# in each feature's directory (.automaker/features/{id}/raw-output.jsonl)
|
||||
# Useful for debugging provider streaming issues, improving log parsing,
|
||||
# or analyzing how different providers (Claude, Cursor) stream responses
|
||||
# Note: This adds disk I/O overhead, only enable when debugging
|
||||
AUTOMAKER_DEBUG_RAW_OUTPUT=false
|
||||
|
||||
@@ -10,7 +10,7 @@ import { createGetHandler } from './routes/get.js';
|
||||
import { createCreateHandler } from './routes/create.js';
|
||||
import { createUpdateHandler } from './routes/update.js';
|
||||
import { createDeleteHandler } from './routes/delete.js';
|
||||
import { createAgentOutputHandler } from './routes/agent-output.js';
|
||||
import { createAgentOutputHandler, createRawOutputHandler } from './routes/agent-output.js';
|
||||
import { createGenerateTitleHandler } from './routes/generate-title.js';
|
||||
|
||||
export function createFeaturesRoutes(featureLoader: FeatureLoader): Router {
|
||||
@@ -22,6 +22,7 @@ export function createFeaturesRoutes(featureLoader: FeatureLoader): Router {
|
||||
router.post('/update', validatePathParams('projectPath'), createUpdateHandler(featureLoader));
|
||||
router.post('/delete', validatePathParams('projectPath'), createDeleteHandler(featureLoader));
|
||||
router.post('/agent-output', createAgentOutputHandler(featureLoader));
|
||||
router.post('/raw-output', createRawOutputHandler(featureLoader));
|
||||
router.post('/generate-title', createGenerateTitleHandler());
|
||||
|
||||
return router;
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
/**
|
||||
* POST /agent-output endpoint - Get agent output for a feature
|
||||
* POST /raw-output endpoint - Get raw JSONL output for debugging
|
||||
*/
|
||||
|
||||
import type { Request, Response } from 'express';
|
||||
@@ -30,3 +31,31 @@ export function createAgentOutputHandler(featureLoader: FeatureLoader) {
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Handler for getting raw JSONL output for debugging
 *
 * Expects a JSON body with non-empty string `projectPath` and `featureId`.
 * Responds with:
 * - 400 `{ success: false, error }` when either field is missing, empty, or not a string
 * - 200 `{ success: true, content }` where `content` is whatever
 *   `featureLoader.getRawOutput` resolves to
 * - 500 `{ success: false, error }` when the lookup throws
 *
 * @param featureLoader - Loader used to read the feature's raw output
 * @returns An async Express request handler
 */
export function createRawOutputHandler(featureLoader: FeatureLoader) {
  return async (req: Request, res: Response): Promise<void> => {
    try {
      // The body is untrusted input: it may be absent or carry non-string values,
      // so treat the fields as unknown until they have been checked.
      const { projectPath, featureId } = (req.body ?? {}) as {
        projectPath?: unknown;
        featureId?: unknown;
      };

      if (
        typeof projectPath !== 'string' ||
        typeof featureId !== 'string' ||
        !projectPath ||
        !featureId
      ) {
        res.status(400).json({
          success: false,
          error: 'projectPath and featureId are required',
        });
        return;
      }

      const content = await featureLoader.getRawOutput(projectPath, featureId);
      res.json({ success: true, content });
    } catch (error) {
      logError(error, 'Get raw output failed');
      res.status(500).json({ success: false, error: getErrorMessage(error) });
    }
  };
}
|
||||
|
||||
@@ -1917,11 +1917,49 @@ This mock response was generated because AUTOMAKER_MOCK_AGENT=true was set.
|
||||
// Note: We use projectPath here, not workDir, because workDir might be a worktree path
|
||||
const featureDirForOutput = getFeatureDir(projectPath, featureId);
|
||||
const outputPath = path.join(featureDirForOutput, 'agent-output.md');
|
||||
const rawOutputPath = path.join(featureDirForOutput, 'raw-output.jsonl');
|
||||
|
||||
// Raw output logging is configurable via environment variable
|
||||
// Set AUTOMAKER_DEBUG_RAW_OUTPUT=true to enable raw stream event logging
|
||||
const enableRawOutput =
|
||||
process.env.AUTOMAKER_DEBUG_RAW_OUTPUT === 'true' ||
|
||||
process.env.AUTOMAKER_DEBUG_RAW_OUTPUT === '1';
|
||||
|
||||
// Incremental file writing state
|
||||
let writeTimeout: ReturnType<typeof setTimeout> | null = null;
|
||||
const WRITE_DEBOUNCE_MS = 500; // Batch writes every 500ms
|
||||
|
||||
// Raw output accumulator for debugging (NDJSON format)
|
||||
let rawOutputLines: string[] = [];
|
||||
let rawWriteTimeout: ReturnType<typeof setTimeout> | null = null;
|
||||
|
||||
// Helper to append raw stream event for debugging (only when enabled).
// Each event is buffered as one compact JSON line ({ timestamp, event }) and the
// buffer is appended to rawOutputPath after WRITE_DEBOUNCE_MS without new events.
const appendRawEvent = (event: unknown): void => {
  if (!enableRawOutput) return;

  let rawLine: string;
  try {
    // Keep every record on a single line: the target file is JSONL/NDJSON, and
    // pretty-printed (multi-line) records cannot be parsed line by line.
    rawLine = JSON.stringify({ timestamp: new Date().toISOString(), event });
  } catch {
    // Ignore serialization errors
    return;
  }
  rawOutputLines.push(rawLine);

  // Debounced write of raw output: restart the timer on every new event
  if (rawWriteTimeout) {
    clearTimeout(rawWriteTimeout);
  }
  rawWriteTimeout = setTimeout(async () => {
    rawWriteTimeout = null;

    // Take ownership of the buffered lines BEFORE awaiting any I/O. Events that
    // arrive while the write is in flight land in the fresh array instead of
    // being discarded by a clear that runs after the write completes.
    const pending = rawOutputLines;
    rawOutputLines = [];
    if (pending.length === 0) return;

    try {
      await secureFs.mkdir(path.dirname(rawOutputPath), { recursive: true });
      await secureFs.appendFile(rawOutputPath, pending.join('\n') + '\n');
    } catch (error) {
      // Put the unwritten lines back (ahead of newer ones) so a later flush can retry
      rawOutputLines = pending.concat(rawOutputLines);
      console.error(`[AutoMode] Failed to write raw output for ${featureId}:`, error);
    }
  }, WRITE_DEBOUNCE_MS);
};
|
||||
|
||||
// Helper to write current responseText to file
|
||||
const writeToFile = async (): Promise<void> => {
|
||||
try {
|
||||
@@ -1943,19 +1981,65 @@ This mock response was generated because AUTOMAKER_MOCK_AGENT=true was set.
|
||||
}, WRITE_DEBOUNCE_MS);
|
||||
};
|
||||
|
||||
// Track last text block for deduplication (Cursor sends duplicates)
|
||||
let lastTextBlock = '';
|
||||
|
||||
streamLoop: for await (const msg of stream) {
|
||||
// Log raw stream event for debugging
|
||||
appendRawEvent(msg);
|
||||
|
||||
if (msg.type === 'assistant' && msg.message?.content) {
|
||||
for (const block of msg.message.content) {
|
||||
if (block.type === 'text') {
|
||||
// Add separator before new text if we already have content and it doesn't end with newlines
|
||||
if (responseText.length > 0 && !responseText.endsWith('\n\n')) {
|
||||
if (responseText.endsWith('\n')) {
|
||||
responseText += '\n';
|
||||
} else {
|
||||
const newText = block.text || '';
|
||||
|
||||
// Skip empty text
|
||||
if (!newText) continue;
|
||||
|
||||
// Cursor-specific: Skip duplicate consecutive text blocks
|
||||
// Cursor often sends the same text twice in a row
|
||||
if (newText === lastTextBlock) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Cursor-specific: Skip final accumulated text block
|
||||
// At the end, Cursor sends one large block containing ALL previous text
|
||||
// Detect by checking if this block contains most of responseText
|
||||
if (
|
||||
responseText.length > 100 &&
|
||||
newText.length > responseText.length * 0.8 &&
|
||||
responseText.trim().length > 0
|
||||
) {
|
||||
// Check if this looks like accumulated text (contains our existing content)
|
||||
const normalizedResponse = responseText.replace(/\s+/g, ' ').trim();
|
||||
const normalizedNew = newText.replace(/\s+/g, ' ').trim();
|
||||
if (normalizedNew.includes(normalizedResponse.slice(0, 100))) {
|
||||
// This is the final accumulated block, skip it
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
lastTextBlock = newText;
|
||||
|
||||
// Only add separator when we're at a natural paragraph break:
|
||||
// - Previous text ends with sentence terminator AND new text starts a new thought
|
||||
// - Don't add separators mid-word or mid-sentence (for streaming providers like Cursor)
|
||||
if (responseText.length > 0 && newText.length > 0) {
|
||||
const lastChar = responseText.slice(-1);
|
||||
const endsWithSentence = /[.!?:]\s*$/.test(responseText);
|
||||
const endsWithNewline = /\n\s*$/.test(responseText);
|
||||
const startsNewParagraph = /^[\n#\-*>]/.test(newText);
|
||||
|
||||
// Add paragraph break only at natural boundaries
|
||||
if (
|
||||
!endsWithNewline &&
|
||||
(endsWithSentence || startsNewParagraph) &&
|
||||
!/[a-zA-Z0-9]/.test(lastChar) // Not mid-word
|
||||
) {
|
||||
responseText += '\n\n';
|
||||
}
|
||||
}
|
||||
responseText += block.text || '';
|
||||
responseText += newText;
|
||||
|
||||
// Check for authentication errors in the response
|
||||
if (
|
||||
@@ -2431,6 +2515,21 @@ Implement all the changes described in the plan above.`;
|
||||
}
|
||||
// Final write - ensure all accumulated content is saved
|
||||
await writeToFile();
|
||||
|
||||
// Flush remaining raw output (only if enabled)
|
||||
if (enableRawOutput) {
|
||||
if (rawWriteTimeout) {
|
||||
clearTimeout(rawWriteTimeout);
|
||||
}
|
||||
if (rawOutputLines.length > 0) {
|
||||
try {
|
||||
await secureFs.mkdir(path.dirname(rawOutputPath), { recursive: true });
|
||||
await secureFs.appendFile(rawOutputPath, rawOutputLines.join('\n') + '\n');
|
||||
} catch (error) {
|
||||
console.error(`[AutoMode] Failed to write final raw output for ${featureId}:`, error);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private async executeFeatureWithContext(
|
||||
|
||||
@@ -158,6 +158,13 @@ export class FeatureLoader {
|
||||
return path.join(this.getFeatureDir(projectPath, featureId), 'agent-output.md');
|
||||
}
|
||||
|
||||
/**
 * Build the location of the raw-output.jsonl file that belongs to a feature.
 *
 * @param projectPath - Project path passed through to getFeatureDir
 * @param featureId - Feature identifier passed through to getFeatureDir
 * @returns The feature directory joined with 'raw-output.jsonl'
 */
getRawOutputPath(projectPath: string, featureId: string): string {
  const featureDir = this.getFeatureDir(projectPath, featureId);
  return path.join(featureDir, 'raw-output.jsonl');
}
|
||||
|
||||
/**
|
||||
* Generate a new feature ID
|
||||
*/
|
||||
@@ -357,6 +364,23 @@ export class FeatureLoader {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Read a feature's raw output file (JSONL format for debugging).
 *
 * @param projectPath - Project path used to locate the feature directory
 * @param featureId - Feature whose raw output should be read
 * @returns The file content as UTF-8 text, or null when the file does not exist
 * @throws Rethrows any read error other than ENOENT after logging it
 */
async getRawOutput(projectPath: string, featureId: string): Promise<string | null> {
  try {
    const filePath = this.getRawOutputPath(projectPath, featureId);
    return (await secureFs.readFile(filePath, 'utf-8')) as string;
  } catch (error) {
    // A missing file is an expected outcome, not a failure
    const fileIsMissing = (error as NodeJS.ErrnoException).code === 'ENOENT';
    if (!fileIsMissing) {
      logger.error(`[FeatureLoader] Failed to get raw output for ${featureId}:`, error);
      throw error;
    }
    return null;
  }
}
|
||||
|
||||
/**
|
||||
* Save agent output for a feature
|
||||
*/
|
||||
|
||||
@@ -130,38 +130,60 @@ function getCurrentPhase(content: string): 'planning' | 'action' | 'verification
|
||||
return 'planning';
|
||||
}
|
||||
|
||||
/**
 * Cleans up fragmented streaming text by removing spurious newlines
 * This handles cases where streaming providers send partial text chunks
 * that got separated by newlines during accumulation
 *
 * Note: every run of newlines that sits directly between two ASCII letters is
 * removed, so a line ending in a letter is joined to a following line that
 * starts with a letter.
 *
 * @param content - Text to clean
 * @returns The text with letter-to-letter newlines and newlines inside simple
 *          `<tag>` / `</tag>` markers removed
 */
function cleanFragmentedText(content: string): string {
  // Remove newlines that break up words (newline between letters)
  // e.g., "sum\n\nmary" -> "summary"
  // The following letter is matched with a lookahead so it is not consumed and
  // can anchor the next match; otherwise "a\nb\nc" would only become "ab\nc".
  let cleaned = content.replace(/([a-zA-Z])\n+(?=[a-zA-Z])/g, '$1');

  // Also clean up fragmented XML-like tags (opening and closing)
  // e.g., "<sum\n\nmary>" -> "<summary>", "</summary\n>" -> "</summary>"
  cleaned = cleaned.replace(/<(\/?)([a-zA-Z]+)\n*([a-zA-Z]*)\n*>/g, '<$1$2$3>');

  return cleaned;
}
|
||||
|
||||
/**
|
||||
* Extracts a summary from completed feature context
|
||||
* Looks for content between <summary> and </summary> tags
|
||||
*/
|
||||
function extractSummary(content: string): string | undefined {
|
||||
// First, clean up any fragmented text from streaming
|
||||
const cleanedContent = cleanFragmentedText(content);
|
||||
|
||||
// Look for <summary> tags - capture everything between opening and closing tags
|
||||
const summaryTagMatch = content.match(/<summary>([\s\S]*?)<\/summary>/i);
|
||||
const summaryTagMatch = cleanedContent.match(/<summary>([\s\S]*?)<\/summary>/i);
|
||||
if (summaryTagMatch) {
|
||||
return summaryTagMatch[1].trim();
|
||||
// Clean up the extracted summary content as well
|
||||
return cleanFragmentedText(summaryTagMatch[1]).trim();
|
||||
}
|
||||
|
||||
// Fallback: Look for summary sections - capture everything including subsections (###)
|
||||
// Stop at same-level ## sections (but not ###), or tool markers, or end
|
||||
const summaryMatch = content.match(/## Summary[^\n]*\n([\s\S]*?)(?=\n## [^#]|\n🔧|$)/i);
|
||||
const summaryMatch = cleanedContent.match(/## Summary[^\n]*\n([\s\S]*?)(?=\n## [^#]|\n🔧|$)/i);
|
||||
if (summaryMatch) {
|
||||
return summaryMatch[1].trim();
|
||||
return cleanFragmentedText(summaryMatch[1]).trim();
|
||||
}
|
||||
|
||||
// Look for completion markers and extract surrounding text
|
||||
const completionMatch = content.match(
|
||||
const completionMatch = cleanedContent.match(
|
||||
/✓ (?:Feature|Verification|Task) (?:successfully|completed|verified)[^\n]*(?:\n[^\n]{1,200})?/i
|
||||
);
|
||||
if (completionMatch) {
|
||||
return completionMatch[0].trim();
|
||||
return cleanFragmentedText(completionMatch[0]).trim();
|
||||
}
|
||||
|
||||
// Look for "What was done" type sections
|
||||
const whatWasDoneMatch = content.match(
|
||||
const whatWasDoneMatch = cleanedContent.match(
|
||||
/(?:What was done|Changes made|Implemented)[^\n]*\n([\s\S]*?)(?=\n## [^#]|\n🔧|$)/i
|
||||
);
|
||||
if (whatWasDoneMatch) {
|
||||
return whatWasDoneMatch[1].trim();
|
||||
return cleanFragmentedText(whatWasDoneMatch[1]).trim();
|
||||
}
|
||||
|
||||
return undefined;
|
||||
|
||||
@@ -11,6 +11,24 @@ import type {
|
||||
CursorResultEvent,
|
||||
} from '@automaker/types';
|
||||
|
||||
/**
 * Cleans up fragmented streaming text by removing spurious newlines
 * This handles cases where streaming providers send partial text chunks
 * that got separated by newlines during accumulation
 *
 * Note: every run of newlines that sits directly between two ASCII letters is
 * removed, so a line ending in a letter is joined to a following line that
 * starts with a letter.
 *
 * @param content - Text to clean
 * @returns The text with letter-to-letter newlines and newlines inside simple
 *          `<tag>` / `</tag>` markers removed
 */
function cleanFragmentedText(content: string): string {
  // Remove newlines that break up words (newline between letters)
  // e.g., "sum\n\nmary" -> "summary"
  // The following letter is matched with a lookahead so it is not consumed and
  // can anchor the next match; otherwise "a\nb\nc" would only become "ab\nc".
  let cleaned = content.replace(/([a-zA-Z])\n+(?=[a-zA-Z])/g, '$1');

  // Also clean up fragmented XML-like tags (opening and closing)
  // e.g., "<sum\n\nmary>" -> "<summary>", "</summary\n>" -> "</summary>"
  cleaned = cleaned.replace(/<(\/?)([a-zA-Z]+)\n*([a-zA-Z]*)\n*>/g, '<$1$2$3>');

  return cleaned;
}
|
||||
|
||||
export type LogEntryType =
|
||||
| 'prompt'
|
||||
| 'tool_call'
|
||||
@@ -100,6 +118,8 @@ const generateDeterministicId = (content: string, lineIndex: number): string =>
|
||||
*/
|
||||
function detectEntryType(content: string): LogEntryType {
|
||||
const trimmed = content.trim();
|
||||
// Clean fragmented text for pattern matching
|
||||
const cleaned = cleanFragmentedText(trimmed);
|
||||
|
||||
// Tool calls
|
||||
if (trimmed.startsWith('🔧 Tool:') || trimmed.match(/^Tool:\s*/)) {
|
||||
@@ -142,14 +162,17 @@ function detectEntryType(content: string): LogEntryType {
|
||||
}
|
||||
|
||||
// Success messages and summary sections
|
||||
// Check both raw and cleaned content for summary tags (handles fragmented streaming)
|
||||
if (
|
||||
trimmed.startsWith('✅') ||
|
||||
trimmed.toLowerCase().includes('success') ||
|
||||
trimmed.toLowerCase().includes('completed') ||
|
||||
// Summary tags (preferred format from agent)
|
||||
// Summary tags (preferred format from agent) - check both raw and cleaned
|
||||
trimmed.startsWith('<summary>') ||
|
||||
cleaned.startsWith('<summary>') ||
|
||||
// Markdown summary headers (fallback)
|
||||
trimmed.match(/^##\s+(Summary|Feature|Changes|Implementation)/i) ||
|
||||
cleaned.match(/^##\s+(Summary|Feature|Changes|Implementation)/i) ||
|
||||
trimmed.match(/^(I've|I have) (successfully |now )?(completed|finished|implemented)/i)
|
||||
) {
|
||||
return 'success';
|
||||
@@ -585,6 +608,9 @@ export function shouldCollapseByDefault(entry: LogEntry): boolean {
|
||||
* Generates a title for a log entry
|
||||
*/
|
||||
function generateTitle(type: LogEntryType, content: string): string {
|
||||
// Clean content for pattern matching
|
||||
const cleaned = cleanFragmentedText(content);
|
||||
|
||||
switch (type) {
|
||||
case 'tool_call': {
|
||||
const toolName = extractToolName(content);
|
||||
@@ -607,11 +633,19 @@ function generateTitle(type: LogEntryType, content: string): string {
|
||||
case 'error':
|
||||
return 'Error';
|
||||
case 'success': {
|
||||
// Check if it's a summary section
|
||||
if (content.startsWith('<summary>') || content.includes('<summary>')) {
|
||||
// Check if it's a summary section (check both raw and cleaned)
|
||||
if (
|
||||
content.startsWith('<summary>') ||
|
||||
content.includes('<summary>') ||
|
||||
cleaned.startsWith('<summary>') ||
|
||||
cleaned.includes('<summary>')
|
||||
) {
|
||||
return 'Summary';
|
||||
}
|
||||
if (content.match(/^##\s+(Summary|Feature|Changes|Implementation)/i)) {
|
||||
if (
|
||||
content.match(/^##\s+(Summary|Feature|Changes|Implementation)/i) ||
|
||||
cleaned.match(/^##\s+(Summary|Feature|Changes|Implementation)/i)
|
||||
) {
|
||||
return 'Summary';
|
||||
}
|
||||
if (
|
||||
@@ -803,10 +837,12 @@ export function parseLogOutput(rawOutput: string): LogEntry[] {
|
||||
trimmedLine.match(/\[Status\]/i) ||
|
||||
trimmedLine.toLowerCase().includes('ultrathink preparation') ||
|
||||
trimmedLine.match(/thinking level[:\s]*(low|medium|high|none|\d)/i) ||
|
||||
// Summary tags (preferred format from agent)
|
||||
// Summary tags (preferred format from agent) - check both raw and cleaned for fragmented streaming
|
||||
trimmedLine.startsWith('<summary>') ||
|
||||
cleanFragmentedText(trimmedLine).startsWith('<summary>') ||
|
||||
// Agent summary sections (markdown headers - fallback)
|
||||
trimmedLine.match(/^##\s+(Summary|Feature|Changes|Implementation)/i) ||
|
||||
cleanFragmentedText(trimmedLine).match(/^##\s+(Summary|Feature|Changes|Implementation)/i) ||
|
||||
// Summary introduction lines
|
||||
trimmedLine.match(/^All tasks completed/i) ||
|
||||
trimmedLine.match(/^(I've|I have) (successfully |now )?(completed|finished|implemented)/i);
|
||||
@@ -834,7 +870,13 @@ export function parseLogOutput(rawOutput: string): LogEntry[] {
|
||||
currentContent.push(trimmedLine);
|
||||
|
||||
// If this is a <summary> tag, start summary accumulation mode
|
||||
if (trimmedLine.startsWith('<summary>') && !trimmedLine.includes('</summary>')) {
|
||||
// Check both raw and cleaned for fragmented streaming
|
||||
const cleanedTrimmed = cleanFragmentedText(trimmedLine);
|
||||
if (
|
||||
(trimmedLine.startsWith('<summary>') || cleanedTrimmed.startsWith('<summary>')) &&
|
||||
!trimmedLine.includes('</summary>') &&
|
||||
!cleanedTrimmed.includes('</summary>')
|
||||
) {
|
||||
inSummaryAccumulation = true;
|
||||
}
|
||||
} else if (isInputLine && currentEntry) {
|
||||
|
||||
Reference in New Issue
Block a user