feat: Add raw output logging and endpoint for debugging

- Introduced a new environment variable `AUTOMAKER_DEBUG_RAW_OUTPUT` to enable raw output logging for agent streams.
- Added a new endpoint `/raw-output` to retrieve raw JSONL output for debugging purposes.
- Implemented functionality in `AutoModeService` to log raw output events and save them to `raw-output.jsonl`.
- Enhanced `FeatureLoader` to provide access to raw output files.
- Updated UI components to clean fragmented streaming text for better log parsing.
Shirone
2025-12-28 02:34:10 +01:00
parent 52b1dc98b8
commit e404262cb0
7 changed files with 250 additions and 21 deletions

View File

@@ -48,3 +48,15 @@ TERMINAL_ENABLED=true
TERMINAL_PASSWORD=
ENABLE_REQUEST_LOGGING=false
# ============================================
# OPTIONAL - Debugging
# ============================================
# Enable raw output logging for agent streams (default: false)
# When enabled, saves unprocessed stream events to raw-output.jsonl
# in each feature's directory (.automaker/features/{id}/raw-output.jsonl)
# Useful for debugging provider streaming issues, improving log parsing,
# or analyzing how different providers (Claude, Cursor) stream responses
# Note: This adds disk I/O overhead; only enable it when debugging
AUTOMAKER_DEBUG_RAW_OUTPUT=false
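For reference, a minimal sketch of inspecting the resulting file from Node; the feature-directory layout is taken from the comment above, and it assumes each line is a standalone JSON object:

```ts
import { readFile } from 'node:fs/promises';
import path from 'node:path';

// Hypothetical debugging helper; not part of this commit.
async function dumpRawEvents(projectPath: string, featureId: string): Promise<void> {
  const rawPath = path.join(projectPath, '.automaker', 'features', featureId, 'raw-output.jsonl');
  const content = await readFile(rawPath, 'utf-8');
  for (const line of content.split('\n').filter(Boolean)) {
    // Each line is `{ "timestamp": "...", "event": { ... } }` as written by AutoModeService
    const { timestamp, event } = JSON.parse(line) as { timestamp: string; event: { type?: string } };
    console.log(timestamp, event.type ?? 'unknown event type');
  }
}
```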

View File

@@ -10,7 +10,7 @@ import { createGetHandler } from './routes/get.js';
import { createCreateHandler } from './routes/create.js';
import { createUpdateHandler } from './routes/update.js';
import { createDeleteHandler } from './routes/delete.js';
import { createAgentOutputHandler } from './routes/agent-output.js';
import { createAgentOutputHandler, createRawOutputHandler } from './routes/agent-output.js';
import { createGenerateTitleHandler } from './routes/generate-title.js';
export function createFeaturesRoutes(featureLoader: FeatureLoader): Router {
@@ -22,6 +22,7 @@ export function createFeaturesRoutes(featureLoader: FeatureLoader): Router {
router.post('/update', validatePathParams('projectPath'), createUpdateHandler(featureLoader));
router.post('/delete', validatePathParams('projectPath'), createDeleteHandler(featureLoader));
router.post('/agent-output', createAgentOutputHandler(featureLoader));
router.post('/raw-output', createRawOutputHandler(featureLoader));
router.post('/generate-title', createGenerateTitleHandler());
return router;

View File

@@ -1,5 +1,6 @@
/**
* POST /agent-output endpoint - Get agent output for a feature
* POST /raw-output endpoint - Get raw JSONL output for debugging
*/
import type { Request, Response } from 'express';
@@ -30,3 +31,31 @@ export function createAgentOutputHandler(featureLoader: FeatureLoader) {
}
};
}
/**
* Handler for getting raw JSONL output for debugging
*/
export function createRawOutputHandler(featureLoader: FeatureLoader) {
return async (req: Request, res: Response): Promise<void> => {
try {
const { projectPath, featureId } = req.body as {
projectPath: string;
featureId: string;
};
if (!projectPath || !featureId) {
res.status(400).json({
success: false,
error: 'projectPath and featureId are required',
});
return;
}
const content = await featureLoader.getRawOutput(projectPath, featureId);
res.json({ success: true, content });
} catch (error) {
logError(error, 'Get raw output failed');
res.status(500).json({ success: false, error: getErrorMessage(error) });
}
};
}
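A minimal client-side sketch of calling the new endpoint; only the `/raw-output` path and the request body shape come from the handler above, while the `/api/features` mount prefix is an assumption:

```ts
// Hypothetical call; adjust the base path to wherever createFeaturesRoutes is mounted.
async function fetchRawOutput(projectPath: string, featureId: string): Promise<string | null> {
  const res = await fetch('/api/features/raw-output', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ projectPath, featureId }),
  });
  const data = (await res.json()) as { success: boolean; content?: string | null; error?: string };
  if (!data.success) throw new Error(data.error ?? 'Failed to fetch raw output');
  // content is null when raw-output.jsonl does not exist yet (logging disabled or no run)
  return data.content ?? null;
}
```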

View File

@@ -1917,11 +1917,49 @@ This mock response was generated because AUTOMAKER_MOCK_AGENT=true was set.
// Note: We use projectPath here, not workDir, because workDir might be a worktree path
const featureDirForOutput = getFeatureDir(projectPath, featureId);
const outputPath = path.join(featureDirForOutput, 'agent-output.md');
const rawOutputPath = path.join(featureDirForOutput, 'raw-output.jsonl');
// Raw output logging is configurable via environment variable
// Set AUTOMAKER_DEBUG_RAW_OUTPUT=true to enable raw stream event logging
const enableRawOutput =
process.env.AUTOMAKER_DEBUG_RAW_OUTPUT === 'true' ||
process.env.AUTOMAKER_DEBUG_RAW_OUTPUT === '1';
// Incremental file writing state
let writeTimeout: ReturnType<typeof setTimeout> | null = null;
const WRITE_DEBOUNCE_MS = 500; // Batch writes every 500ms
// Raw output accumulator for debugging (NDJSON format)
let rawOutputLines: string[] = [];
let rawWriteTimeout: ReturnType<typeof setTimeout> | null = null;
// Helper to append raw stream event for debugging (only when enabled)
const appendRawEvent = (event: unknown): void => {
if (!enableRawOutput) return;
try {
const timestamp = new Date().toISOString();
const rawLine = JSON.stringify({ timestamp, event }); // One JSON object per line keeps the file valid JSONL
rawOutputLines.push(rawLine);
// Debounced write of raw output
if (rawWriteTimeout) {
clearTimeout(rawWriteTimeout);
}
rawWriteTimeout = setTimeout(async () => {
try {
await secureFs.mkdir(path.dirname(rawOutputPath), { recursive: true });
await secureFs.appendFile(rawOutputPath, rawOutputLines.join('\n') + '\n');
rawOutputLines = []; // Clear after writing
} catch (error) {
console.error(`[AutoMode] Failed to write raw output for ${featureId}:`, error);
}
}, WRITE_DEBOUNCE_MS);
} catch {
// Ignore serialization errors
}
};
// Helper to write current responseText to file
const writeToFile = async (): Promise<void> => {
try {
@@ -1943,19 +1981,65 @@ This mock response was generated because AUTOMAKER_MOCK_AGENT=true was set.
}, WRITE_DEBOUNCE_MS);
};
// Track last text block for deduplication (Cursor sends duplicates)
let lastTextBlock = '';
streamLoop: for await (const msg of stream) {
// Log raw stream event for debugging
appendRawEvent(msg);
if (msg.type === 'assistant' && msg.message?.content) {
for (const block of msg.message.content) {
if (block.type === 'text') {
// Add separator before new text if we already have content and it doesn't end with newlines
if (responseText.length > 0 && !responseText.endsWith('\n\n')) {
if (responseText.endsWith('\n')) {
responseText += '\n';
} else {
const newText = block.text || '';
// Skip empty text
if (!newText) continue;
// Cursor-specific: Skip duplicate consecutive text blocks
// Cursor often sends the same text twice in a row
if (newText === lastTextBlock) {
continue;
}
// Cursor-specific: Skip final accumulated text block
// At the end, Cursor sends one large block containing ALL previous text
// Detect by checking if this block contains most of responseText
if (
responseText.length > 100 &&
newText.length > responseText.length * 0.8 &&
responseText.trim().length > 0
) {
// Check if this looks like accumulated text (contains our existing content)
const normalizedResponse = responseText.replace(/\s+/g, ' ').trim();
const normalizedNew = newText.replace(/\s+/g, ' ').trim();
if (normalizedNew.includes(normalizedResponse.slice(0, 100))) {
// This is the final accumulated block, skip it
continue;
}
}
lastTextBlock = newText;
// Only add separator when we're at a natural paragraph break:
// - Previous text ends with sentence terminator AND new text starts a new thought
// - Don't add separators mid-word or mid-sentence (for streaming providers like Cursor)
if (responseText.length > 0 && newText.length > 0) {
const lastChar = responseText.slice(-1);
const endsWithSentence = /[.!?:]\s*$/.test(responseText);
const endsWithNewline = /\n\s*$/.test(responseText);
const startsNewParagraph = /^[\n#\-*>]/.test(newText);
// Add paragraph break only at natural boundaries
if (
!endsWithNewline &&
(endsWithSentence || startsNewParagraph) &&
!/[a-zA-Z0-9]/.test(lastChar) // Not mid-word
) {
responseText += '\n\n';
}
}
responseText += block.text || '';
responseText += newText;
// Check for authentication errors in the response
if (
@@ -2431,6 +2515,21 @@ Implement all the changes described in the plan above.`;
}
// Final write - ensure all accumulated content is saved
await writeToFile();
// Flush remaining raw output (only if enabled)
if (enableRawOutput) {
if (rawWriteTimeout) {
clearTimeout(rawWriteTimeout);
}
if (rawOutputLines.length > 0) {
try {
await secureFs.mkdir(path.dirname(rawOutputPath), { recursive: true });
await secureFs.appendFile(rawOutputPath, rawOutputLines.join('\n') + '\n');
} catch (error) {
console.error(`[AutoMode] Failed to write final raw output for ${featureId}:`, error);
}
}
}
}
private async executeFeatureWithContext(
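The Cursor-specific skip logic in the stream loop above boils down to two checks; a standalone restatement with hypothetical helper names, using the same thresholds as the loop:

```ts
// Skip a text block that exactly repeats the previous one (Cursor sends duplicates).
function isDuplicateBlock(newText: string, lastTextBlock: string): boolean {
  return newText.length > 0 && newText === lastTextBlock;
}

// Skip Cursor's final accumulated block: a large block that contains the start of
// everything already collected in responseText.
function looksLikeAccumulatedBlock(responseText: string, newText: string): boolean {
  if (responseText.length <= 100 || responseText.trim().length === 0) return false;
  if (newText.length <= responseText.length * 0.8) return false;
  const normalizedResponse = responseText.replace(/\s+/g, ' ').trim();
  const normalizedNew = newText.replace(/\s+/g, ' ').trim();
  return normalizedNew.includes(normalizedResponse.slice(0, 100));
}
```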

View File

@@ -158,6 +158,13 @@ export class FeatureLoader {
return path.join(this.getFeatureDir(projectPath, featureId), 'agent-output.md');
}
/**
* Get the path to a feature's raw-output.jsonl file
*/
getRawOutputPath(projectPath: string, featureId: string): string {
return path.join(this.getFeatureDir(projectPath, featureId), 'raw-output.jsonl');
}
/**
* Generate a new feature ID
*/
@@ -357,6 +364,23 @@ export class FeatureLoader {
}
}
/**
* Get raw output for a feature (JSONL format for debugging)
*/
async getRawOutput(projectPath: string, featureId: string): Promise<string | null> {
try {
const rawOutputPath = this.getRawOutputPath(projectPath, featureId);
const content = (await secureFs.readFile(rawOutputPath, 'utf-8')) as string;
return content;
} catch (error) {
if ((error as NodeJS.ErrnoException).code === 'ENOENT') {
return null;
}
logger.error(`[FeatureLoader] Failed to get raw output for ${featureId}:`, error);
throw error;
}
}
/**
* Save agent output for a feature
*/

View File

@@ -130,38 +130,60 @@ function getCurrentPhase(content: string): 'planning' | 'action' | 'verification
return 'planning';
}
/**
* Cleans up fragmented streaming text by removing spurious newlines
* This handles cases where streaming providers send partial text chunks
* that got separated by newlines during accumulation
*/
function cleanFragmentedText(content: string): string {
// Remove newlines that break up words (newline between letters)
// e.g., "sum\n\nmary" -> "summary"
let cleaned = content.replace(/([a-zA-Z])\n+([a-zA-Z])/g, '$1$2');
// Also clean up fragmented XML-like tags
// e.g., "<sum\n\nmary>" -> "<summary>"
cleaned = cleaned.replace(/<([a-zA-Z]+)\n*([a-zA-Z]*)\n*>/g, '<$1$2>');
cleaned = cleaned.replace(/<\/([a-zA-Z]+)\n*([a-zA-Z]*)\n*>/g, '</$1$2>');
return cleaned;
}
/**
* Extracts a summary from completed feature context
* Looks for content between <summary> and </summary> tags
*/
function extractSummary(content: string): string | undefined {
// First, clean up any fragmented text from streaming
const cleanedContent = cleanFragmentedText(content);
// Look for <summary> tags - capture everything between opening and closing tags
const summaryTagMatch = content.match(/<summary>([\s\S]*?)<\/summary>/i);
const summaryTagMatch = cleanedContent.match(/<summary>([\s\S]*?)<\/summary>/i);
if (summaryTagMatch) {
return summaryTagMatch[1].trim();
// Clean up the extracted summary content as well
return cleanFragmentedText(summaryTagMatch[1]).trim();
}
// Fallback: Look for summary sections - capture everything including subsections (###)
// Stop at same-level ## sections (but not ###), or tool markers, or end
const summaryMatch = content.match(/## Summary[^\n]*\n([\s\S]*?)(?=\n## [^#]|\n🔧|$)/i);
const summaryMatch = cleanedContent.match(/## Summary[^\n]*\n([\s\S]*?)(?=\n## [^#]|\n🔧|$)/i);
if (summaryMatch) {
return summaryMatch[1].trim();
return cleanFragmentedText(summaryMatch[1]).trim();
}
// Look for completion markers and extract surrounding text
const completionMatch = content.match(
const completionMatch = cleanedContent.match(
/✓ (?:Feature|Verification|Task) (?:successfully|completed|verified)[^\n]*(?:\n[^\n]{1,200})?/i
);
if (completionMatch) {
return completionMatch[0].trim();
return cleanFragmentedText(completionMatch[0]).trim();
}
// Look for "What was done" type sections
const whatWasDoneMatch = content.match(
const whatWasDoneMatch = cleanedContent.match(
/(?:What was done|Changes made|Implemented)[^\n]*\n([\s\S]*?)(?=\n## [^#]|\n🔧|$)/i
);
if (whatWasDoneMatch) {
return whatWasDoneMatch[1].trim();
return cleanFragmentedText(whatWasDoneMatch[1]).trim();
}
return undefined;
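As a quick illustration of what the cleanup recovers, a self-contained sketch using a copy of the helper above on a hypothetical fragmented stream:

```ts
// Standalone copy of cleanFragmentedText, for illustration only.
function cleanFragmentedText(content: string): string {
  let cleaned = content.replace(/([a-zA-Z])\n+([a-zA-Z])/g, '$1$2');
  cleaned = cleaned.replace(/<([a-zA-Z]+)\n*([a-zA-Z]*)\n*>/g, '<$1$2>');
  cleaned = cleaned.replace(/<\/([a-zA-Z]+)\n*([a-zA-Z]*)\n*>/g, '</$1$2>');
  return cleaned;
}

// Fragmented text as it might arrive from a streaming provider.
const fragmented = '<sum\n\nmary>\nAdded raw out\n\nput logging.\n</sum\nmary>';

// -> '<summary>\nAdded raw output logging.\n</summary>'
console.log(cleanFragmentedText(fragmented));
```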

View File

@@ -11,6 +11,24 @@ import type {
CursorResultEvent,
} from '@automaker/types';
/**
* Cleans up fragmented streaming text by removing spurious newlines
* This handles cases where streaming providers send partial text chunks
* that got separated by newlines during accumulation
*/
function cleanFragmentedText(content: string): string {
// Remove newlines that break up words (newline between letters)
// e.g., "sum\n\nmary" -> "summary"
let cleaned = content.replace(/([a-zA-Z])\n+([a-zA-Z])/g, '$1$2');
// Also clean up fragmented XML-like tags
// e.g., "<sum\n\nmary>" -> "<summary>"
cleaned = cleaned.replace(/<([a-zA-Z]+)\n*([a-zA-Z]*)\n*>/g, '<$1$2>');
cleaned = cleaned.replace(/<\/([a-zA-Z]+)\n*([a-zA-Z]*)\n*>/g, '</$1$2>');
return cleaned;
}
export type LogEntryType =
| 'prompt'
| 'tool_call'
@@ -100,6 +118,8 @@ const generateDeterministicId = (content: string, lineIndex: number): string =>
*/
function detectEntryType(content: string): LogEntryType {
const trimmed = content.trim();
// Clean fragmented text for pattern matching
const cleaned = cleanFragmentedText(trimmed);
// Tool calls
if (trimmed.startsWith('🔧 Tool:') || trimmed.match(/^Tool:\s*/)) {
@@ -142,14 +162,17 @@ function detectEntryType(content: string): LogEntryType {
}
// Success messages and summary sections
// Check both raw and cleaned content for summary tags (handles fragmented streaming)
if (
trimmed.startsWith('✅') ||
trimmed.toLowerCase().includes('success') ||
trimmed.toLowerCase().includes('completed') ||
// Summary tags (preferred format from agent)
// Summary tags (preferred format from agent) - check both raw and cleaned
trimmed.startsWith('<summary>') ||
cleaned.startsWith('<summary>') ||
// Markdown summary headers (fallback)
trimmed.match(/^##\s+(Summary|Feature|Changes|Implementation)/i) ||
cleaned.match(/^##\s+(Summary|Feature|Changes|Implementation)/i) ||
trimmed.match(/^(I've|I have) (successfully |now )?(completed|finished|implemented)/i)
) {
return 'success';
@@ -585,6 +608,9 @@ export function shouldCollapseByDefault(entry: LogEntry): boolean {
* Generates a title for a log entry
*/
function generateTitle(type: LogEntryType, content: string): string {
// Clean content for pattern matching
const cleaned = cleanFragmentedText(content);
switch (type) {
case 'tool_call': {
const toolName = extractToolName(content);
@@ -607,11 +633,19 @@ function generateTitle(type: LogEntryType, content: string): string {
case 'error':
return 'Error';
case 'success': {
// Check if it's a summary section
if (content.startsWith('<summary>') || content.includes('<summary>')) {
// Check if it's a summary section (check both raw and cleaned)
if (
content.startsWith('<summary>') ||
content.includes('<summary>') ||
cleaned.startsWith('<summary>') ||
cleaned.includes('<summary>')
) {
return 'Summary';
}
if (content.match(/^##\s+(Summary|Feature|Changes|Implementation)/i)) {
if (
content.match(/^##\s+(Summary|Feature|Changes|Implementation)/i) ||
cleaned.match(/^##\s+(Summary|Feature|Changes|Implementation)/i)
) {
return 'Summary';
}
if (
@@ -803,10 +837,12 @@ export function parseLogOutput(rawOutput: string): LogEntry[] {
trimmedLine.match(/\[Status\]/i) ||
trimmedLine.toLowerCase().includes('ultrathink preparation') ||
trimmedLine.match(/thinking level[:\s]*(low|medium|high|none|\d)/i) ||
// Summary tags (preferred format from agent)
// Summary tags (preferred format from agent) - check both raw and cleaned for fragmented streaming
trimmedLine.startsWith('<summary>') ||
cleanFragmentedText(trimmedLine).startsWith('<summary>') ||
// Agent summary sections (markdown headers - fallback)
trimmedLine.match(/^##\s+(Summary|Feature|Changes|Implementation)/i) ||
cleanFragmentedText(trimmedLine).match(/^##\s+(Summary|Feature|Changes|Implementation)/i) ||
// Summary introduction lines
trimmedLine.match(/^All tasks completed/i) ||
trimmedLine.match(/^(I've|I have) (successfully |now )?(completed|finished|implemented)/i);
@@ -834,7 +870,13 @@ export function parseLogOutput(rawOutput: string): LogEntry[] {
currentContent.push(trimmedLine);
// If this is a <summary> tag, start summary accumulation mode
if (trimmedLine.startsWith('<summary>') && !trimmedLine.includes('</summary>')) {
// Check both raw and cleaned for fragmented streaming
const cleanedTrimmed = cleanFragmentedText(trimmedLine);
if (
(trimmedLine.startsWith('<summary>') || cleanedTrimmed.startsWith('<summary>')) &&
!trimmedLine.includes('</summary>') &&
!cleanedTrimmed.includes('</summary>')
) {
inSummaryAccumulation = true;
}
} else if (isInputLine && currentEntry) {