feat: Add raw output logging and endpoint for debugging

- Introduced a new environment variable `AUTOMAKER_DEBUG_RAW_OUTPUT` to enable raw output logging for agent streams.
- Added a new endpoint `/raw-output` to retrieve raw JSONL output for debugging (usage sketch below).
- Implemented functionality in `AutoModeService` to log raw output events and save them to `raw-output.jsonl`.
- Enhanced `FeatureLoader` to provide access to raw output files.
- Updated UI components to clean fragmented streaming text for better log parsing.
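
A minimal usage sketch for the new flag and endpoint (not part of this commit; the `/api/features` prefix, host, and port are assumptions that depend on how the features router is mounted):

```ts
// Enable raw output logging on the server first, e.g. in .env:
//   AUTOMAKER_DEBUG_RAW_OUTPUT=true
// Then fetch the raw JSONL for a feature. The URL, project path, and feature ID
// below are placeholders, not values from this commit.
const res = await fetch('http://localhost:3000/api/features/raw-output', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({
    projectPath: '/path/to/project', // placeholder
    featureId: 'feature-123', // placeholder
  }),
});
const { success, content } = (await res.json()) as { success: boolean; content: string | null };
// `content` is the raw-output.jsonl text, or null if no raw output has been recorded yet.
```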
Shirone
2025-12-28 02:34:10 +01:00
parent 52b1dc98b8
commit e404262cb0
7 changed files with 250 additions and 21 deletions

View File

@@ -48,3 +48,15 @@ TERMINAL_ENABLED=true
TERMINAL_PASSWORD=
ENABLE_REQUEST_LOGGING=false
# ============================================
# OPTIONAL - Debugging
# ============================================
# Enable raw output logging for agent streams (default: false)
# When enabled, saves unprocessed stream events to raw-output.jsonl
# in each feature's directory (.automaker/features/{id}/raw-output.jsonl)
# Useful for debugging provider streaming issues, improving log parsing,
# or analyzing how different providers (Claude, Cursor) stream responses
# Note: This adds disk I/O overhead, only enable when debugging
AUTOMAKER_DEBUG_RAW_OUTPUT=false

View File

@@ -10,7 +10,7 @@ import { createGetHandler } from './routes/get.js';
import { createCreateHandler } from './routes/create.js';
import { createUpdateHandler } from './routes/update.js';
import { createDeleteHandler } from './routes/delete.js';
import { createAgentOutputHandler, createRawOutputHandler } from './routes/agent-output.js';
import { createGenerateTitleHandler } from './routes/generate-title.js';
export function createFeaturesRoutes(featureLoader: FeatureLoader): Router {
@@ -22,6 +22,7 @@ export function createFeaturesRoutes(featureLoader: FeatureLoader): Router {
router.post('/update', validatePathParams('projectPath'), createUpdateHandler(featureLoader));
router.post('/delete', validatePathParams('projectPath'), createDeleteHandler(featureLoader));
router.post('/agent-output', createAgentOutputHandler(featureLoader));
router.post('/raw-output', createRawOutputHandler(featureLoader));
router.post('/generate-title', createGenerateTitleHandler());
return router;

View File

@@ -1,5 +1,6 @@
/**
* POST /agent-output endpoint - Get agent output for a feature
* POST /raw-output endpoint - Get raw JSONL output for debugging
*/
import type { Request, Response } from 'express';
@@ -30,3 +31,31 @@ export function createAgentOutputHandler(featureLoader: FeatureLoader) {
}
};
}
/**
* Handler for getting raw JSONL output for debugging
*/
export function createRawOutputHandler(featureLoader: FeatureLoader) {
return async (req: Request, res: Response): Promise<void> => {
try {
const { projectPath, featureId } = req.body as {
projectPath: string;
featureId: string;
};
if (!projectPath || !featureId) {
res.status(400).json({
success: false,
error: 'projectPath and featureId are required',
});
return;
}
const content = await featureLoader.getRawOutput(projectPath, featureId);
res.json({ success: true, content });
} catch (error) {
logError(error, 'Get raw output failed');
res.status(500).json({ success: false, error: getErrorMessage(error) });
}
};
}

View File

@@ -1917,11 +1917,49 @@ This mock response was generated because AUTOMAKER_MOCK_AGENT=true was set.
// Note: We use projectPath here, not workDir, because workDir might be a worktree path
const featureDirForOutput = getFeatureDir(projectPath, featureId);
const outputPath = path.join(featureDirForOutput, 'agent-output.md');
const rawOutputPath = path.join(featureDirForOutput, 'raw-output.jsonl');
// Raw output logging is configurable via environment variable
// Set AUTOMAKER_DEBUG_RAW_OUTPUT=true to enable raw stream event logging
const enableRawOutput =
process.env.AUTOMAKER_DEBUG_RAW_OUTPUT === 'true' ||
process.env.AUTOMAKER_DEBUG_RAW_OUTPUT === '1';
// Incremental file writing state
let writeTimeout: ReturnType<typeof setTimeout> | null = null;
const WRITE_DEBOUNCE_MS = 500; // Batch writes every 500ms
// Raw output accumulator for debugging (NDJSON format)
let rawOutputLines: string[] = [];
let rawWriteTimeout: ReturnType<typeof setTimeout> | null = null;
// Helper to append raw stream event for debugging (only when enabled)
const appendRawEvent = (event: unknown): void => {
if (!enableRawOutput) return;
try {
const timestamp = new Date().toISOString();
const rawLine = JSON.stringify({ timestamp, event }, null, 4); // Pretty print for readability
rawOutputLines.push(rawLine);
// Debounced write of raw output
if (rawWriteTimeout) {
clearTimeout(rawWriteTimeout);
}
rawWriteTimeout = setTimeout(async () => {
try {
await secureFs.mkdir(path.dirname(rawOutputPath), { recursive: true });
await secureFs.appendFile(rawOutputPath, rawOutputLines.join('\n') + '\n');
rawOutputLines = []; // Clear after writing
} catch (error) {
console.error(`[AutoMode] Failed to write raw output for ${featureId}:`, error);
}
}, WRITE_DEBOUNCE_MS);
} catch {
// Ignore serialization errors
}
};
// Helper to write current responseText to file
const writeToFile = async (): Promise<void> => {
try {
@@ -1943,19 +1981,65 @@ This mock response was generated because AUTOMAKER_MOCK_AGENT=true was set.
}, WRITE_DEBOUNCE_MS);
};
// Track last text block for deduplication (Cursor sends duplicates)
let lastTextBlock = '';
streamLoop: for await (const msg of stream) {
// Log raw stream event for debugging
appendRawEvent(msg);
if (msg.type === 'assistant' && msg.message?.content) {
for (const block of msg.message.content) {
if (block.type === 'text') {
const newText = block.text || '';

// Skip empty text
if (!newText) continue;

// Cursor-specific: Skip duplicate consecutive text blocks
// Cursor often sends the same text twice in a row
if (newText === lastTextBlock) {
continue;
}

// Cursor-specific: Skip final accumulated text block
// At the end, Cursor sends one large block containing ALL previous text
// Detect by checking if this block contains most of responseText
if (
responseText.length > 100 &&
newText.length > responseText.length * 0.8 &&
responseText.trim().length > 0
) {
// Check if this looks like accumulated text (contains our existing content)
const normalizedResponse = responseText.replace(/\s+/g, ' ').trim();
const normalizedNew = newText.replace(/\s+/g, ' ').trim();
if (normalizedNew.includes(normalizedResponse.slice(0, 100))) {
// This is the final accumulated block, skip it
continue;
}
}

lastTextBlock = newText;

// Only add separator when we're at a natural paragraph break:
// - Previous text ends with sentence terminator AND new text starts a new thought
// - Don't add separators mid-word or mid-sentence (for streaming providers like Cursor)
if (responseText.length > 0 && newText.length > 0) {
const lastChar = responseText.slice(-1);
const endsWithSentence = /[.!?:]\s*$/.test(responseText);
const endsWithNewline = /\n\s*$/.test(responseText);
const startsNewParagraph = /^[\n#\-*>]/.test(newText);
// Add paragraph break only at natural boundaries
if (
!endsWithNewline &&
(endsWithSentence || startsNewParagraph) &&
!/[a-zA-Z0-9]/.test(lastChar) // Not mid-word
) {
responseText += '\n\n';
}
}
responseText += newText;
// Check for authentication errors in the response
if (
@@ -2431,6 +2515,21 @@ Implement all the changes described in the plan above.`;
}
// Final write - ensure all accumulated content is saved
await writeToFile();
// Flush remaining raw output (only if enabled)
if (enableRawOutput) {
if (rawWriteTimeout) {
clearTimeout(rawWriteTimeout);
}
if (rawOutputLines.length > 0) {
try {
await secureFs.mkdir(path.dirname(rawOutputPath), { recursive: true });
await secureFs.appendFile(rawOutputPath, rawOutputLines.join('\n') + '\n');
} catch (error) {
console.error(`[AutoMode] Failed to write final raw output for ${featureId}:`, error);
}
}
}
}
private async executeFeatureWithContext(

View File

@@ -158,6 +158,13 @@ export class FeatureLoader {
return path.join(this.getFeatureDir(projectPath, featureId), 'agent-output.md');
}
/**
* Get the path to a feature's raw-output.jsonl file
*/
getRawOutputPath(projectPath: string, featureId: string): string {
return path.join(this.getFeatureDir(projectPath, featureId), 'raw-output.jsonl');
}
/**
* Generate a new feature ID
*/
@@ -357,6 +364,23 @@ export class FeatureLoader {
}
}
/**
* Get raw output for a feature (JSONL format for debugging)
*/
async getRawOutput(projectPath: string, featureId: string): Promise<string | null> {
try {
const rawOutputPath = this.getRawOutputPath(projectPath, featureId);
const content = (await secureFs.readFile(rawOutputPath, 'utf-8')) as string;
return content;
} catch (error) {
if ((error as NodeJS.ErrnoException).code === 'ENOENT') {
return null;
}
logger.error(`[FeatureLoader] Failed to get raw output for ${featureId}:`, error);
throw error;
}
}
/**
* Save agent output for a feature
*/

View File

@@ -130,38 +130,60 @@ function getCurrentPhase(content: string): 'planning' | 'action' | 'verification
return 'planning';
}
/**
* Cleans up fragmented streaming text by removing spurious newlines
* This handles cases where streaming providers send partial text chunks
* that got separated by newlines during accumulation
*/
function cleanFragmentedText(content: string): string {
// Remove newlines that break up words (newline between letters)
// e.g., "sum\n\nmary" -> "summary"
let cleaned = content.replace(/([a-zA-Z])\n+([a-zA-Z])/g, '$1$2');
// Also clean up fragmented XML-like tags
// e.g., "<sum\n\nmary>" -> "<summary>"
cleaned = cleaned.replace(/<([a-zA-Z]+)\n*([a-zA-Z]*)\n*>/g, '<$1$2>');
cleaned = cleaned.replace(/<\/([a-zA-Z]+)\n*([a-zA-Z]*)\n*>/g, '</$1$2>');
return cleaned;
}
/**
* Extracts a summary from completed feature context
* Looks for content between <summary> and </summary> tags
*/
function extractSummary(content: string): string | undefined {
// First, clean up any fragmented text from streaming
const cleanedContent = cleanFragmentedText(content);
// Look for <summary> tags - capture everything between opening and closing tags
const summaryTagMatch = cleanedContent.match(/<summary>([\s\S]*?)<\/summary>/i);
if (summaryTagMatch) {
// Clean up the extracted summary content as well
return cleanFragmentedText(summaryTagMatch[1]).trim();
}
// Fallback: Look for summary sections - capture everything including subsections (###)
// Stop at same-level ## sections (but not ###), or tool markers, or end
const summaryMatch = cleanedContent.match(/## Summary[^\n]*\n([\s\S]*?)(?=\n## [^#]|\n🔧|$)/i);
if (summaryMatch) {
return cleanFragmentedText(summaryMatch[1]).trim();
}
// Look for completion markers and extract surrounding text
const completionMatch = cleanedContent.match(
/✓ (?:Feature|Verification|Task) (?:successfully|completed|verified)[^\n]*(?:\n[^\n]{1,200})?/i
);
if (completionMatch) {
return cleanFragmentedText(completionMatch[0]).trim();
}
// Look for "What was done" type sections // Look for "What was done" type sections
const whatWasDoneMatch = content.match( const whatWasDoneMatch = cleanedContent.match(
/(?:What was done|Changes made|Implemented)[^\n]*\n([\s\S]*?)(?=\n## [^#]|\n🔧|$)/i /(?:What was done|Changes made|Implemented)[^\n]*\n([\s\S]*?)(?=\n## [^#]|\n🔧|$)/i
); );
if (whatWasDoneMatch) { if (whatWasDoneMatch) {
return whatWasDoneMatch[1].trim(); return cleanFragmentedText(whatWasDoneMatch[1]).trim();
} }
return undefined; return undefined;

View File

@@ -11,6 +11,24 @@ import type {
CursorResultEvent,
} from '@automaker/types';
/**
* Cleans up fragmented streaming text by removing spurious newlines
* This handles cases where streaming providers send partial text chunks
* that got separated by newlines during accumulation
*/
function cleanFragmentedText(content: string): string {
// Remove newlines that break up words (newline between letters)
// e.g., "sum\n\nmary" -> "summary"
let cleaned = content.replace(/([a-zA-Z])\n+([a-zA-Z])/g, '$1$2');
// Also clean up fragmented XML-like tags
// e.g., "<sum\n\nmary>" -> "<summary>"
cleaned = cleaned.replace(/<([a-zA-Z]+)\n*([a-zA-Z]*)\n*>/g, '<$1$2>');
cleaned = cleaned.replace(/<\/([a-zA-Z]+)\n*([a-zA-Z]*)\n*>/g, '</$1$2>');
return cleaned;
}
export type LogEntryType =
| 'prompt'
| 'tool_call'
@@ -100,6 +118,8 @@ const generateDeterministicId = (content: string, lineIndex: number): string =>
*/
function detectEntryType(content: string): LogEntryType {
const trimmed = content.trim();
// Clean fragmented text for pattern matching
const cleaned = cleanFragmentedText(trimmed);
// Tool calls
if (trimmed.startsWith('🔧 Tool:') || trimmed.match(/^Tool:\s*/)) {
@@ -142,14 +162,17 @@ function detectEntryType(content: string): LogEntryType {
}
// Success messages and summary sections
// Check both raw and cleaned content for summary tags (handles fragmented streaming)
if (
trimmed.startsWith('✅') ||
trimmed.toLowerCase().includes('success') ||
trimmed.toLowerCase().includes('completed') ||
// Summary tags (preferred format from agent) - check both raw and cleaned
trimmed.startsWith('<summary>') ||
cleaned.startsWith('<summary>') ||
// Markdown summary headers (fallback)
trimmed.match(/^##\s+(Summary|Feature|Changes|Implementation)/i) ||
cleaned.match(/^##\s+(Summary|Feature|Changes|Implementation)/i) ||
trimmed.match(/^(I've|I have) (successfully |now )?(completed|finished|implemented)/i)
) {
return 'success';
@@ -585,6 +608,9 @@ export function shouldCollapseByDefault(entry: LogEntry): boolean {
* Generates a title for a log entry
*/
function generateTitle(type: LogEntryType, content: string): string {
// Clean content for pattern matching
const cleaned = cleanFragmentedText(content);
switch (type) {
case 'tool_call': {
const toolName = extractToolName(content);
@@ -607,11 +633,19 @@ function generateTitle(type: LogEntryType, content: string): string {
case 'error':
return 'Error';
case 'success': {
// Check if it's a summary section (check both raw and cleaned)
if (
content.startsWith('<summary>') ||
content.includes('<summary>') ||
cleaned.startsWith('<summary>') ||
cleaned.includes('<summary>')
) {
return 'Summary';
}
if (
content.match(/^##\s+(Summary|Feature|Changes|Implementation)/i) ||
cleaned.match(/^##\s+(Summary|Feature|Changes|Implementation)/i)
) {
return 'Summary';
}
if (
@@ -803,10 +837,12 @@ export function parseLogOutput(rawOutput: string): LogEntry[] {
trimmedLine.match(/\[Status\]/i) ||
trimmedLine.toLowerCase().includes('ultrathink preparation') ||
trimmedLine.match(/thinking level[:\s]*(low|medium|high|none|\d)/i) ||
// Summary tags (preferred format from agent) - check both raw and cleaned for fragmented streaming
trimmedLine.startsWith('<summary>') ||
cleanFragmentedText(trimmedLine).startsWith('<summary>') ||
// Agent summary sections (markdown headers - fallback)
trimmedLine.match(/^##\s+(Summary|Feature|Changes|Implementation)/i) ||
cleanFragmentedText(trimmedLine).match(/^##\s+(Summary|Feature|Changes|Implementation)/i) ||
// Summary introduction lines
trimmedLine.match(/^All tasks completed/i) ||
trimmedLine.match(/^(I've|I have) (successfully |now )?(completed|finished|implemented)/i);
@@ -834,7 +870,13 @@ export function parseLogOutput(rawOutput: string): LogEntry[] {
currentContent.push(trimmedLine);
// If this is a <summary> tag, start summary accumulation mode
// Check both raw and cleaned for fragmented streaming
const cleanedTrimmed = cleanFragmentedText(trimmedLine);
if (
(trimmedLine.startsWith('<summary>') || cleanedTrimmed.startsWith('<summary>')) &&
!trimmedLine.includes('</summary>') &&
!cleanedTrimmed.includes('</summary>')
) {
inSummaryAccumulation = true;
}
} else if (isInputLine && currentEntry) {