Fix agent output summary for pipeline steps (#812)

* Changes from fix/agent-output-summary-for-pipeline-steps
* feat: Optimize pipeline summary extraction and fix regex vulnerability
* fix: Use fallback summary for pipeline steps when extraction fails
* fix: Strip follow-up session scaffold from pipeline step fallback summaries
2026-03-19 10:43:08 +00:00 · 2026-02-25 22:13:38 -08:00
parent 70c9fd77f6
commit 9747faf1b9
37 changed files with 7164 additions and 163 deletions
--- a/apps/server/src/index.ts
+++ b/apps/server/src/index.ts
@@ -434,21 +434,18 @@ eventHookService.initialize(events, settingsService, eventHistoryService, featur
          logger.info('[STARTUP] Feature state reconciliation complete - no stale states found');
        }

-        // Resume interrupted features in the background after reconciliation.
-        // This uses the saved execution state to identify features that were running
-        // before the restart (their statuses have been reset to ready/backlog by
-        // reconciliation above). Running in background so it doesn't block startup.
-        if (totalReconciled > 0) {
-          for (const project of globalSettings.projects) {
-            autoModeService.resumeInterruptedFeatures(project.path).catch((err) => {
-              logger.warn(
-                `[STARTUP] Failed to resume interrupted features for ${project.path}:`,
-                err
-              );
-            });
-          }
-          logger.info('[STARTUP] Initiated background resume of interrupted features');
+        // Resume interrupted features in the background for all projects.
+        // This handles features stuck in transient states (in_progress, pipeline_*)
+        // or explicitly marked as interrupted. Running in background so it doesn't block startup.
+        for (const project of globalSettings.projects) {
+          autoModeService.resumeInterruptedFeatures(project.path).catch((err) => {
+            logger.warn(
+              `[STARTUP] Failed to resume interrupted features for ${project.path}:`,
+              err
+            );
+          });
        }
+        logger.info('[STARTUP] Initiated background resume of interrupted features');
      }
    } catch (err) {
      logger.warn('[STARTUP] Failed to reconcile feature states:', err);
--- a/apps/server/src/services/agent-executor-types.ts
+++ b/apps/server/src/services/agent-executor-types.ts
@@ -44,6 +44,8 @@ export interface AgentExecutionOptions {
  specAlreadyDetected?: boolean;
  existingApprovedPlanContent?: string;
  persistedTasks?: ParsedTask[];
+  /** Feature status - used to check if pipeline summary extraction is required */
+  status?: string;
 }

 export interface AgentExecutionResult {
--- a/apps/server/src/services/agent-executor.ts
+++ b/apps/server/src/services/agent-executor.ts
@@ -4,6 +4,7 @@

 import path from 'path';
 import type { ExecuteOptions, ParsedTask } from '@automaker/types';
+import { isPipelineStatus } from '@automaker/types';
 import { buildPromptWithImages, createLogger, isAuthenticationError } from '@automaker/utils';
 import { getFeatureDir } from '@automaker/platform';
 import * as secureFs from '../lib/secure-fs.js';
@@ -91,6 +92,7 @@ export class AgentExecutor {
      existingApprovedPlanContent,
      persistedTasks,
      credentials,
+      status, // Feature status for pipeline summary check
      claudeCompatibleProvider,
      mcpServers,
      sdkSessionId,
@@ -207,6 +209,17 @@ export class AgentExecutor {
      if (writeTimeout) clearTimeout(writeTimeout);
      if (rawWriteTimeout) clearTimeout(rawWriteTimeout);
      await writeToFile();
+
+      // Extract and save summary from the new content generated in this session
+      await this.extractAndSaveSessionSummary(
+        projectPath,
+        featureId,
+        result.responseText,
+        previousContent,
+        callbacks,
+        status
+      );
+
      return {
        responseText: result.responseText,
        specDetected: true,
@@ -340,9 +353,78 @@ export class AgentExecutor {
        }
      }
    }
+
+    // Capture summary if it hasn't been captured by handleSpecGenerated or executeTasksLoop
+    // or if we're in a simple execution mode (planningMode='skip')
+    await this.extractAndSaveSessionSummary(
+      projectPath,
+      featureId,
+      responseText,
+      previousContent,
+      callbacks,
+      status
+    );
+
    return { responseText, specDetected, tasksCompleted, aborted };
  }

+  /**
+   * Remove a leading follow-up session scaffold from `content`.
+   * Per the original author, resuming a session with previous content prepends:
+   *   "\n\n---\n\n## Follow-up Session\n\n"
+   * Stripping it keeps fallback summaries free of the scaffold header.
+   *
+   * @param content - Session text that may begin with the scaffold.
+   * @returns `content` without a leading scaffold; unchanged when the text does
+   *   not start with optional whitespace, `---`, and `## Follow-up Session`.
+   */
+  private static stripFollowUpScaffold(content: string): string {
+    // Pattern: ^\s*---\s*##\s*Follow-up Session\s*
+    // - ^ with no `m`/`g` flag = only a scaffold at the very start is removed, once
+    // - \s* = any whitespace (handles \n\n before ---, spaces/tabs between markers)
+    // - --- = literal dashes
+    // - \s* = whitespace between dashes and heading
+    // - ## = heading marker
+    // - \s* = whitespace before "Follow-up"
+    // - Follow-up Session = literal, case-sensitive heading text
+    // - \s* = trailing whitespace/newlines after heading, so the result starts at real content
+    const scaffoldPattern = /^\s*---\s*##\s*Follow-up Session\s*/;
+    return content.replace(scaffoldPattern, '');
+  }
+
+  /**
+   * Save a summary taken only from this session's output: the part of `responseText` after the first
+   * `previousContent.length` characters (assumes `responseText` begins with `previousContent` — TODO confirm).
+   */
+  private async extractAndSaveSessionSummary(
+    projectPath: string,
+    featureId: string,
+    responseText: string,
+    previousContent: string | undefined,
+    callbacks: AgentExecutorCallbacks,
+    status?: string
+  ): Promise<void> {
+    const sessionContent = responseText.substring(previousContent ? previousContent.length : 0);
+    const summary = extractSummary(sessionContent);
+    if (summary) {
+      await callbacks.saveFeatureSummary(projectPath, featureId, summary);
+      return;
+    }
+
+    // No summary was extracted. Only pipeline statuses get a fallback: the trimmed session text itself.
+    if (isPipelineStatus(status)) {
+      // Drop a leading follow-up scaffold first; the warning below is logged even if the fallback is empty
+      const cleanSessionContent = AgentExecutor.stripFollowUpScaffold(sessionContent);
+      const fallback = cleanSessionContent.trim();
+      if (fallback) {
+        await callbacks.saveFeatureSummary(projectPath, featureId, fallback);
+      }
+      logger.warn(
+        `[AgentExecutor] Mandatory summary extraction failed for pipeline feature ${featureId} (status="${status}")`
+      );
+    }
+  }
+
  private async executeTasksLoop(
    options: AgentExecutionOptions,
    tasks: ParsedTask[],
@@ -439,14 +521,15 @@ export class AgentExecutor {
                }
              }
              if (!taskCompleteDetected) {
-                const cid = detectTaskCompleteMarker(taskOutput);
-                if (cid) {
+                const completeMarker = detectTaskCompleteMarker(taskOutput);
+                if (completeMarker) {
                  taskCompleteDetected = true;
                  await this.featureStateManager.updateTaskStatus(
                    projectPath,
                    featureId,
-                    cid,
-                    'completed'
+                    completeMarker.id,
+                    'completed',
+                    completeMarker.summary
                  );
                }
              }
@@ -524,8 +607,6 @@ export class AgentExecutor {
        }
      }
    }
-    const summary = extractSummary(responseText);
-    if (summary) await callbacks.saveFeatureSummary(projectPath, featureId, summary);
    return { responseText, tasksCompleted, aborted: false };
  }

@@ -722,8 +803,6 @@ export class AgentExecutor {
      );
      responseText = r.responseText;
    }
-    const summary = extractSummary(responseText);
-    if (summary) await callbacks.saveFeatureSummary(projectPath, featureId, summary);
    return { responseText, tasksCompleted };
  }

--- a/apps/server/src/services/auto-mode/facade.ts
+++ b/apps/server/src/services/auto-mode/facade.ts
@@ -15,7 +15,12 @@ import path from 'path';
 import { exec } from 'child_process';
 import { promisify } from 'util';
 import type { Feature, PlanningMode, ThinkingLevel, ReasoningEffort } from '@automaker/types';
-import { DEFAULT_MAX_CONCURRENCY, DEFAULT_MODELS, stripProviderPrefix } from '@automaker/types';
+import {
+  DEFAULT_MAX_CONCURRENCY,
+  DEFAULT_MODELS,
+  stripProviderPrefix,
+  isPipelineStatus,
+} from '@automaker/types';
 import { resolveModelString } from '@automaker/model-resolver';
 import { createLogger, loadContextFiles, classifyError } from '@automaker/utils';
 import { getFeatureDir } from '@automaker/platform';
@@ -79,6 +84,37 @@ export class AutoModeServiceFacade {
    private readonly settingsService: SettingsService | null
  ) {}

+  /**
+   * Decide whether the auto-mode loop may pick up a feature: its status must be backlog, ready,
+   * interrupted, or a pipeline status, and its branch must match the worktree being processed.
+   * @param feature - The feature to check (reads `status` and `branchName`)
+   * @param branchName - The current worktree branch name (null for main)
+   * @param primaryBranch - The resolved primary branch name; only consulted when branchName is null
+   * @returns True when both the status check and the branch check pass
+   */
+  public static isFeatureEligibleForAutoMode(
+    feature: Feature,
+    branchName: string | null,
+    primaryBranch: string | null
+  ): boolean {
+    const isEligibleStatus =
+      feature.status === 'backlog' ||
+      feature.status === 'ready' ||
+      feature.status === 'interrupted' ||
+      isPipelineStatus(feature.status);
+
+    if (!isEligibleStatus) return false;
+
+    // Status is eligible; now require branch/worktree alignment
+    if (branchName === null) {
+      // Main worktree: accept a missing/empty branchName, or one equal to a non-null primaryBranch
+      return !feature.branchName || (primaryBranch != null && feature.branchName === primaryBranch);
+    } else {
+      // Named worktree: the feature's branchName must equal it exactly
+      return feature.branchName === branchName;
+    }
+  }
+
  /**
   * Classify and log an error at the facade boundary.
   * Emits an error event to the UI so failures are surfaced to the user.
@@ -217,6 +253,7 @@ export class AutoModeServiceFacade {
          thinkingLevel?: ThinkingLevel;
          reasoningEffort?: ReasoningEffort;
          branchName?: string | null;
+          status?: string; // Feature status for pipeline summary check
          [key: string]: unknown;
        }
      ): Promise<void> => {
@@ -300,6 +337,7 @@ export class AutoModeServiceFacade {
            thinkingLevel: opts?.thinkingLevel as ThinkingLevel | undefined,
            reasoningEffort: opts?.reasoningEffort as ReasoningEffort | undefined,
            branchName: opts?.branchName as string | null | undefined,
+            status: opts?.status as string | undefined,
            provider,
            effectiveBareModel,
            credentials,
@@ -373,12 +411,8 @@ export class AutoModeServiceFacade {
        if (branchName === null) {
          primaryBranch = await worktreeResolver.getCurrentBranch(pPath);
        }
-        return features.filter(
-          (f) =>
-            (f.status === 'backlog' || f.status === 'ready') &&
-            (branchName === null
-              ? !f.branchName || (primaryBranch && f.branchName === primaryBranch)
-              : f.branchName === branchName)
+        return features.filter((f) =>
+          AutoModeServiceFacade.isFeatureEligibleForAutoMode(f, branchName, primaryBranch)
        );
      },
      (pPath, branchName, maxConcurrency) =>
--- a/apps/server/src/services/execution-service.ts
+++ b/apps/server/src/services/execution-service.ts
@@ -461,7 +461,10 @@ Please continue from where you left off and complete all remaining tasks. Use th
      const hasIncompleteTasks = totalTasks > 0 && completedTasks < totalTasks;

      try {
-        if (agentOutput) {
+        // Only save summary if feature doesn't already have one (e.g., accumulated from pipeline steps)
+        // This prevents overwriting accumulated summaries with just the last step's output
+        // The agent-executor already extracts and saves summaries during execution
+        if (agentOutput && !completedFeature?.summary) {
          const summary = extractSummary(agentOutput);
          if (summary) await this.saveFeatureSummaryFn(projectPath, featureId, summary);
        }
--- a/apps/server/src/services/feature-state-manager.ts
+++ b/apps/server/src/services/feature-state-manager.ts
@@ -14,7 +14,8 @@
 */

 import path from 'path';
-import type { Feature, ParsedTask, PlanSpec } from '@automaker/types';
+import type { Feature, FeatureStatusWithPipeline, ParsedTask, PlanSpec } from '@automaker/types';
+import { isPipelineStatus } from '@automaker/types';
 import {
  atomicWriteJson,
  readJsonWithRecovery,
@@ -28,6 +29,7 @@ import type { EventEmitter } from '../lib/events.js';
 import type { AutoModeEventType } from './typed-event-bus.js';
 import { getNotificationService } from './notification-service.js';
 import { FeatureLoader } from './feature-loader.js';
+import { pipelineService } from './pipeline-service.js';

 const logger = createLogger('FeatureStateManager');

@@ -252,7 +254,7 @@ export class FeatureStateManager {
    const currentStatus = feature?.status;

    // Preserve pipeline_* statuses so resumePipelineFeature can resume from the correct step
-    if (currentStatus && currentStatus.startsWith('pipeline_')) {
+    if (isPipelineStatus(currentStatus)) {
      logger.info(
        `Feature ${featureId} was in ${currentStatus}; preserving pipeline status for resume`
      );
@@ -270,7 +272,8 @@ export class FeatureStateManager {

  /**
   * Shared helper that scans features in a project directory and resets any stuck
-   * in transient states (in_progress, interrupted, pipeline_*) back to resting states.
+   * in transient states (in_progress, interrupted) back to resting states.
+   * Statuses matching pipeline_* are preserved so they can be resumed.
   *
   * Also resets:
   * - generating planSpec status back to pending
@@ -324,10 +327,7 @@ export class FeatureStateManager {

        // Reset features in active execution states back to a resting state
        // After a server restart, no processes are actually running
-        const isActiveState =
-          originalStatus === 'in_progress' ||
-          originalStatus === 'interrupted' ||
-          (originalStatus != null && originalStatus.startsWith('pipeline_'));
+        const isActiveState = originalStatus === 'in_progress' || originalStatus === 'interrupted';

        if (isActiveState) {
          const hasApprovedPlan = feature.planSpec?.status === 'approved';
@@ -338,6 +338,17 @@ export class FeatureStateManager {
          );
        }

+        // Handle pipeline_* statuses separately: preserve them so they can be resumed
+        // but still count them as needing attention if they were stuck.
+        if (isPipelineStatus(originalStatus)) {
+          // We don't change the status, but we still want to reset planSpec/task states
+          // if they were stuck in transient generation/execution modes.
+          // No feature.status change here.
+          logger.debug(
+            `[${callerLabel}] Preserving pipeline status for feature ${feature.id}: ${originalStatus}`
+          );
+        }
+
        // Reset generating planSpec status back to pending (spec generation was interrupted)
        if (feature.planSpec?.status === 'generating') {
          feature.planSpec.status = 'pending';
@@ -396,10 +407,12 @@ export class FeatureStateManager {
   * Resets:
   * - in_progress features back to ready (if has plan) or backlog (if no plan)
   * - interrupted features back to ready (if has plan) or backlog (if no plan)
-   * - pipeline_* features back to ready (if has plan) or backlog (if no plan)
   * - generating planSpec status back to pending
   * - in_progress tasks back to pending
   *
+   * Preserves:
+   * - pipeline_* statuses (so resumePipelineFeature can resume from correct step)
+   *
   * @param projectPath - The project path to reset features for
   */
  async resetStuckFeatures(projectPath: string): Promise<void> {
@@ -530,6 +543,10 @@ export class FeatureStateManager {
   * This is called after agent execution completes to save a summary
   * extracted from the agent's output using <summary> tags.
   *
+   * For pipeline features (status starts with pipeline_), summaries are accumulated
+   * across steps with a header identifying each step. For non-pipeline features,
+   * the summary is replaced entirely.
+   *
   * @param projectPath - The project path
   * @param featureId - The feature ID
   * @param summary - The summary text to save
@@ -537,6 +554,7 @@ export class FeatureStateManager {
  async saveFeatureSummary(projectPath: string, featureId: string, summary: string): Promise<void> {
    const featureDir = getFeatureDir(projectPath, featureId);
    const featurePath = path.join(featureDir, 'feature.json');
+    const normalizedSummary = summary.trim();

    try {
      const result = await readJsonWithRecovery<Feature | null>(featurePath, null, {
@@ -552,7 +570,63 @@ export class FeatureStateManager {
        return;
      }

-      feature.summary = summary;
+      if (!normalizedSummary) {
+        logger.debug(
+          `[saveFeatureSummary] Skipping empty summary for feature ${featureId} (status="${feature.status}")`
+        );
+        return;
+      }
+
+      // For pipeline features, accumulate summaries across steps
+      if (isPipelineStatus(feature.status)) {
+        // If we already have a non-phase summary (typically the initial implementation
+        // summary from in_progress), normalize it into a named phase before appending
+        // pipeline step summaries. This keeps the format consistent for UI phase parsing.
+        const implementationHeader = '### Implementation';
+        if (feature.summary && !feature.summary.trimStart().startsWith('### ')) {
+          feature.summary = `${implementationHeader}\n\n${feature.summary.trim()}`;
+        }
+
+        const stepName = await this.getPipelineStepName(projectPath, feature.status);
+        const stepHeader = `### ${stepName}`;
+        const stepSection = `${stepHeader}\n\n${normalizedSummary}`;
+
+        if (feature.summary) {
+          // Check if this step already exists in the summary (e.g., if retried)
+          // Use section splitting to only match real section boundaries, not text in body content
+          const separator = '\n\n---\n\n';
+          const sections = feature.summary.split(separator);
+          let replaced = false;
+          const updatedSections = sections.map((section) => {
+            if (section.startsWith(`${stepHeader}\n\n`)) {
+              replaced = true;
+              return stepSection;
+            }
+            return section;
+          });
+
+          if (replaced) {
+            feature.summary = updatedSections.join(separator);
+            logger.info(
+              `[saveFeatureSummary] Updated existing pipeline step summary for feature ${featureId}: step="${stepName}"`
+            );
+          } else {
+            // Append as a new section
+            feature.summary = `${feature.summary}${separator}${stepSection}`;
+            logger.info(
+              `[saveFeatureSummary] Appended new pipeline step summary for feature ${featureId}: step="${stepName}"`
+            );
+          }
+        } else {
+          feature.summary = stepSection;
+          logger.info(
+            `[saveFeatureSummary] Initialized pipeline summary for feature ${featureId}: step="${stepName}"`
+          );
+        }
+      } else {
+        feature.summary = normalizedSummary;
+      }
+
      feature.updatedAt = new Date().toISOString();

      // PERSIST BEFORE EMIT
@@ -562,13 +636,42 @@ export class FeatureStateManager {
      this.emitAutoModeEvent('auto_mode_summary', {
        featureId,
        projectPath,
-        summary,
+        summary: feature.summary,
      });
    } catch (error) {
      logger.error(`Failed to save summary for ${featureId}:`, error);
    }
  }

+  /**
+   * Resolve a display name for the pipeline step encoded in `status`, asking pipelineService first.
+   * Lookup errors are caught and logged at debug level; the fallback name is used instead.
+   * @param projectPath - The project path passed to pipelineService.getStep
+   * @param status - The current pipeline status (e.g., 'pipeline_abc123')
+   * @returns step.name when the step is found, otherwise a title-cased name built from `status`
+   */
+  private async getPipelineStepName(projectPath: string, status: string): Promise<string> {
+    try {
+      const stepId = pipelineService.getStepIdFromStatus(status as FeatureStatusWithPipeline);
+      if (stepId) {
+        const step = await pipelineService.getStep(projectPath, stepId);
+        if (step) return step.name;
+      }
+    } catch (error) {
+      logger.debug(
+        `[getPipelineStepName] Failed to look up step name for status "${status}", using fallback:`,
+        error
+      );
+    }
+    // Fallback: remove the first 'pipeline_' occurrence, then title-case the '_'-separated words
+    // e.g., 'pipeline_code_review' → 'Code Review'; an opaque ID like 'pipeline_abc123' → 'Abc123'
+    const suffix = status.replace('pipeline_', '');
+    return suffix
+      .split('_')
+      .map((word) => word.charAt(0).toUpperCase() + word.slice(1))
+      .join(' ');
+  }
+
  /**
   * Update the status of a specific task within planSpec.tasks
   *
@@ -581,7 +684,8 @@ export class FeatureStateManager {
    projectPath: string,
    featureId: string,
    taskId: string,
-    status: ParsedTask['status']
+    status: ParsedTask['status'],
+    summary?: string
  ): Promise<void> {
    const featureDir = getFeatureDir(projectPath, featureId);
    const featurePath = path.join(featureDir, 'feature.json');
@@ -604,6 +708,9 @@ export class FeatureStateManager {
      const task = feature.planSpec.tasks.find((t) => t.id === taskId);
      if (task) {
        task.status = status;
+        if (summary) {
+          task.summary = summary;
+        }
        feature.updatedAt = new Date().toISOString();

        // PERSIST BEFORE EMIT
@@ -615,6 +722,7 @@ export class FeatureStateManager {
          projectPath,
          taskId,
          status,
+          summary,
          tasks: feature.planSpec.tasks,
        });
      } else {
--- a/apps/server/src/services/pipeline-orchestrator.ts
+++ b/apps/server/src/services/pipeline-orchestrator.ts
@@ -115,6 +115,7 @@ export class PipelineOrchestrator {
        projectPath,
      });
      const model = resolveModelString(feature.model, DEFAULT_MODELS.claude);
+      const currentStatus = `pipeline_${step.id}`;
      await this.runAgentFn(
        workDir,
        featureId,
@@ -133,6 +134,7 @@ export class PipelineOrchestrator {
          useClaudeCodeSystemPrompt,
          thinkingLevel: feature.thinkingLevel,
          reasoningEffort: feature.reasoningEffort,
+          status: currentStatus,
        }
      );
      try {
@@ -165,7 +167,18 @@ export class PipelineOrchestrator {
    if (previousContext) prompt += `### Previous Work\n${previousContext}\n\n`;
    return (
      prompt +
-      `### Pipeline Step Instructions\n${step.instructions}\n\n### Task\nComplete the pipeline step instructions above.`
+      `### Pipeline Step Instructions\n${step.instructions}\n\n### Task\nComplete the pipeline step instructions above.\n\n` +
+      `**CRITICAL: After completing the instructions, you MUST output a summary using this EXACT format:**\n\n` +
+      `<summary>\n` +
+      `## Summary: ${step.name}\n\n` +
+      `### Changes Implemented\n` +
+      `- [List all changes made in this step]\n\n` +
+      `### Files Modified\n` +
+      `- [List all files modified in this step]\n\n` +
+      `### Outcome\n` +
+      `- [Describe the result of this step]\n` +
+      `</summary>\n\n` +
+      `The <summary> and </summary> tags MUST be on their own lines. This is REQUIRED.`
    );
  }

@@ -491,6 +504,7 @@ export class PipelineOrchestrator {
            useClaudeCodeSystemPrompt: context.useClaudeCodeSystemPrompt,
            autoLoadClaudeMd: context.autoLoadClaudeMd,
            reasoningEffort: context.feature.reasoningEffort,
+            status: context.feature.status,
          }
        );
      }
--- a/apps/server/src/services/spec-parser.ts
+++ b/apps/server/src/services/spec-parser.ts
@@ -101,12 +101,32 @@ export function detectTaskStartMarker(text: string): string | null {
 }

 /**
- * Detect [TASK_COMPLETE] marker in text and extract task ID
+ * Detect [TASK_COMPLETE] marker in text and extract task ID and optional summary (null if no match)
 * Format: [TASK_COMPLETE] T###: Brief summary
 */
-export function detectTaskCompleteMarker(text: string): string | null {
-  const match = text.match(/\[TASK_COMPLETE\]\s*(T\d{3})/);
-  return match ? match[1] : null;
+export function detectTaskCompleteMarker(text: string): { id: string; summary?: string } | null {
+  // Match the marker, a three-digit task ID, and optionally ": summary" up to the end of the line.
+  // Brackets are allowed in the summary (e.g., "supports array[index] access").
+  // Pattern breakdown:
+  // - \[TASK_COMPLETE\]\s* - Match the marker (the `i` flag makes marker and "T" case-insensitive)
+  // - (T\d{3}) - Capture task ID, returned exactly as written in the text
+  // - (?::\s*(.+?))? - Optionally capture summary text after a colon
+  // - (?=\n|$) - Require a newline or end of input next. NOTE(review): with no colon, extra text
+  //   after the ID (e.g. "T001 done") makes the whole match fail, and \s* after ":" can cross a newline
+  const match = text.match(/\[TASK_COMPLETE\]\s*(T\d{3})(?::\s*(.+?))?(?=\n|$)/i);
+  if (!match) return null;
+
+  // Post-process: the summary group is undefined when no ": summary" part was matched
+  let summary = match[2]?.trim();
+  if (summary) {
+    // Cut everything from the first [TASK_...] marker onward (case-insensitive), then re-trim
+    summary = summary.replace(/\s*\[TASK_[A-Z_]+\].*$/i, '').trim();
+  }
+
+  return {
+    id: match[1],
+    summary: summary || undefined,
+  };
 }

 /**