Fix agent output summary for pipeline steps (#812)

* Changes from fix/agent-output-summary-for-pipeline-steps

* feat: Optimize pipeline summary extraction and fix regex vulnerability

* fix: Use fallback summary for pipeline steps when extraction fails

* fix: Strip follow-up session scaffold from pipeline step fallback summaries
This commit is contained in:
gsxdsm
2026-02-25 22:13:38 -08:00
committed by GitHub
parent 70c9fd77f6
commit 9747faf1b9
37 changed files with 7164 additions and 163 deletions

View File

@@ -434,21 +434,18 @@ eventHookService.initialize(events, settingsService, eventHistoryService, featur
logger.info('[STARTUP] Feature state reconciliation complete - no stale states found');
}
// Resume interrupted features in the background after reconciliation.
// This uses the saved execution state to identify features that were running
// before the restart (their statuses have been reset to ready/backlog by
// reconciliation above). Running in background so it doesn't block startup.
if (totalReconciled > 0) {
for (const project of globalSettings.projects) {
autoModeService.resumeInterruptedFeatures(project.path).catch((err) => {
logger.warn(
`[STARTUP] Failed to resume interrupted features for ${project.path}:`,
err
);
});
}
logger.info('[STARTUP] Initiated background resume of interrupted features');
// Resume interrupted features in the background for all projects.
// This handles features stuck in transient states (in_progress, pipeline_*)
// or explicitly marked as interrupted. Running in background so it doesn't block startup.
for (const project of globalSettings.projects) {
autoModeService.resumeInterruptedFeatures(project.path).catch((err) => {
logger.warn(
`[STARTUP] Failed to resume interrupted features for ${project.path}:`,
err
);
});
}
logger.info('[STARTUP] Initiated background resume of interrupted features');
}
} catch (err) {
logger.warn('[STARTUP] Failed to reconcile feature states:', err);

View File

@@ -44,6 +44,8 @@ export interface AgentExecutionOptions {
specAlreadyDetected?: boolean;
existingApprovedPlanContent?: string;
persistedTasks?: ParsedTask[];
/** Feature status - used to check if pipeline summary extraction is required */
status?: string;
}
export interface AgentExecutionResult {

View File

@@ -4,6 +4,7 @@
import path from 'path';
import type { ExecuteOptions, ParsedTask } from '@automaker/types';
import { isPipelineStatus } from '@automaker/types';
import { buildPromptWithImages, createLogger, isAuthenticationError } from '@automaker/utils';
import { getFeatureDir } from '@automaker/platform';
import * as secureFs from '../lib/secure-fs.js';
@@ -91,6 +92,7 @@ export class AgentExecutor {
existingApprovedPlanContent,
persistedTasks,
credentials,
status, // Feature status for pipeline summary check
claudeCompatibleProvider,
mcpServers,
sdkSessionId,
@@ -207,6 +209,17 @@ export class AgentExecutor {
if (writeTimeout) clearTimeout(writeTimeout);
if (rawWriteTimeout) clearTimeout(rawWriteTimeout);
await writeToFile();
// Extract and save summary from the new content generated in this session
await this.extractAndSaveSessionSummary(
projectPath,
featureId,
result.responseText,
previousContent,
callbacks,
status
);
return {
responseText: result.responseText,
specDetected: true,
@@ -340,9 +353,78 @@ export class AgentExecutor {
}
}
}
// Capture summary if it hasn't been captured by handleSpecGenerated or executeTasksLoop
// or if we're in a simple execution mode (planningMode='skip')
await this.extractAndSaveSessionSummary(
projectPath,
featureId,
responseText,
previousContent,
callbacks,
status
);
return { responseText, specDetected, tasksCompleted, aborted };
}
/**
* Strip the follow-up session scaffold marker from content.
* The scaffold is added when resuming a session with previous content:
* "\n\n---\n\n## Follow-up Session\n\n"
* This ensures fallback summaries don't include the scaffold header.
*
* The regex pattern handles variations in whitespace while matching the
* scaffold structure: dashes followed by "## Follow-up Session" at the
* start of the content.
*/
private static stripFollowUpScaffold(content: string): string {
// Pattern matches: ^\s*---\s*##\s*Follow-up Session\s*
// - ^ = start of content (scaffold is always at the beginning of sessionContent)
// - \s* = any whitespace (handles \n\n before ---, spaces/tabs between markers)
// - --- = literal dashes
// - \s* = whitespace between dashes and heading
// - ## = heading marker
// - \s* = whitespace before "Follow-up"
// - Follow-up Session = literal heading text
// - \s* = trailing whitespace/newlines after heading
const scaffoldPattern = /^\s*---\s*##\s*Follow-up Session\s*/;
return content.replace(scaffoldPattern, '');
}
/**
* Extract summary ONLY from the new content generated in this session
* and save it via the provided callback.
*/
private async extractAndSaveSessionSummary(
projectPath: string,
featureId: string,
responseText: string,
previousContent: string | undefined,
callbacks: AgentExecutorCallbacks,
status?: string
): Promise<void> {
const sessionContent = responseText.substring(previousContent ? previousContent.length : 0);
const summary = extractSummary(sessionContent);
if (summary) {
await callbacks.saveFeatureSummary(projectPath, featureId, summary);
return;
}
// If we're in a pipeline step, a summary is expected. Use a fallback if extraction fails.
if (isPipelineStatus(status)) {
// Strip any follow-up session scaffold before using as fallback
const cleanSessionContent = AgentExecutor.stripFollowUpScaffold(sessionContent);
const fallback = cleanSessionContent.trim();
if (fallback) {
await callbacks.saveFeatureSummary(projectPath, featureId, fallback);
}
logger.warn(
`[AgentExecutor] Mandatory summary extraction failed for pipeline feature ${featureId} (status="${status}")`
);
}
}
private async executeTasksLoop(
options: AgentExecutionOptions,
tasks: ParsedTask[],
@@ -439,14 +521,15 @@ export class AgentExecutor {
}
}
if (!taskCompleteDetected) {
const cid = detectTaskCompleteMarker(taskOutput);
if (cid) {
const completeMarker = detectTaskCompleteMarker(taskOutput);
if (completeMarker) {
taskCompleteDetected = true;
await this.featureStateManager.updateTaskStatus(
projectPath,
featureId,
cid,
'completed'
completeMarker.id,
'completed',
completeMarker.summary
);
}
}
@@ -524,8 +607,6 @@ export class AgentExecutor {
}
}
}
const summary = extractSummary(responseText);
if (summary) await callbacks.saveFeatureSummary(projectPath, featureId, summary);
return { responseText, tasksCompleted, aborted: false };
}
@@ -722,8 +803,6 @@ export class AgentExecutor {
);
responseText = r.responseText;
}
const summary = extractSummary(responseText);
if (summary) await callbacks.saveFeatureSummary(projectPath, featureId, summary);
return { responseText, tasksCompleted };
}

View File

@@ -15,7 +15,12 @@ import path from 'path';
import { exec } from 'child_process';
import { promisify } from 'util';
import type { Feature, PlanningMode, ThinkingLevel, ReasoningEffort } from '@automaker/types';
import { DEFAULT_MAX_CONCURRENCY, DEFAULT_MODELS, stripProviderPrefix } from '@automaker/types';
import {
DEFAULT_MAX_CONCURRENCY,
DEFAULT_MODELS,
stripProviderPrefix,
isPipelineStatus,
} from '@automaker/types';
import { resolveModelString } from '@automaker/model-resolver';
import { createLogger, loadContextFiles, classifyError } from '@automaker/utils';
import { getFeatureDir } from '@automaker/platform';
@@ -79,6 +84,37 @@ export class AutoModeServiceFacade {
private readonly settingsService: SettingsService | null
) {}
/**
* Determine if a feature is eligible to be picked up by the auto-mode loop.
*
* @param feature - The feature to check
* @param branchName - The current worktree branch name (null for main)
* @param primaryBranch - The resolved primary branch name for the project
* @returns True if the feature is eligible for auto-dispatch
*/
public static isFeatureEligibleForAutoMode(
feature: Feature,
branchName: string | null,
primaryBranch: string | null
): boolean {
const isEligibleStatus =
feature.status === 'backlog' ||
feature.status === 'ready' ||
feature.status === 'interrupted' ||
isPipelineStatus(feature.status);
if (!isEligibleStatus) return false;
// Filter by branch/worktree alignment
if (branchName === null) {
// For main worktree, include features with no branch or matching primary branch
return !feature.branchName || (primaryBranch != null && feature.branchName === primaryBranch);
} else {
// For named worktrees, only include features matching that branch
return feature.branchName === branchName;
}
}
/**
* Classify and log an error at the facade boundary.
* Emits an error event to the UI so failures are surfaced to the user.
@@ -217,6 +253,7 @@ export class AutoModeServiceFacade {
thinkingLevel?: ThinkingLevel;
reasoningEffort?: ReasoningEffort;
branchName?: string | null;
status?: string; // Feature status for pipeline summary check
[key: string]: unknown;
}
): Promise<void> => {
@@ -300,6 +337,7 @@ export class AutoModeServiceFacade {
thinkingLevel: opts?.thinkingLevel as ThinkingLevel | undefined,
reasoningEffort: opts?.reasoningEffort as ReasoningEffort | undefined,
branchName: opts?.branchName as string | null | undefined,
status: opts?.status as string | undefined,
provider,
effectiveBareModel,
credentials,
@@ -373,12 +411,8 @@ export class AutoModeServiceFacade {
if (branchName === null) {
primaryBranch = await worktreeResolver.getCurrentBranch(pPath);
}
return features.filter(
(f) =>
(f.status === 'backlog' || f.status === 'ready') &&
(branchName === null
? !f.branchName || (primaryBranch && f.branchName === primaryBranch)
: f.branchName === branchName)
return features.filter((f) =>
AutoModeServiceFacade.isFeatureEligibleForAutoMode(f, branchName, primaryBranch)
);
},
(pPath, branchName, maxConcurrency) =>

View File

@@ -461,7 +461,10 @@ Please continue from where you left off and complete all remaining tasks. Use th
const hasIncompleteTasks = totalTasks > 0 && completedTasks < totalTasks;
try {
if (agentOutput) {
// Only save summary if feature doesn't already have one (e.g., accumulated from pipeline steps)
// This prevents overwriting accumulated summaries with just the last step's output
// The agent-executor already extracts and saves summaries during execution
if (agentOutput && !completedFeature?.summary) {
const summary = extractSummary(agentOutput);
if (summary) await this.saveFeatureSummaryFn(projectPath, featureId, summary);
}

View File

@@ -14,7 +14,8 @@
*/
import path from 'path';
import type { Feature, ParsedTask, PlanSpec } from '@automaker/types';
import type { Feature, FeatureStatusWithPipeline, ParsedTask, PlanSpec } from '@automaker/types';
import { isPipelineStatus } from '@automaker/types';
import {
atomicWriteJson,
readJsonWithRecovery,
@@ -28,6 +29,7 @@ import type { EventEmitter } from '../lib/events.js';
import type { AutoModeEventType } from './typed-event-bus.js';
import { getNotificationService } from './notification-service.js';
import { FeatureLoader } from './feature-loader.js';
import { pipelineService } from './pipeline-service.js';
const logger = createLogger('FeatureStateManager');
@@ -252,7 +254,7 @@ export class FeatureStateManager {
const currentStatus = feature?.status;
// Preserve pipeline_* statuses so resumePipelineFeature can resume from the correct step
if (currentStatus && currentStatus.startsWith('pipeline_')) {
if (isPipelineStatus(currentStatus)) {
logger.info(
`Feature ${featureId} was in ${currentStatus}; preserving pipeline status for resume`
);
@@ -270,7 +272,8 @@ export class FeatureStateManager {
/**
* Shared helper that scans features in a project directory and resets any stuck
* in transient states (in_progress, interrupted, pipeline_*) back to resting states.
* in transient states (in_progress, interrupted) back to resting states.
* Pipeline_* statuses are preserved so they can be resumed.
*
* Also resets:
* - generating planSpec status back to pending
@@ -324,10 +327,7 @@ export class FeatureStateManager {
// Reset features in active execution states back to a resting state
// After a server restart, no processes are actually running
const isActiveState =
originalStatus === 'in_progress' ||
originalStatus === 'interrupted' ||
(originalStatus != null && originalStatus.startsWith('pipeline_'));
const isActiveState = originalStatus === 'in_progress' || originalStatus === 'interrupted';
if (isActiveState) {
const hasApprovedPlan = feature.planSpec?.status === 'approved';
@@ -338,6 +338,17 @@ export class FeatureStateManager {
);
}
// Handle pipeline_* statuses separately: preserve them so they can be resumed
// but still count them as needing attention if they were stuck.
if (isPipelineStatus(originalStatus)) {
// We don't change the status, but we still want to reset planSpec/task states
// if they were stuck in transient generation/execution modes.
// No feature.status change here.
logger.debug(
`[${callerLabel}] Preserving pipeline status for feature ${feature.id}: ${originalStatus}`
);
}
// Reset generating planSpec status back to pending (spec generation was interrupted)
if (feature.planSpec?.status === 'generating') {
feature.planSpec.status = 'pending';
@@ -396,10 +407,12 @@ export class FeatureStateManager {
* Resets:
* - in_progress features back to ready (if has plan) or backlog (if no plan)
* - interrupted features back to ready (if has plan) or backlog (if no plan)
* - pipeline_* features back to ready (if has plan) or backlog (if no plan)
* - generating planSpec status back to pending
* - in_progress tasks back to pending
*
* Preserves:
* - pipeline_* statuses (so resumePipelineFeature can resume from correct step)
*
* @param projectPath - The project path to reset features for
*/
async resetStuckFeatures(projectPath: string): Promise<void> {
@@ -530,6 +543,10 @@ export class FeatureStateManager {
* This is called after agent execution completes to save a summary
* extracted from the agent's output using <summary> tags.
*
* For pipeline features (status starts with pipeline_), summaries are accumulated
* across steps with a header identifying each step. For non-pipeline features,
* the summary is replaced entirely.
*
* @param projectPath - The project path
* @param featureId - The feature ID
* @param summary - The summary text to save
@@ -537,6 +554,7 @@ export class FeatureStateManager {
async saveFeatureSummary(projectPath: string, featureId: string, summary: string): Promise<void> {
const featureDir = getFeatureDir(projectPath, featureId);
const featurePath = path.join(featureDir, 'feature.json');
const normalizedSummary = summary.trim();
try {
const result = await readJsonWithRecovery<Feature | null>(featurePath, null, {
@@ -552,7 +570,63 @@ export class FeatureStateManager {
return;
}
feature.summary = summary;
if (!normalizedSummary) {
logger.debug(
`[saveFeatureSummary] Skipping empty summary for feature ${featureId} (status="${feature.status}")`
);
return;
}
// For pipeline features, accumulate summaries across steps
if (isPipelineStatus(feature.status)) {
// If we already have a non-phase summary (typically the initial implementation
// summary from in_progress), normalize it into a named phase before appending
// pipeline step summaries. This keeps the format consistent for UI phase parsing.
const implementationHeader = '### Implementation';
if (feature.summary && !feature.summary.trimStart().startsWith('### ')) {
feature.summary = `${implementationHeader}\n\n${feature.summary.trim()}`;
}
const stepName = await this.getPipelineStepName(projectPath, feature.status);
const stepHeader = `### ${stepName}`;
const stepSection = `${stepHeader}\n\n${normalizedSummary}`;
if (feature.summary) {
// Check if this step already exists in the summary (e.g., if retried)
// Use section splitting to only match real section boundaries, not text in body content
const separator = '\n\n---\n\n';
const sections = feature.summary.split(separator);
let replaced = false;
const updatedSections = sections.map((section) => {
if (section.startsWith(`${stepHeader}\n\n`)) {
replaced = true;
return stepSection;
}
return section;
});
if (replaced) {
feature.summary = updatedSections.join(separator);
logger.info(
`[saveFeatureSummary] Updated existing pipeline step summary for feature ${featureId}: step="${stepName}"`
);
} else {
// Append as a new section
feature.summary = `${feature.summary}${separator}${stepSection}`;
logger.info(
`[saveFeatureSummary] Appended new pipeline step summary for feature ${featureId}: step="${stepName}"`
);
}
} else {
feature.summary = stepSection;
logger.info(
`[saveFeatureSummary] Initialized pipeline summary for feature ${featureId}: step="${stepName}"`
);
}
} else {
feature.summary = normalizedSummary;
}
feature.updatedAt = new Date().toISOString();
// PERSIST BEFORE EMIT
@@ -562,13 +636,42 @@ export class FeatureStateManager {
this.emitAutoModeEvent('auto_mode_summary', {
featureId,
projectPath,
summary,
summary: feature.summary,
});
} catch (error) {
logger.error(`Failed to save summary for ${featureId}:`, error);
}
}
/**
* Look up the pipeline step name from the current pipeline status.
*
* @param projectPath - The project path
* @param status - The current pipeline status (e.g., 'pipeline_abc123')
* @returns The step name, or a fallback based on the step ID
*/
private async getPipelineStepName(projectPath: string, status: string): Promise<string> {
try {
const stepId = pipelineService.getStepIdFromStatus(status as FeatureStatusWithPipeline);
if (stepId) {
const step = await pipelineService.getStep(projectPath, stepId);
if (step) return step.name;
}
} catch (error) {
logger.debug(
`[getPipelineStepName] Failed to look up step name for status "${status}", using fallback:`,
error
);
}
// Fallback: derive a human-readable name from the status suffix
// e.g., 'pipeline_code_review' → 'Code Review'
const suffix = status.replace('pipeline_', '');
return suffix
.split('_')
.map((word) => word.charAt(0).toUpperCase() + word.slice(1))
.join(' ');
}
/**
* Update the status of a specific task within planSpec.tasks
*
@@ -581,7 +684,8 @@ export class FeatureStateManager {
projectPath: string,
featureId: string,
taskId: string,
status: ParsedTask['status']
status: ParsedTask['status'],
summary?: string
): Promise<void> {
const featureDir = getFeatureDir(projectPath, featureId);
const featurePath = path.join(featureDir, 'feature.json');
@@ -604,6 +708,9 @@ export class FeatureStateManager {
const task = feature.planSpec.tasks.find((t) => t.id === taskId);
if (task) {
task.status = status;
if (summary) {
task.summary = summary;
}
feature.updatedAt = new Date().toISOString();
// PERSIST BEFORE EMIT
@@ -615,6 +722,7 @@ export class FeatureStateManager {
projectPath,
taskId,
status,
summary,
tasks: feature.planSpec.tasks,
});
} else {

View File

@@ -115,6 +115,7 @@ export class PipelineOrchestrator {
projectPath,
});
const model = resolveModelString(feature.model, DEFAULT_MODELS.claude);
const currentStatus = `pipeline_${step.id}`;
await this.runAgentFn(
workDir,
featureId,
@@ -133,6 +134,7 @@ export class PipelineOrchestrator {
useClaudeCodeSystemPrompt,
thinkingLevel: feature.thinkingLevel,
reasoningEffort: feature.reasoningEffort,
status: currentStatus,
}
);
try {
@@ -165,7 +167,18 @@ export class PipelineOrchestrator {
if (previousContext) prompt += `### Previous Work\n${previousContext}\n\n`;
return (
prompt +
`### Pipeline Step Instructions\n${step.instructions}\n\n### Task\nComplete the pipeline step instructions above.`
`### Pipeline Step Instructions\n${step.instructions}\n\n### Task\nComplete the pipeline step instructions above.\n\n` +
`**CRITICAL: After completing the instructions, you MUST output a summary using this EXACT format:**\n\n` +
`<summary>\n` +
`## Summary: ${step.name}\n\n` +
`### Changes Implemented\n` +
`- [List all changes made in this step]\n\n` +
`### Files Modified\n` +
`- [List all files modified in this step]\n\n` +
`### Outcome\n` +
`- [Describe the result of this step]\n` +
`</summary>\n\n` +
`The <summary> and </summary> tags MUST be on their own lines. This is REQUIRED.`
);
}
@@ -491,6 +504,7 @@ export class PipelineOrchestrator {
useClaudeCodeSystemPrompt: context.useClaudeCodeSystemPrompt,
autoLoadClaudeMd: context.autoLoadClaudeMd,
reasoningEffort: context.feature.reasoningEffort,
status: context.feature.status,
}
);
}

View File

@@ -101,12 +101,32 @@ export function detectTaskStartMarker(text: string): string | null {
}
/**
* Detect [TASK_COMPLETE] marker in text and extract task ID
* Detect [TASK_COMPLETE] marker in text and extract task ID and summary
* Format: [TASK_COMPLETE] T###: Brief summary
*/
export function detectTaskCompleteMarker(text: string): string | null {
const match = text.match(/\[TASK_COMPLETE\]\s*(T\d{3})/);
return match ? match[1] : null;
export function detectTaskCompleteMarker(text: string): { id: string; summary?: string } | null {
// Use a regex that captures the summary until newline or next task marker
// Allow brackets in summary content (e.g., "supports array[index] access")
// Pattern breakdown:
// - \[TASK_COMPLETE\]\s* - Match the marker
// - (T\d{3}) - Capture task ID
// - (?::\s*([^\n\[]+))? - Optionally capture summary (stops at newline or bracket)
// - But we want to allow brackets in summary, so we use a different approach:
// - Match summary until newline, then trim any trailing markers in post-processing
const match = text.match(/\[TASK_COMPLETE\]\s*(T\d{3})(?::\s*(.+?))?(?=\n|$)/i);
if (!match) return null;
// Post-process: remove trailing task markers from summary if present
let summary = match[2]?.trim();
if (summary) {
// Remove trailing content that looks like another marker
summary = summary.replace(/\s*\[TASK_[A-Z_]+\].*$/i, '').trim();
}
return {
id: match[1],
summary: summary || undefined,
};
}
/**