Merge pull request #781 from gsxdsm/fix/improve-restart-recovery

feat: Add feature state reconciliation on server startup
This commit is contained in:
gsxdsm
2026-02-17 11:15:03 -08:00
committed by GitHub
10 changed files with 300 additions and 37 deletions

View File

@@ -368,24 +368,61 @@ eventHookService.initialize(events, settingsService, eventHistoryService, featur
logger.warn('Failed to check for legacy settings migration:', err);
}
// Apply logging settings from saved settings
// Fetch global settings once and reuse for logging config and feature reconciliation
let globalSettings: Awaited<ReturnType<typeof settingsService.getGlobalSettings>> | null = null;
try {
const settings = await settingsService.getGlobalSettings();
if (settings.serverLogLevel && LOG_LEVEL_MAP[settings.serverLogLevel] !== undefined) {
setLogLevel(LOG_LEVEL_MAP[settings.serverLogLevel]);
logger.info(`Server log level set to: ${settings.serverLogLevel}`);
}
// Apply request logging setting (default true if not set)
const enableRequestLog = settings.enableRequestLogging ?? true;
setRequestLoggingEnabled(enableRequestLog);
logger.info(`HTTP request logging: ${enableRequestLog ? 'enabled' : 'disabled'}`);
globalSettings = await settingsService.getGlobalSettings();
} catch (err) {
logger.warn('Failed to load logging settings, using defaults');
logger.warn('Failed to load global settings, using defaults');
}
// Push the persisted logging preferences into the logger and the HTTP request
// logger. Skipped entirely when the global settings could not be loaded.
if (globalSettings) {
  try {
    const configuredLevel = globalSettings.serverLogLevel;
    // Only levels that are present in LOG_LEVEL_MAP are applied
    const mappedLevel = configuredLevel ? LOG_LEVEL_MAP[configuredLevel] : undefined;
    if (mappedLevel !== undefined) {
      setLogLevel(mappedLevel);
      logger.info(`Server log level set to: ${configuredLevel}`);
    }
    // Request logging stays on unless the saved setting provides a value
    const requestLoggingOn = globalSettings.enableRequestLogging ?? true;
    setRequestLoggingEnabled(requestLoggingOn);
    logger.info(`HTTP request logging: ${requestLoggingOn ? 'enabled' : 'disabled'}`);
  } catch (err) {
    logger.warn('Failed to apply logging settings, using defaults');
  }
}
await agentService.initialize();
logger.info('Agent service initialized');
// Reconcile feature states on startup
// After any type of restart (clean, forced, crash), features may be stuck in
// transient states (in_progress, interrupted, pipeline_*) that don't match reality.
// Reconcile them back to resting states before the UI is served.
// Skipped when the global settings could not be loaded (no project list available).
if (globalSettings) {
  try {
    const projects = globalSettings.projects ?? [];
    if (projects.length > 0) {
      let totalReconciled = 0;
      let failedProjects = 0;
      for (const project of projects) {
        // Isolate failures per project: one project that throws must not
        // prevent the remaining projects from being reconciled.
        try {
          totalReconciled += await autoModeService.reconcileFeatureStates(project.path);
        } catch (err) {
          failedProjects++;
          logger.warn(
            `[STARTUP] Failed to reconcile feature states for ${project.path}:`,
            err
          );
        }
      }
      if (totalReconciled > 0) {
        logger.info(
          `[STARTUP] Reconciled ${totalReconciled} feature(s) across ${projects.length} project(s)`
        );
      } else if (failedProjects === 0) {
        // Only claim a clean result when every project was actually processed
        logger.info('[STARTUP] Feature state reconciliation complete - no stale states found');
      }
      if (failedProjects > 0) {
        logger.warn(
          `[STARTUP] Feature state reconciliation failed for ${failedProjects} of ${projects.length} project(s)`
        );
      }
    }
  } catch (err) {
    // Best-effort: reconciliation problems must never block server startup
    logger.warn('[STARTUP] Failed to reconcile feature states:', err);
  }
}
// Bootstrap Codex model cache in background (don't block server startup)
void codexModelCacheService.getModels().catch((err) => {
logger.error('Failed to bootstrap Codex model cache:', err);

View File

@@ -21,6 +21,7 @@ import { createFollowUpFeatureHandler } from './routes/follow-up-feature.js';
import { createCommitFeatureHandler } from './routes/commit-feature.js';
import { createApprovePlanHandler } from './routes/approve-plan.js';
import { createResumeInterruptedHandler } from './routes/resume-interrupted.js';
import { createReconcileHandler } from './routes/reconcile.js';
/**
* Create auto-mode routes.
@@ -81,6 +82,11 @@ export function createAutoModeRoutes(autoModeService: AutoModeServiceCompat): Ro
validatePathParams('projectPath'),
createResumeInterruptedHandler(autoModeService)
);
router.post(
'/reconcile',
validatePathParams('projectPath'),
createReconcileHandler(autoModeService)
);
return router;
}

View File

@@ -0,0 +1,53 @@
/**
* Reconcile Feature States Handler
*
* On-demand endpoint to reconcile all feature states for a project.
* Resets features stuck in transient states (in_progress, interrupted, pipeline_*)
* back to resting states (ready/backlog) and emits events to update the UI.
*
* This is useful when:
* - The UI reconnects after a server restart
* - A client detects stale feature states
* - An admin wants to force-reset stuck features
*/
import type { Request, Response } from 'express';
import { createLogger } from '@automaker/utils';
import type { AutoModeServiceCompat } from '../../../services/auto-mode/index.js';
const logger = createLogger('ReconcileFeatures');
interface ReconcileRequest {
projectPath: string;
}
/**
 * Build the Express handler that reconciles feature states for one project on demand.
 *
 * Responds with:
 * - 400 when the request body carries no usable `projectPath` string
 * - 200 with `{ success: true, reconciledCount, message }` on success
 * - 500 with the error message when reconciliation throws
 *
 * @param autoModeService - Service whose `reconcileFeatureStates` performs the work
 * @returns An async Express request handler
 */
export function createReconcileHandler(autoModeService: AutoModeServiceCompat) {
  return async (req: Request, res: Response): Promise<void> => {
    // req.body can be undefined when no body parser handled the request;
    // destructuring it directly would throw outside of any try/catch.
    const body = (req.body ?? {}) as Partial<ReconcileRequest>;
    const projectPath = body.projectPath;

    // Reject missing, empty, and non-string values (e.g. arrays or numbers from JSON)
    if (typeof projectPath !== 'string' || projectPath.length === 0) {
      res.status(400).json({ success: false, error: 'Project path is required' });
      return;
    }

    logger.info(`Reconciling feature states for ${projectPath}`);

    try {
      const reconciledCount = await autoModeService.reconcileFeatureStates(projectPath);
      res.json({
        success: true,
        reconciledCount,
        message:
          reconciledCount > 0
            ? `Reconciled ${reconciledCount} feature(s)`
            : 'No features needed reconciliation',
      });
    } catch (error) {
      logger.error('Error reconciling feature states:', error);
      // Mirror the success payload's `success` flag so clients can branch on one field
      res.status(500).json({
        success: false,
        error: error instanceof Error ? error.message : 'Unknown error',
      });
    }
  };
}

View File

@@ -88,6 +88,10 @@ export class AutoModeServiceCompat {
return this.globalService.markAllRunningFeaturesInterrupted(reason);
}
/**
 * Reconcile feature states for a single project by delegating to the global service.
 *
 * @param projectPath - The project path to reconcile
 * @returns The count resolved by the global service's reconcileFeatureStates
 */
async reconcileFeatureStates(projectPath: string): Promise<number> {
  const reconciledCount = await this.globalService.reconcileFeatureStates(projectPath);
  return reconciledCount;
}
// ===========================================================================
// PER-PROJECT OPERATIONS (delegated to facades)
// ===========================================================================

View File

@@ -205,4 +205,21 @@ export class GlobalAutoModeService {
);
}
}
/**
 * Reconcile every feature state of one project.
 *
 * Thin delegate to FeatureStateManager.reconcileAllFeatureStates, which moves
 * features out of transient states (in_progress, interrupted, pipeline_*) into a
 * resting state and emits events so the UI picks up the corrected states.
 *
 * Intended for server initialization, where it covers:
 * - Clean shutdown: features already marked as interrupted
 * - Forced kill / crash: features left in in_progress or pipeline_* states
 *
 * @param projectPath - The project path to reconcile
 * @returns The number of features that were reconciled
 */
async reconcileFeatureStates(projectPath: string): Promise<number> {
  const reconciledCount = await this.featureStateManager.reconcileAllFeatureStates(projectPath);
  return reconciledCount;
}
}

View File

@@ -25,6 +25,7 @@ import {
import { getFeatureDir, getFeaturesDir } from '@automaker/platform';
import * as secureFs from '../lib/secure-fs.js';
import type { EventEmitter } from '../lib/events.js';
import type { AutoModeEventType } from './typed-event-bus.js';
import { getNotificationService } from './notification-service.js';
import { FeatureLoader } from './feature-loader.js';
@@ -268,20 +269,39 @@ export class FeatureStateManager {
}
/**
* Reset features that were stuck in transient states due to server crash.
* Called when auto mode is enabled to clean up from previous session.
* Shared helper that scans features in a project directory and resets any stuck
* in transient states (in_progress, interrupted, pipeline_*) back to resting states.
*
* Resets:
* - in_progress features back to ready (if has plan) or backlog (if no plan)
* Also resets:
* - generating planSpec status back to pending
* - in_progress tasks back to pending
*
* @param projectPath - The project path to reset features for
* @param projectPath - The project path to scan
* @param callerLabel - Label for log messages (e.g., 'resetStuckFeatures', 'reconcileAllFeatureStates')
* @returns Object with reconciledFeatures (id + status info), reconciledCount, and scanned count
*/
async resetStuckFeatures(projectPath: string): Promise<void> {
private async scanAndResetFeatures(
projectPath: string,
callerLabel: string
): Promise<{
reconciledFeatures: Array<{
id: string;
previousStatus: string | undefined;
newStatus: string | undefined;
}>;
reconciledFeatureIds: string[];
reconciledCount: number;
scanned: number;
}> {
const featuresDir = getFeaturesDir(projectPath);
let featuresScanned = 0;
let featuresReset = 0;
let scanned = 0;
let reconciledCount = 0;
const reconciledFeatureIds: string[] = [];
const reconciledFeatures: Array<{
id: string;
previousStatus: string | undefined;
newStatus: string | undefined;
}> = [];
try {
const entries = await secureFs.readdir(featuresDir, { withFileTypes: true });
@@ -289,7 +309,7 @@ export class FeatureStateManager {
for (const entry of entries) {
if (!entry.isDirectory()) continue;
featuresScanned++;
scanned++;
const featurePath = path.join(featuresDir, entry.name, 'feature.json');
const result = await readJsonWithRecovery<Feature | null>(featurePath, null, {
maxBackups: DEFAULT_BACKUP_COUNT,
@@ -300,14 +320,21 @@ export class FeatureStateManager {
if (!feature) continue;
let needsUpdate = false;
const originalStatus = feature.status;
// Reset in_progress features back to ready/backlog
if (feature.status === 'in_progress') {
// Reset features in active execution states back to a resting state
// After a server restart, no processes are actually running
const isActiveState =
originalStatus === 'in_progress' ||
originalStatus === 'interrupted' ||
(originalStatus != null && originalStatus.startsWith('pipeline_'));
if (isActiveState) {
const hasApprovedPlan = feature.planSpec?.status === 'approved';
feature.status = hasApprovedPlan ? 'ready' : 'backlog';
needsUpdate = true;
logger.info(
`[resetStuckFeatures] Reset feature ${feature.id} from in_progress to ${feature.status}`
`[${callerLabel}] Reset feature ${feature.id} from ${originalStatus} to ${feature.status}`
);
}
@@ -316,7 +343,7 @@ export class FeatureStateManager {
feature.planSpec.status = 'pending';
needsUpdate = true;
logger.info(
`[resetStuckFeatures] Reset feature ${feature.id} planSpec status from generating to pending`
`[${callerLabel}] Reset feature ${feature.id} planSpec status from generating to pending`
);
}
@@ -327,13 +354,13 @@ export class FeatureStateManager {
task.status = 'pending';
needsUpdate = true;
logger.info(
`[resetStuckFeatures] Reset task ${task.id} for feature ${feature.id} from in_progress to pending`
`[${callerLabel}] Reset task ${task.id} for feature ${feature.id} from in_progress to pending`
);
// Clear currentTaskId if it points to this reverted task
if (feature.planSpec?.currentTaskId === task.id) {
feature.planSpec.currentTaskId = undefined;
logger.info(
`[resetStuckFeatures] Cleared planSpec.currentTaskId for feature ${feature.id} (was pointing to reverted task ${task.id})`
`[${callerLabel}] Cleared planSpec.currentTaskId for feature ${feature.id} (was pointing to reverted task ${task.id})`
);
}
}
@@ -343,19 +370,94 @@ export class FeatureStateManager {
if (needsUpdate) {
feature.updatedAt = new Date().toISOString();
await atomicWriteJson(featurePath, feature, { backupCount: DEFAULT_BACKUP_COUNT });
featuresReset++;
reconciledCount++;
reconciledFeatureIds.push(feature.id);
reconciledFeatures.push({
id: feature.id,
previousStatus: originalStatus,
newStatus: feature.status,
});
}
}
logger.info(
`[resetStuckFeatures] Scanned ${featuresScanned} features, reset ${featuresReset} features for ${projectPath}`
);
} catch (error) {
// If features directory doesn't exist, that's fine
if ((error as NodeJS.ErrnoException).code !== 'ENOENT') {
logger.error(`[resetStuckFeatures] Error resetting features for ${projectPath}:`, error);
logger.error(`[${callerLabel}] Error resetting features for ${projectPath}:`, error);
}
}
return { reconciledFeatures, reconciledFeatureIds, reconciledCount, scanned };
}
/**
 * Clean up features left in transient states by a previous session.
 * Called when auto mode is enabled.
 *
 * Delegates the scan to scanAndResetFeatures and only logs a summary; no
 * events are emitted from here. The scan resets:
 * - in_progress, interrupted and pipeline_* features to ready (approved plan)
 *   or backlog (otherwise)
 * - generating planSpec status back to pending
 * - in_progress tasks back to pending
 *
 * @param projectPath - The project path to reset features for
 */
async resetStuckFeatures(projectPath: string): Promise<void> {
  const outcome = await this.scanAndResetFeatures(projectPath, 'resetStuckFeatures');

  logger.info(
    `[resetStuckFeatures] Scanned ${outcome.scanned} features, reset ${outcome.reconciledCount} features for ${projectPath}`
  );
}
/**
 * Reconcile all feature states for a project and notify the UI.
 *
 * Runs the shared scan (scanAndResetFeatures) to move features out of transient
 * states (in_progress, interrupted, pipeline_*), then emits:
 * - one 'feature_status_changed' event per feature the scan rewrote. This
 *   includes features whose only change was a planSpec/task reset, in which
 *   case status and previousStatus are equal.
 * - one 'features_reconciled' summary event, only when at least one feature
 *   was rewritten
 *
 * Meant to run once during server initialization, before the UI is served, so
 * feature state is consistent after any type of restart (clean, forced, crash).
 *
 * @param projectPath - The project path to reconcile features for
 * @returns The number of features that were reconciled
 */
async reconcileAllFeatureStates(projectPath: string): Promise<number> {
  logger.info(`[reconcileAllFeatureStates] Starting reconciliation for ${projectPath}`);

  const summary = await this.scanAndResetFeatures(projectPath, 'reconcileAllFeatureStates');
  const { reconciledCount, reconciledFeatureIds } = summary;

  // Per-feature notifications let the UI invalidate its cached feature data
  summary.reconciledFeatures.forEach((entry) => {
    this.emitAutoModeEvent('feature_status_changed', {
      featureId: entry.id,
      projectPath,
      status: entry.newStatus,
      previousStatus: entry.previousStatus,
      reason: 'server_restart_reconciliation',
    });
  });

  // Single bulk notification, skipped when nothing had to be changed
  if (reconciledCount > 0) {
    this.emitAutoModeEvent('features_reconciled', {
      projectPath,
      reconciledCount,
      reconciledFeatureIds,
      message: `Reconciled ${reconciledCount} feature(s) after server restart`,
    });
  }

  logger.info(
    `[reconcileAllFeatureStates] Scanned ${summary.scanned} features, reconciled ${reconciledCount} for ${projectPath}`
  );

  return reconciledCount;
}
/**
@@ -532,7 +634,7 @@ export class FeatureStateManager {
* @param eventType - The event type (e.g., 'auto_mode_summary')
* @param data - The event payload
*/
private emitAutoModeEvent(eventType: string, data: Record<string, unknown>): void {
private emitAutoModeEvent(eventType: AutoModeEventType, data: Record<string, unknown>): void {
// Wrap the event in auto-mode:event format expected by the client
this.events.emit('auto-mode:event', {
type: eventType,

View File

@@ -40,9 +40,13 @@ export type AutoModeEventType =
| 'plan_rejected'
| 'plan_revision_requested'
| 'plan_revision_warning'
| 'plan_spec_updated'
| 'pipeline_step_started'
| 'pipeline_step_complete'
| string; // Allow other strings for extensibility
| 'pipeline_test_failed'
| 'pipeline_merge_conflict'
| 'feature_status_changed'
| 'features_reconciled';
/**
* TypedEventBus wraps an EventEmitter to provide type-safe event emission

View File

@@ -38,6 +38,8 @@ const FEATURE_LIST_INVALIDATION_EVENTS: AutoModeEvent['type'][] = [
'plan_rejected',
'pipeline_step_started',
'pipeline_step_complete',
'feature_status_changed',
'features_reconciled',
];
/**

View File

@@ -3,7 +3,7 @@
*/
import type { ClaudeUsageResponse, CodexUsageResponse } from '@/store/app-store';
import type { ParsedTask } from '@automaker/types';
import type { ParsedTask, FeatureStatusWithPipeline } from '@automaker/types';
export interface ImageAttachment {
id?: string; // Optional - may not be present in messages loaded from server
@@ -359,6 +359,21 @@ export type AutoModeEvent =
title?: string;
status?: string;
}>;
}
| {
type: 'feature_status_changed';
featureId: string;
projectPath?: string;
status: FeatureStatusWithPipeline;
previousStatus: FeatureStatusWithPipeline;
reason?: string;
}
| {
type: 'features_reconciled';
projectPath?: string;
reconciledCount: number;
reconciledFeatureIds: string[];
message: string;
};
export type SpecRegenerationEvent =