feat: Add task retry logic and improve max turns limit

This commit is contained in:
gsxdsm
2026-02-16 22:10:50 -08:00
parent 30fce3f746
commit aa940d44ff
5 changed files with 419 additions and 6 deletions

View File

@@ -331,7 +331,7 @@ export class AgentExecutor {
userFeedback
);
const taskStream = provider.executeQuery(
this.buildExecOpts(options, taskPrompt, Math.min(sdkOptions?.maxTurns ?? 50, 50))
this.buildExecOpts(options, taskPrompt, Math.min(sdkOptions?.maxTurns ?? 50, 150))
);
let taskOutput = '',
taskStartDetected = false,

View File

@@ -270,6 +270,84 @@ ${feature.spec}
}
);
// Check for incomplete tasks after agent execution.
// The agent may have finished early (hit max turns, decided it was done, etc.)
// while tasks are still pending. If so, re-run the agent to complete remaining tasks.
const MAX_TASK_RETRY_ATTEMPTS = 3;
let taskRetryAttempts = 0;
while (!abortController.signal.aborted && taskRetryAttempts < MAX_TASK_RETRY_ATTEMPTS) {
const currentFeature = await this.loadFeatureFn(projectPath, featureId);
if (!currentFeature?.planSpec?.tasks) break;
const pendingTasks = currentFeature.planSpec.tasks.filter(
(t) => t.status === 'pending' || t.status === 'in_progress'
);
if (pendingTasks.length === 0) break;
taskRetryAttempts++;
const totalTasks = currentFeature.planSpec.tasks.length;
const completedTasks = currentFeature.planSpec.tasks.filter(
(t) => t.status === 'completed'
).length;
logger.info(
`[executeFeature] Feature ${featureId} has ${pendingTasks.length} incomplete tasks (${completedTasks}/${totalTasks} completed). Re-running agent (attempt ${taskRetryAttempts}/${MAX_TASK_RETRY_ATTEMPTS})`
);
this.eventBus.emitAutoModeEvent('auto_mode_progress', {
featureId,
branchName: feature.branchName ?? null,
content: `Agent finished with ${pendingTasks.length} tasks remaining. Re-running to complete tasks (attempt ${taskRetryAttempts}/${MAX_TASK_RETRY_ATTEMPTS})...`,
projectPath,
});
// Build a continuation prompt that tells the agent to finish remaining tasks
const remainingTasksList = pendingTasks
.map((t) => `- ${t.id}: ${t.description} (${t.status})`)
.join('\n');
const continuationPrompt = `## Continue Implementation - Incomplete Tasks
The previous agent session ended before all tasks were completed. Please continue implementing the remaining tasks.
**Completed:** ${completedTasks}/${totalTasks} tasks
**Remaining tasks:**
${remainingTasksList}
Please continue from where you left off and complete all remaining tasks. Use the same [TASK_START:ID] and [TASK_COMPLETE:ID] markers for each task.`;
await this.runAgentFn(
workDir,
featureId,
continuationPrompt,
abortController,
projectPath,
undefined,
model,
{
projectPath,
planningMode: 'skip',
requirePlanApproval: false,
systemPrompt: combinedSystemPrompt || undefined,
autoLoadClaudeMd,
thinkingLevel: feature.thinkingLevel,
branchName: feature.branchName ?? null,
}
);
}
// Log if tasks are still incomplete after retry attempts
if (taskRetryAttempts >= MAX_TASK_RETRY_ATTEMPTS) {
const finalFeature = await this.loadFeatureFn(projectPath, featureId);
const stillPending = finalFeature?.planSpec?.tasks?.filter(
(t) => t.status === 'pending' || t.status === 'in_progress'
);
if (stillPending && stillPending.length > 0) {
logger.warn(
`[executeFeature] Feature ${featureId} still has ${stillPending.length} incomplete tasks after ${MAX_TASK_RETRY_ATTEMPTS} retry attempts. Moving to final status.`
);
}
}
const pipelineConfig = await pipelineService.getPipelineConfig(projectPath);
const excludedStepIds = new Set(feature.excludedPipelineSteps || []);
const sortedSteps = [...(pipelineConfig?.steps || [])]
@@ -300,6 +378,13 @@ ${feature.spec}
await this.updateFeatureStatusFn(projectPath, featureId, finalStatus);
this.recordSuccessFn();
// Check final task completion state for accurate reporting
const completedFeature = await this.loadFeatureFn(projectPath, featureId);
const totalTasks = completedFeature?.planSpec?.tasks?.length ?? 0;
const completedTasks =
completedFeature?.planSpec?.tasks?.filter((t) => t.status === 'completed').length ?? 0;
const hasIncompleteTasks = totalTasks > 0 && completedTasks < totalTasks;
try {
const outputPath = path.join(getFeatureDir(projectPath, featureId), 'agent-output.md');
let agentOutput = '';
@@ -326,12 +411,18 @@ ${feature.spec}
/* learnings recording failed */
}
const elapsedSeconds = Math.round((Date.now() - tempRunningFeature.startTime) / 1000);
let completionMessage = `Feature completed in ${elapsedSeconds}s`;
if (finalStatus === 'verified') completionMessage += ' - auto-verified';
if (hasIncompleteTasks)
completionMessage += ` (${completedTasks}/${totalTasks} tasks completed)`;
this.eventBus.emitAutoModeEvent('auto_mode_feature_complete', {
featureId,
featureName: feature.title,
branchName: feature.branchName ?? null,
passes: true,
message: `Feature completed in ${Math.round((Date.now() - tempRunningFeature.startTime) / 1000)}s${finalStatus === 'verified' ? ' - auto-verified' : ''}`,
message: completionMessage,
projectPath,
model: tempRunningFeature.model,
provider: tempRunningFeature.provider,

View File

@@ -115,10 +115,13 @@ export class FeatureStateManager {
// This prevents cards in "waiting for review" from appearing to still have running tasks
if (feature.planSpec?.tasks) {
let tasksFinalized = 0;
let tasksPending = 0;
for (const task of feature.planSpec.tasks) {
if (task.status === 'in_progress') {
task.status = 'completed';
tasksFinalized++;
} else if (task.status === 'pending') {
tasksPending++;
}
}
if (tasksFinalized > 0) {
@@ -126,6 +129,11 @@ export class FeatureStateManager {
`[updateFeatureStatus] Finalized ${tasksFinalized} in_progress tasks for feature ${featureId} moving to waiting_approval`
);
}
if (tasksPending > 0) {
logger.warn(
`[updateFeatureStatus] Feature ${featureId} moving to waiting_approval with ${tasksPending} pending (never started) tasks out of ${feature.planSpec.tasks.length} total`
);
}
// Update tasksCompleted count to reflect actual completed tasks
feature.planSpec.tasksCompleted = feature.planSpec.tasks.filter(
(t) => t.status === 'completed'
@@ -136,11 +144,26 @@ export class FeatureStateManager {
// Also finalize in_progress tasks when moving directly to verified (skipTests=false)
// Do NOT mark pending tasks as completed - they were never started
if (feature.planSpec?.tasks) {
let tasksFinalized = 0;
let tasksPending = 0;
for (const task of feature.planSpec.tasks) {
if (task.status === 'in_progress') {
task.status = 'completed';
tasksFinalized++;
} else if (task.status === 'pending') {
tasksPending++;
}
}
if (tasksFinalized > 0) {
logger.info(
`[updateFeatureStatus] Finalized ${tasksFinalized} in_progress tasks for feature ${featureId} moving to verified`
);
}
if (tasksPending > 0) {
logger.warn(
`[updateFeatureStatus] Feature ${featureId} moving to verified with ${tasksPending} pending (never started) tasks out of ${feature.planSpec.tasks.length} total`
);
}
feature.planSpec.tasksCompleted = feature.planSpec.tasks.filter(
(t) => t.status === 'completed'
).length;

View File

@@ -677,6 +677,302 @@ describe('execution-service.ts', () => {
});
});
describe('executeFeature - incomplete task retry', () => {
/**
 * Builds a new ExecutionService from the mock collaborators currently bound in
 * the enclosing scope. The bindings are read when the factory is invoked, so a
 * test can reassign a mock (e.g. mockLoadFeatureFn) before calling it.
 */
const createServiceWithMocks = () =>
  new ExecutionService(
    mockEventBus,
    mockConcurrencyManager,
    mockWorktreeResolver,
    mockSettingsService,
    mockRunAgentFn,
    mockExecutePipelineFn,
    mockUpdateFeatureStatusFn,
    mockLoadFeatureFn,
    mockGetPlanningPromptPrefixFn,
    mockSaveFeatureSummaryFn,
    mockRecordLearningsFn,
    mockContextExistsFn,
    mockResumeFeatureFn,
    mockTrackFailureFn,
    mockSignalPauseFn,
    mockRecordSuccessFn,
    mockSaveExecutionStateFn,
    mockLoadContextFilesFn
  );
it('does not re-run agent when feature has no tasks', async () => {
  // Every load resolves to the shared testFeature fixture.
  // NOTE(review): this relies on testFeature having no planSpec/tasks - confirm
  // against the fixture definition.
  mockLoadFeatureFn = vi.fn().mockResolvedValue(testFeature);

  const service = createServiceWithMocks();
  await service.executeFeature('/test/project', 'feature-1');

  // Exactly one agent invocation: the initial run, with no retry afterwards.
  expect(mockRunAgentFn).toHaveBeenCalledTimes(1);
});
it('does not re-run agent when all tasks are completed', async () => {
  // Approved plan whose two tasks are both already marked completed.
  const fullyCompletedFeature: Feature = {
    ...testFeature,
    planSpec: {
      status: 'approved',
      content: 'Plan',
      tasks: [
        { id: 'T001', title: 'Task 1', status: 'completed', description: 'First task' },
        { id: 'T002', title: 'Task 2', status: 'completed', description: 'Second task' },
      ],
      tasksCompleted: 2,
    },
  };
  mockLoadFeatureFn = vi.fn().mockResolvedValue(fullyCompletedFeature);

  const service = createServiceWithMocks();
  await service.executeFeature('/test/project', 'feature-1');

  // No task is pending or in_progress, so no retry run is expected.
  // NOTE(review): per the original author's note, the already-approved plan makes
  // executeFeature recurse and runAgentFn fires once inside that inner call -
  // confirm against executeFeature.
  expect(mockRunAgentFn).toHaveBeenCalledTimes(1);
});
it('re-runs agent when there are pending tasks after initial execution', async () => {
  // State seen once the retry run has finished: every task completed.
  const featureAllDone: Feature = {
    ...testFeature,
    planSpec: {
      status: 'approved',
      content: 'Plan',
      tasks: [
        { id: 'T001', title: 'Task 1', status: 'completed', description: 'First task' },
        { id: 'T002', title: 'Task 2', status: 'completed', description: 'Second task' },
        { id: 'T003', title: 'Task 3', status: 'completed', description: 'Third task' },
      ],
      tasksCompleted: 3,
    },
  };
  // State seen before the retry: one task done, two never started.
  const featureWithPendingTasks: Feature = {
    ...testFeature,
    planSpec: {
      status: 'approved',
      content: 'Plan',
      tasks: [
        { id: 'T001', title: 'Task 1', status: 'completed', description: 'First task' },
        { id: 'T002', title: 'Task 2', status: 'pending', description: 'Second task' },
        { id: 'T003', title: 'Task 3', status: 'pending', description: 'Third task' },
      ],
      tasksCompleted: 1,
    },
  };

  // The first two loads report pending work; every later load reports completion.
  // NOTE(review): presumably load #1 is the initial load in executeFeature and
  // load #2 is the first post-run incomplete-task check - confirm the ordering.
  mockLoadFeatureFn = vi
    .fn()
    .mockReturnValueOnce(featureWithPendingTasks)
    .mockReturnValueOnce(featureWithPendingTasks)
    .mockReturnValue(featureAllDone);

  const service = createServiceWithMocks();
  await service.executeFeature('/test/project', 'feature-1', false, false, undefined, {
    continuationPrompt: 'Continue',
    _calledInternally: true,
  });

  // Two agent invocations in total: the initial run plus a single retry.
  expect(mockRunAgentFn).toHaveBeenCalledTimes(2);

  // The third positional argument of the retry call is the continuation prompt;
  // it must carry the heading and name both unfinished tasks.
  const retryCallArgs = mockRunAgentFn.mock.calls[1];
  const expectedFragments = ['Continue Implementation - Incomplete Tasks', 'T002', 'T003'];
  for (const fragment of expectedFragments) {
    expect(retryCallArgs[2]).toContain(fragment);
  }

  // A progress event announcing the re-run must have been emitted.
  expect(mockEventBus.emitAutoModeEvent).toHaveBeenCalledWith(
    'auto_mode_progress',
    expect.objectContaining({
      featureId: 'feature-1',
      content: expect.stringContaining('Re-running to complete tasks'),
    })
  );
});
it('respects maximum retry attempts', async () => {
  // T002 stays pending on every load, i.e. the agent never manages to finish it.
  const perpetuallyPendingFeature: Feature = {
    ...testFeature,
    planSpec: {
      status: 'approved',
      content: 'Plan',
      tasks: [
        { id: 'T001', title: 'Task 1', status: 'completed', description: 'First task' },
        { id: 'T002', title: 'Task 2', status: 'pending', description: 'Second task' },
      ],
      tasksCompleted: 1,
    },
  };
  mockLoadFeatureFn = vi.fn().mockResolvedValue(perpetuallyPendingFeature);

  const service = createServiceWithMocks();
  await service.executeFeature('/test/project', 'feature-1', false, false, undefined, {
    continuationPrompt: 'Continue',
    _calledInternally: true,
  });

  // One initial run followed by three retries, after which retrying stops.
  expect(mockRunAgentFn).toHaveBeenCalledTimes(4);

  // The feature is still moved to its final status despite the unfinished task.
  expect(mockUpdateFeatureStatusFn).toHaveBeenCalledWith(
    '/test/project',
    'feature-1',
    'verified'
  );
});
it('stops retrying when abort signal is triggered', async () => {
  // A pending task remains on every load, so only the abort signal can stop retries.
  const featureWithPendingTasks: Feature = {
    ...testFeature,
    planSpec: {
      status: 'approved',
      content: 'Plan',
      tasks: [
        { id: 'T001', title: 'Task 1', status: 'completed', description: 'First task' },
        { id: 'T002', title: 'Task 2', status: 'pending', description: 'Second task' },
      ],
      tasksCompleted: 1,
    },
  };
  mockLoadFeatureFn = vi.fn().mockResolvedValue(featureWithPendingTasks);

  // Abort from inside the agent run itself. The mock aborts on every invocation,
  // so the very first run already leaves the controller aborted.
  // (The previous version tracked a run counter and captured the controller, but
  // the counter guard was always true and the captured value was never read.)
  mockRunAgentFn = vi
    .fn()
    .mockImplementation(async (_wd, _fid, _prompt, abortCtrl: AbortController) => {
      abortCtrl.abort();
    });

  const svc = createServiceWithMocks();
  await svc.executeFeature('/test/project', 'feature-1', false, false, undefined, {
    continuationPrompt: 'Continue',
    _calledInternally: true,
  });

  // Only the initial run happens; the aborted signal prevents any retry run.
  expect(mockRunAgentFn).toHaveBeenCalledTimes(1);
});
it('re-runs agent for in_progress tasks (not just pending)', async () => {
  // State seen once the retry run has finished: both tasks completed.
  const featureAllDone: Feature = {
    ...testFeature,
    planSpec: {
      status: 'approved',
      content: 'Plan',
      tasks: [
        { id: 'T001', title: 'Task 1', status: 'completed', description: 'First task' },
        { id: 'T002', title: 'Task 2', status: 'completed', description: 'Second task' },
      ],
      tasksCompleted: 2,
    },
  };
  // State seen before the retry: T002 was started but never marked complete.
  const featureWithInProgressTask: Feature = {
    ...testFeature,
    planSpec: {
      status: 'approved',
      content: 'Plan',
      tasks: [
        { id: 'T001', title: 'Task 1', status: 'completed', description: 'First task' },
        { id: 'T002', title: 'Task 2', status: 'in_progress', description: 'Second task' },
      ],
      tasksCompleted: 1,
      currentTaskId: 'T002',
    },
  };

  // The first two loads report the in_progress task; later loads report completion.
  mockLoadFeatureFn = vi
    .fn()
    .mockReturnValueOnce(featureWithInProgressTask)
    .mockReturnValueOnce(featureWithInProgressTask)
    .mockReturnValue(featureAllDone);

  const service = createServiceWithMocks();
  await service.executeFeature('/test/project', 'feature-1', false, false, undefined, {
    continuationPrompt: 'Continue',
    _calledInternally: true,
  });

  // An in_progress task counts as incomplete, so one retry run is expected.
  expect(mockRunAgentFn).toHaveBeenCalledTimes(2);

  // The retry prompt (third positional argument) names the task and its status.
  const retryCallArgs = mockRunAgentFn.mock.calls[1];
  for (const fragment of ['T002', 'in_progress']) {
    expect(retryCallArgs[2]).toContain(fragment);
  }
});
it('uses planningMode skip and no plan approval for retry runs', async () => {
  // State seen once the retry run has finished: both tasks completed.
  const featureAllDone: Feature = {
    ...testFeature,
    planSpec: {
      status: 'approved',
      content: 'Plan',
      tasks: [
        { id: 'T001', title: 'Task 1', status: 'completed', description: 'First task' },
        { id: 'T002', title: 'Task 2', status: 'completed', description: 'Second task' },
      ],
      tasksCompleted: 2,
    },
  };
  // The feature itself asks for full planning with approval; the retry run is
  // expected to override both settings.
  const featureWithPendingTasks: Feature = {
    ...testFeature,
    planningMode: 'full',
    requirePlanApproval: true,
    planSpec: {
      status: 'approved',
      content: 'Plan',
      tasks: [
        { id: 'T001', title: 'Task 1', status: 'completed', description: 'First task' },
        { id: 'T002', title: 'Task 2', status: 'pending', description: 'Second task' },
      ],
      tasksCompleted: 1,
    },
  };

  // The first two loads report pending work; every later load reports completion.
  mockLoadFeatureFn = vi
    .fn()
    .mockReturnValueOnce(featureWithPendingTasks)
    .mockReturnValueOnce(featureWithPendingTasks)
    .mockReturnValue(featureAllDone);

  const service = createServiceWithMocks();
  await service.executeFeature('/test/project', 'feature-1', false, false, undefined, {
    continuationPrompt: 'Continue',
    _calledInternally: true,
  });

  // The second runAgentFn call is the retry; its eighth positional argument is
  // the options object passed to the agent.
  const retryCallArgs = mockRunAgentFn.mock.calls[1];
  const retryOptions = retryCallArgs[7];
  expect(retryOptions.planningMode).toBe('skip');
  expect(retryOptions.requirePlanApproval).toBe(false);
});
});
describe('executeFeature - error handling', () => {
it('classifies and emits error event', async () => {
const testError = new Error('Test error');

View File

@@ -164,13 +164,16 @@ export const AgentInfoPanel = memo(function AgentInfoPanel({
const currentTaskId = planSpec.currentTaskId;
return planSpec.tasks.map((task: ParsedTask, index: number) => {
// If the feature is done (waiting_approval/verified), all tasks are completed
// This is a defensive UI-side check: the server should have already finalized
// task statuses, but stale data from before the fix could still show spinners
// When feature is finished (waiting_approval/verified), finalize task display:
// - in_progress tasks → completed (agent was working on them when it finished)
// - pending tasks stay pending (they were never started)
// - completed tasks stay completed
// This matches server-side behavior in feature-state-manager.ts
if (isFeatureFinished) {
const finalStatus = task.status === 'in_progress' ? 'completed' : task.status;
return {
content: task.description,
status: 'completed' as const,
status: (finalStatus || 'completed') as 'pending' | 'in_progress' | 'completed',
};
}