mirror of
https://github.com/AutoMaker-Org/automaker.git
synced 2026-03-16 21:53:07 +00:00
feat: Add task retry logic and improve max turns limit
This commit is contained in:
@@ -331,7 +331,7 @@ export class AgentExecutor {
|
||||
userFeedback
|
||||
);
|
||||
const taskStream = provider.executeQuery(
|
||||
this.buildExecOpts(options, taskPrompt, Math.min(sdkOptions?.maxTurns ?? 50, 50))
|
||||
this.buildExecOpts(options, taskPrompt, Math.min(sdkOptions?.maxTurns ?? 50, 150))
|
||||
);
|
||||
let taskOutput = '',
|
||||
taskStartDetected = false,
|
||||
|
||||
@@ -270,6 +270,84 @@ ${feature.spec}
|
||||
}
|
||||
);
|
||||
|
||||
// Check for incomplete tasks after agent execution.
|
||||
// The agent may have finished early (hit max turns, decided it was done, etc.)
|
||||
// while tasks are still pending. If so, re-run the agent to complete remaining tasks.
|
||||
const MAX_TASK_RETRY_ATTEMPTS = 3;
|
||||
let taskRetryAttempts = 0;
|
||||
while (!abortController.signal.aborted && taskRetryAttempts < MAX_TASK_RETRY_ATTEMPTS) {
|
||||
const currentFeature = await this.loadFeatureFn(projectPath, featureId);
|
||||
if (!currentFeature?.planSpec?.tasks) break;
|
||||
|
||||
const pendingTasks = currentFeature.planSpec.tasks.filter(
|
||||
(t) => t.status === 'pending' || t.status === 'in_progress'
|
||||
);
|
||||
if (pendingTasks.length === 0) break;
|
||||
|
||||
taskRetryAttempts++;
|
||||
const totalTasks = currentFeature.planSpec.tasks.length;
|
||||
const completedTasks = currentFeature.planSpec.tasks.filter(
|
||||
(t) => t.status === 'completed'
|
||||
).length;
|
||||
logger.info(
|
||||
`[executeFeature] Feature ${featureId} has ${pendingTasks.length} incomplete tasks (${completedTasks}/${totalTasks} completed). Re-running agent (attempt ${taskRetryAttempts}/${MAX_TASK_RETRY_ATTEMPTS})`
|
||||
);
|
||||
|
||||
this.eventBus.emitAutoModeEvent('auto_mode_progress', {
|
||||
featureId,
|
||||
branchName: feature.branchName ?? null,
|
||||
content: `Agent finished with ${pendingTasks.length} tasks remaining. Re-running to complete tasks (attempt ${taskRetryAttempts}/${MAX_TASK_RETRY_ATTEMPTS})...`,
|
||||
projectPath,
|
||||
});
|
||||
|
||||
// Build a continuation prompt that tells the agent to finish remaining tasks
|
||||
const remainingTasksList = pendingTasks
|
||||
.map((t) => `- ${t.id}: ${t.description} (${t.status})`)
|
||||
.join('\n');
|
||||
|
||||
const continuationPrompt = `## Continue Implementation - Incomplete Tasks
|
||||
|
||||
The previous agent session ended before all tasks were completed. Please continue implementing the remaining tasks.
|
||||
|
||||
**Completed:** ${completedTasks}/${totalTasks} tasks
|
||||
**Remaining tasks:**
|
||||
${remainingTasksList}
|
||||
|
||||
Please continue from where you left off and complete all remaining tasks. Use the same [TASK_START:ID] and [TASK_COMPLETE:ID] markers for each task.`;
|
||||
|
||||
await this.runAgentFn(
|
||||
workDir,
|
||||
featureId,
|
||||
continuationPrompt,
|
||||
abortController,
|
||||
projectPath,
|
||||
undefined,
|
||||
model,
|
||||
{
|
||||
projectPath,
|
||||
planningMode: 'skip',
|
||||
requirePlanApproval: false,
|
||||
systemPrompt: combinedSystemPrompt || undefined,
|
||||
autoLoadClaudeMd,
|
||||
thinkingLevel: feature.thinkingLevel,
|
||||
branchName: feature.branchName ?? null,
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
// Log if tasks are still incomplete after retry attempts
|
||||
if (taskRetryAttempts >= MAX_TASK_RETRY_ATTEMPTS) {
|
||||
const finalFeature = await this.loadFeatureFn(projectPath, featureId);
|
||||
const stillPending = finalFeature?.planSpec?.tasks?.filter(
|
||||
(t) => t.status === 'pending' || t.status === 'in_progress'
|
||||
);
|
||||
if (stillPending && stillPending.length > 0) {
|
||||
logger.warn(
|
||||
`[executeFeature] Feature ${featureId} still has ${stillPending.length} incomplete tasks after ${MAX_TASK_RETRY_ATTEMPTS} retry attempts. Moving to final status.`
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
const pipelineConfig = await pipelineService.getPipelineConfig(projectPath);
|
||||
const excludedStepIds = new Set(feature.excludedPipelineSteps || []);
|
||||
const sortedSteps = [...(pipelineConfig?.steps || [])]
|
||||
@@ -300,6 +378,13 @@ ${feature.spec}
|
||||
await this.updateFeatureStatusFn(projectPath, featureId, finalStatus);
|
||||
this.recordSuccessFn();
|
||||
|
||||
// Check final task completion state for accurate reporting
|
||||
const completedFeature = await this.loadFeatureFn(projectPath, featureId);
|
||||
const totalTasks = completedFeature?.planSpec?.tasks?.length ?? 0;
|
||||
const completedTasks =
|
||||
completedFeature?.planSpec?.tasks?.filter((t) => t.status === 'completed').length ?? 0;
|
||||
const hasIncompleteTasks = totalTasks > 0 && completedTasks < totalTasks;
|
||||
|
||||
try {
|
||||
const outputPath = path.join(getFeatureDir(projectPath, featureId), 'agent-output.md');
|
||||
let agentOutput = '';
|
||||
@@ -326,12 +411,18 @@ ${feature.spec}
|
||||
/* learnings recording failed */
|
||||
}
|
||||
|
||||
const elapsedSeconds = Math.round((Date.now() - tempRunningFeature.startTime) / 1000);
|
||||
let completionMessage = `Feature completed in ${elapsedSeconds}s`;
|
||||
if (finalStatus === 'verified') completionMessage += ' - auto-verified';
|
||||
if (hasIncompleteTasks)
|
||||
completionMessage += ` (${completedTasks}/${totalTasks} tasks completed)`;
|
||||
|
||||
this.eventBus.emitAutoModeEvent('auto_mode_feature_complete', {
|
||||
featureId,
|
||||
featureName: feature.title,
|
||||
branchName: feature.branchName ?? null,
|
||||
passes: true,
|
||||
message: `Feature completed in ${Math.round((Date.now() - tempRunningFeature.startTime) / 1000)}s${finalStatus === 'verified' ? ' - auto-verified' : ''}`,
|
||||
message: completionMessage,
|
||||
projectPath,
|
||||
model: tempRunningFeature.model,
|
||||
provider: tempRunningFeature.provider,
|
||||
|
||||
@@ -115,10 +115,13 @@ export class FeatureStateManager {
|
||||
// This prevents cards in "waiting for review" from appearing to still have running tasks
|
||||
if (feature.planSpec?.tasks) {
|
||||
let tasksFinalized = 0;
|
||||
let tasksPending = 0;
|
||||
for (const task of feature.planSpec.tasks) {
|
||||
if (task.status === 'in_progress') {
|
||||
task.status = 'completed';
|
||||
tasksFinalized++;
|
||||
} else if (task.status === 'pending') {
|
||||
tasksPending++;
|
||||
}
|
||||
}
|
||||
if (tasksFinalized > 0) {
|
||||
@@ -126,6 +129,11 @@ export class FeatureStateManager {
|
||||
`[updateFeatureStatus] Finalized ${tasksFinalized} in_progress tasks for feature ${featureId} moving to waiting_approval`
|
||||
);
|
||||
}
|
||||
if (tasksPending > 0) {
|
||||
logger.warn(
|
||||
`[updateFeatureStatus] Feature ${featureId} moving to waiting_approval with ${tasksPending} pending (never started) tasks out of ${feature.planSpec.tasks.length} total`
|
||||
);
|
||||
}
|
||||
// Update tasksCompleted count to reflect actual completed tasks
|
||||
feature.planSpec.tasksCompleted = feature.planSpec.tasks.filter(
|
||||
(t) => t.status === 'completed'
|
||||
@@ -136,11 +144,26 @@ export class FeatureStateManager {
|
||||
// Also finalize in_progress tasks when moving directly to verified (skipTests=false)
|
||||
// Do NOT mark pending tasks as completed - they were never started
|
||||
if (feature.planSpec?.tasks) {
|
||||
let tasksFinalized = 0;
|
||||
let tasksPending = 0;
|
||||
for (const task of feature.planSpec.tasks) {
|
||||
if (task.status === 'in_progress') {
|
||||
task.status = 'completed';
|
||||
tasksFinalized++;
|
||||
} else if (task.status === 'pending') {
|
||||
tasksPending++;
|
||||
}
|
||||
}
|
||||
if (tasksFinalized > 0) {
|
||||
logger.info(
|
||||
`[updateFeatureStatus] Finalized ${tasksFinalized} in_progress tasks for feature ${featureId} moving to verified`
|
||||
);
|
||||
}
|
||||
if (tasksPending > 0) {
|
||||
logger.warn(
|
||||
`[updateFeatureStatus] Feature ${featureId} moving to verified with ${tasksPending} pending (never started) tasks out of ${feature.planSpec.tasks.length} total`
|
||||
);
|
||||
}
|
||||
feature.planSpec.tasksCompleted = feature.planSpec.tasks.filter(
|
||||
(t) => t.status === 'completed'
|
||||
).length;
|
||||
|
||||
@@ -677,6 +677,302 @@ describe('execution-service.ts', () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe('executeFeature - incomplete task retry', () => {
|
||||
const createServiceWithMocks = () => {
|
||||
return new ExecutionService(
|
||||
mockEventBus,
|
||||
mockConcurrencyManager,
|
||||
mockWorktreeResolver,
|
||||
mockSettingsService,
|
||||
mockRunAgentFn,
|
||||
mockExecutePipelineFn,
|
||||
mockUpdateFeatureStatusFn,
|
||||
mockLoadFeatureFn,
|
||||
mockGetPlanningPromptPrefixFn,
|
||||
mockSaveFeatureSummaryFn,
|
||||
mockRecordLearningsFn,
|
||||
mockContextExistsFn,
|
||||
mockResumeFeatureFn,
|
||||
mockTrackFailureFn,
|
||||
mockSignalPauseFn,
|
||||
mockRecordSuccessFn,
|
||||
mockSaveExecutionStateFn,
|
||||
mockLoadContextFilesFn
|
||||
);
|
||||
};
|
||||
|
||||
it('does not re-run agent when feature has no tasks', async () => {
|
||||
// Feature with no planSpec/tasks - should complete normally with 1 agent call
|
||||
mockLoadFeatureFn = vi.fn().mockResolvedValue(testFeature);
|
||||
const svc = createServiceWithMocks();
|
||||
|
||||
await svc.executeFeature('/test/project', 'feature-1');
|
||||
|
||||
expect(mockRunAgentFn).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it('does not re-run agent when all tasks are completed', async () => {
|
||||
const featureWithCompletedTasks: Feature = {
|
||||
...testFeature,
|
||||
planSpec: {
|
||||
status: 'approved',
|
||||
content: 'Plan',
|
||||
tasks: [
|
||||
{ id: 'T001', title: 'Task 1', status: 'completed', description: 'First task' },
|
||||
{ id: 'T002', title: 'Task 2', status: 'completed', description: 'Second task' },
|
||||
],
|
||||
tasksCompleted: 2,
|
||||
},
|
||||
};
|
||||
mockLoadFeatureFn = vi.fn().mockResolvedValue(featureWithCompletedTasks);
|
||||
const svc = createServiceWithMocks();
|
||||
|
||||
await svc.executeFeature('/test/project', 'feature-1');
|
||||
|
||||
// Expect exactly one runAgentFn call: every task is already completed, so no retry run is triggered.
|
||||
// The approved plan triggers recursive executeFeature, so runAgentFn is called once in the inner call
|
||||
expect(mockRunAgentFn).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it('re-runs agent when there are pending tasks after initial execution', async () => {
|
||||
const featureWithPendingTasks: Feature = {
|
||||
...testFeature,
|
||||
planSpec: {
|
||||
status: 'approved',
|
||||
content: 'Plan',
|
||||
tasks: [
|
||||
{ id: 'T001', title: 'Task 1', status: 'completed', description: 'First task' },
|
||||
{ id: 'T002', title: 'Task 2', status: 'pending', description: 'Second task' },
|
||||
{ id: 'T003', title: 'Task 3', status: 'pending', description: 'Third task' },
|
||||
],
|
||||
tasksCompleted: 1,
|
||||
},
|
||||
};
|
||||
|
||||
// After first agent run, loadFeature returns feature with pending tasks
|
||||
// After second agent run, loadFeature returns feature with all tasks completed
|
||||
const featureAllDone: Feature = {
|
||||
...testFeature,
|
||||
planSpec: {
|
||||
status: 'approved',
|
||||
content: 'Plan',
|
||||
tasks: [
|
||||
{ id: 'T001', title: 'Task 1', status: 'completed', description: 'First task' },
|
||||
{ id: 'T002', title: 'Task 2', status: 'completed', description: 'Second task' },
|
||||
{ id: 'T003', title: 'Task 3', status: 'completed', description: 'Third task' },
|
||||
],
|
||||
tasksCompleted: 3,
|
||||
},
|
||||
};
|
||||
|
||||
let loadCallCount = 0;
|
||||
mockLoadFeatureFn = vi.fn().mockImplementation(() => {
|
||||
loadCallCount++;
|
||||
// First call: initial feature load at the top of executeFeature
|
||||
// Second call: after first agent run (check for incomplete tasks) - has pending tasks
|
||||
// Third call: after second agent run (check for incomplete tasks) - all done
|
||||
if (loadCallCount <= 2) return featureWithPendingTasks;
|
||||
return featureAllDone;
|
||||
});
|
||||
|
||||
const svc = createServiceWithMocks();
|
||||
await svc.executeFeature('/test/project', 'feature-1', false, false, undefined, {
|
||||
continuationPrompt: 'Continue',
|
||||
_calledInternally: true,
|
||||
});
|
||||
|
||||
// Should have called runAgentFn twice: initial + one retry
|
||||
expect(mockRunAgentFn).toHaveBeenCalledTimes(2);
|
||||
|
||||
// The retry call should contain continuation prompt about incomplete tasks
|
||||
const retryCallArgs = mockRunAgentFn.mock.calls[1];
|
||||
expect(retryCallArgs[2]).toContain('Continue Implementation - Incomplete Tasks');
|
||||
expect(retryCallArgs[2]).toContain('T002');
|
||||
expect(retryCallArgs[2]).toContain('T003');
|
||||
|
||||
// Should have emitted a progress event about retrying
|
||||
expect(mockEventBus.emitAutoModeEvent).toHaveBeenCalledWith(
|
||||
'auto_mode_progress',
|
||||
expect.objectContaining({
|
||||
featureId: 'feature-1',
|
||||
content: expect.stringContaining('Re-running to complete tasks'),
|
||||
})
|
||||
);
|
||||
});
|
||||
|
||||
it('respects maximum retry attempts', async () => {
|
||||
const featureAlwaysPending: Feature = {
|
||||
...testFeature,
|
||||
planSpec: {
|
||||
status: 'approved',
|
||||
content: 'Plan',
|
||||
tasks: [
|
||||
{ id: 'T001', title: 'Task 1', status: 'completed', description: 'First task' },
|
||||
{ id: 'T002', title: 'Task 2', status: 'pending', description: 'Second task' },
|
||||
],
|
||||
tasksCompleted: 1,
|
||||
},
|
||||
};
|
||||
|
||||
// Always return feature with pending tasks (agent never completes T002)
|
||||
mockLoadFeatureFn = vi.fn().mockResolvedValue(featureAlwaysPending);
|
||||
|
||||
const svc = createServiceWithMocks();
|
||||
await svc.executeFeature('/test/project', 'feature-1', false, false, undefined, {
|
||||
continuationPrompt: 'Continue',
|
||||
_calledInternally: true,
|
||||
});
|
||||
|
||||
// Initial run + 3 retry attempts = 4 total
|
||||
expect(mockRunAgentFn).toHaveBeenCalledTimes(4);
|
||||
|
||||
// Should still set final status even with incomplete tasks
|
||||
expect(mockUpdateFeatureStatusFn).toHaveBeenCalledWith(
|
||||
'/test/project',
|
||||
'feature-1',
|
||||
'verified'
|
||||
);
|
||||
});
|
||||
|
||||
it('stops retrying when abort signal is triggered', async () => {
|
||||
const featureWithPendingTasks: Feature = {
|
||||
...testFeature,
|
||||
planSpec: {
|
||||
status: 'approved',
|
||||
content: 'Plan',
|
||||
tasks: [
|
||||
{ id: 'T001', title: 'Task 1', status: 'completed', description: 'First task' },
|
||||
{ id: 'T002', title: 'Task 2', status: 'pending', description: 'Second task' },
|
||||
],
|
||||
tasksCompleted: 1,
|
||||
},
|
||||
};
|
||||
|
||||
mockLoadFeatureFn = vi.fn().mockResolvedValue(featureWithPendingTasks);
|
||||
|
||||
// Simulate abort after first agent run
|
||||
let runCount = 0;
|
||||
const capturedAbortController = { current: null as AbortController | null };
|
||||
mockRunAgentFn = vi.fn().mockImplementation((_wd, _fid, _prompt, abortCtrl) => {
|
||||
capturedAbortController.current = abortCtrl;
|
||||
runCount++;
|
||||
if (runCount >= 1) {
|
||||
// runCount was just incremented, so this guard is always true: the abort fires during the very first run
|
||||
abortCtrl.abort();
|
||||
}
|
||||
return Promise.resolve();
|
||||
});
|
||||
|
||||
const svc = createServiceWithMocks();
|
||||
await svc.executeFeature('/test/project', 'feature-1', false, false, undefined, {
|
||||
continuationPrompt: 'Continue',
|
||||
_calledInternally: true,
|
||||
});
|
||||
|
||||
// Should only have the initial run, then abort prevents retries
|
||||
expect(mockRunAgentFn).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it('re-runs agent for in_progress tasks (not just pending)', async () => {
|
||||
const featureWithInProgressTask: Feature = {
|
||||
...testFeature,
|
||||
planSpec: {
|
||||
status: 'approved',
|
||||
content: 'Plan',
|
||||
tasks: [
|
||||
{ id: 'T001', title: 'Task 1', status: 'completed', description: 'First task' },
|
||||
{ id: 'T002', title: 'Task 2', status: 'in_progress', description: 'Second task' },
|
||||
],
|
||||
tasksCompleted: 1,
|
||||
currentTaskId: 'T002',
|
||||
},
|
||||
};
|
||||
|
||||
const featureAllDone: Feature = {
|
||||
...testFeature,
|
||||
planSpec: {
|
||||
status: 'approved',
|
||||
content: 'Plan',
|
||||
tasks: [
|
||||
{ id: 'T001', title: 'Task 1', status: 'completed', description: 'First task' },
|
||||
{ id: 'T002', title: 'Task 2', status: 'completed', description: 'Second task' },
|
||||
],
|
||||
tasksCompleted: 2,
|
||||
},
|
||||
};
|
||||
|
||||
let loadCallCount = 0;
|
||||
mockLoadFeatureFn = vi.fn().mockImplementation(() => {
|
||||
loadCallCount++;
|
||||
if (loadCallCount <= 2) return featureWithInProgressTask;
|
||||
return featureAllDone;
|
||||
});
|
||||
|
||||
const svc = createServiceWithMocks();
|
||||
await svc.executeFeature('/test/project', 'feature-1', false, false, undefined, {
|
||||
continuationPrompt: 'Continue',
|
||||
_calledInternally: true,
|
||||
});
|
||||
|
||||
// Should have retried for the in_progress task
|
||||
expect(mockRunAgentFn).toHaveBeenCalledTimes(2);
|
||||
|
||||
// The retry prompt should mention the in_progress task
|
||||
const retryCallArgs = mockRunAgentFn.mock.calls[1];
|
||||
expect(retryCallArgs[2]).toContain('T002');
|
||||
expect(retryCallArgs[2]).toContain('in_progress');
|
||||
});
|
||||
|
||||
it('uses planningMode skip and no plan approval for retry runs', async () => {
|
||||
const featureWithPendingTasks: Feature = {
|
||||
...testFeature,
|
||||
planningMode: 'full',
|
||||
requirePlanApproval: true,
|
||||
planSpec: {
|
||||
status: 'approved',
|
||||
content: 'Plan',
|
||||
tasks: [
|
||||
{ id: 'T001', title: 'Task 1', status: 'completed', description: 'First task' },
|
||||
{ id: 'T002', title: 'Task 2', status: 'pending', description: 'Second task' },
|
||||
],
|
||||
tasksCompleted: 1,
|
||||
},
|
||||
};
|
||||
|
||||
const featureAllDone: Feature = {
|
||||
...testFeature,
|
||||
planSpec: {
|
||||
status: 'approved',
|
||||
content: 'Plan',
|
||||
tasks: [
|
||||
{ id: 'T001', title: 'Task 1', status: 'completed', description: 'First task' },
|
||||
{ id: 'T002', title: 'Task 2', status: 'completed', description: 'Second task' },
|
||||
],
|
||||
tasksCompleted: 2,
|
||||
},
|
||||
};
|
||||
|
||||
let loadCallCount = 0;
|
||||
mockLoadFeatureFn = vi.fn().mockImplementation(() => {
|
||||
loadCallCount++;
|
||||
if (loadCallCount <= 2) return featureWithPendingTasks;
|
||||
return featureAllDone;
|
||||
});
|
||||
|
||||
const svc = createServiceWithMocks();
|
||||
await svc.executeFeature('/test/project', 'feature-1', false, false, undefined, {
|
||||
continuationPrompt: 'Continue',
|
||||
_calledInternally: true,
|
||||
});
|
||||
|
||||
// The retry agent call should use planningMode: 'skip' and requirePlanApproval: false
|
||||
const retryCallArgs = mockRunAgentFn.mock.calls[1];
|
||||
const retryOptions = retryCallArgs[7]; // options object
|
||||
expect(retryOptions.planningMode).toBe('skip');
|
||||
expect(retryOptions.requirePlanApproval).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
describe('executeFeature - error handling', () => {
|
||||
it('classifies and emits error event', async () => {
|
||||
const testError = new Error('Test error');
|
||||
|
||||
@@ -164,13 +164,16 @@ export const AgentInfoPanel = memo(function AgentInfoPanel({
|
||||
const currentTaskId = planSpec.currentTaskId;
|
||||
|
||||
return planSpec.tasks.map((task: ParsedTask, index: number) => {
|
||||
// If the feature is done (waiting_approval/verified), all tasks are completed
|
||||
// This is a defensive UI-side check: the server should have already finalized
|
||||
// task statuses, but stale data from before the fix could still show spinners
|
||||
// When feature is finished (waiting_approval/verified), finalize task display:
|
||||
// - in_progress tasks → completed (agent was working on them when it finished)
|
||||
// - pending tasks stay pending (they were never started)
|
||||
// - completed tasks stay completed
|
||||
// This matches server-side behavior in feature-state-manager.ts
|
||||
if (isFeatureFinished) {
|
||||
const finalStatus = task.status === 'in_progress' ? 'completed' : task.status;
|
||||
return {
|
||||
content: task.description,
|
||||
status: 'completed' as const,
|
||||
status: (finalStatus || 'completed') as 'pending' | 'in_progress' | 'completed',
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user