feat: Add task retry logic and raise max turns cap from 50 to 150

2026-03-16 21:53:07 +00:00 · 2026-02-16 22:10:50 -08:00
parent 30fce3f746
commit aa940d44ff
5 changed files with 419 additions and 6 deletions
--- a/apps/server/src/services/agent-executor.ts
+++ b/apps/server/src/services/agent-executor.ts
@@ -331,7 +331,7 @@ export class AgentExecutor {
        userFeedback
      );
      const taskStream = provider.executeQuery(
-        this.buildExecOpts(options, taskPrompt, Math.min(sdkOptions?.maxTurns ?? 50, 50))
+        this.buildExecOpts(options, taskPrompt, Math.min(sdkOptions?.maxTurns ?? 50, 150))
      );
      let taskOutput = '',
        taskStartDetected = false,
--- a/apps/server/src/services/execution-service.ts
+++ b/apps/server/src/services/execution-service.ts
@@ -270,6 +270,84 @@ ${feature.spec}
        }
      );

+      // Check for incomplete tasks after agent execution.
+      // The agent may have finished early (hit max turns, decided it was done, etc.)
+      // while tasks are still pending. If so, re-run the agent to complete remaining tasks.
+      const MAX_TASK_RETRY_ATTEMPTS = 3;
+      let taskRetryAttempts = 0;
+      while (!abortController.signal.aborted && taskRetryAttempts < MAX_TASK_RETRY_ATTEMPTS) {
+        const currentFeature = await this.loadFeatureFn(projectPath, featureId);
+        if (!currentFeature?.planSpec?.tasks) break;
+
+        const pendingTasks = currentFeature.planSpec.tasks.filter(
+          (t) => t.status === 'pending' || t.status === 'in_progress'
+        );
+        if (pendingTasks.length === 0) break;
+
+        taskRetryAttempts++;
+        const totalTasks = currentFeature.planSpec.tasks.length;
+        const completedTasks = currentFeature.planSpec.tasks.filter(
+          (t) => t.status === 'completed'
+        ).length;
+        logger.info(
+          `[executeFeature] Feature ${featureId} has ${pendingTasks.length} incomplete tasks (${completedTasks}/${totalTasks} completed). Re-running agent (attempt ${taskRetryAttempts}/${MAX_TASK_RETRY_ATTEMPTS})`
+        );
+
+        this.eventBus.emitAutoModeEvent('auto_mode_progress', {
+          featureId,
+          branchName: feature.branchName ?? null,
+          content: `Agent finished with ${pendingTasks.length} tasks remaining. Re-running to complete tasks (attempt ${taskRetryAttempts}/${MAX_TASK_RETRY_ATTEMPTS})...`,
+          projectPath,
+        });
+
+        // Build a continuation prompt that tells the agent to finish remaining tasks
+        const remainingTasksList = pendingTasks
+          .map((t) => `- ${t.id}: ${t.description} (${t.status})`)
+          .join('\n');
+
+        const continuationPrompt = `## Continue Implementation - Incomplete Tasks
+
+The previous agent session ended before all tasks were completed. Please continue implementing the remaining tasks.
+
+**Completed:** ${completedTasks}/${totalTasks} tasks
+**Remaining tasks:**
+${remainingTasksList}
+
+Please continue from where you left off and complete all remaining tasks. Use the same [TASK_START:ID] and [TASK_COMPLETE:ID] markers for each task.`;
+
+        await this.runAgentFn(
+          workDir,
+          featureId,
+          continuationPrompt,
+          abortController,
+          projectPath,
+          undefined,
+          model,
+          {
+            projectPath,
+            planningMode: 'skip',
+            requirePlanApproval: false,
+            systemPrompt: combinedSystemPrompt || undefined,
+            autoLoadClaudeMd,
+            thinkingLevel: feature.thinkingLevel,
+            branchName: feature.branchName ?? null,
+          }
+        );
+      }
+
+      // Log if tasks are still incomplete after retry attempts
+      if (taskRetryAttempts >= MAX_TASK_RETRY_ATTEMPTS) {
+        const finalFeature = await this.loadFeatureFn(projectPath, featureId);
+        const stillPending = finalFeature?.planSpec?.tasks?.filter(
+          (t) => t.status === 'pending' || t.status === 'in_progress'
+        );
+        if (stillPending && stillPending.length > 0) {
+          logger.warn(
+            `[executeFeature] Feature ${featureId} still has ${stillPending.length} incomplete tasks after ${MAX_TASK_RETRY_ATTEMPTS} retry attempts. Moving to final status.`
+          );
+        }
+      }
+
      const pipelineConfig = await pipelineService.getPipelineConfig(projectPath);
      const excludedStepIds = new Set(feature.excludedPipelineSteps || []);
      const sortedSteps = [...(pipelineConfig?.steps || [])]
@@ -300,6 +378,13 @@ ${feature.spec}
      await this.updateFeatureStatusFn(projectPath, featureId, finalStatus);
      this.recordSuccessFn();

+      // Check final task completion state for accurate reporting
+      const completedFeature = await this.loadFeatureFn(projectPath, featureId);
+      const totalTasks = completedFeature?.planSpec?.tasks?.length ?? 0;
+      const completedTasks =
+        completedFeature?.planSpec?.tasks?.filter((t) => t.status === 'completed').length ?? 0;
+      const hasIncompleteTasks = totalTasks > 0 && completedTasks < totalTasks;
+
      try {
        const outputPath = path.join(getFeatureDir(projectPath, featureId), 'agent-output.md');
        let agentOutput = '';
@@ -326,12 +411,18 @@ ${feature.spec}
        /* learnings recording failed */
      }

+      const elapsedSeconds = Math.round((Date.now() - tempRunningFeature.startTime) / 1000);
+      let completionMessage = `Feature completed in ${elapsedSeconds}s`;
+      if (finalStatus === 'verified') completionMessage += ' - auto-verified';
+      if (hasIncompleteTasks)
+        completionMessage += ` (${completedTasks}/${totalTasks} tasks completed)`;
+
      this.eventBus.emitAutoModeEvent('auto_mode_feature_complete', {
        featureId,
        featureName: feature.title,
        branchName: feature.branchName ?? null,
        passes: true,
-        message: `Feature completed in ${Math.round((Date.now() - tempRunningFeature.startTime) / 1000)}s${finalStatus === 'verified' ? ' - auto-verified' : ''}`,
+        message: completionMessage,
        projectPath,
        model: tempRunningFeature.model,
        provider: tempRunningFeature.provider,
--- a/apps/server/src/services/feature-state-manager.ts
+++ b/apps/server/src/services/feature-state-manager.ts
@@ -115,10 +115,13 @@ export class FeatureStateManager {
        // This prevents cards in "waiting for review" from appearing to still have running tasks
        if (feature.planSpec?.tasks) {
          let tasksFinalized = 0;
+          let tasksPending = 0;
          for (const task of feature.planSpec.tasks) {
            if (task.status === 'in_progress') {
              task.status = 'completed';
              tasksFinalized++;
+            } else if (task.status === 'pending') {
+              tasksPending++;
            }
          }
          if (tasksFinalized > 0) {
@@ -126,6 +129,11 @@ export class FeatureStateManager {
              `[updateFeatureStatus] Finalized ${tasksFinalized} in_progress tasks for feature ${featureId} moving to waiting_approval`
            );
          }
+          if (tasksPending > 0) {
+            logger.warn(
+              `[updateFeatureStatus] Feature ${featureId} moving to waiting_approval with ${tasksPending} pending (never started) tasks out of ${feature.planSpec.tasks.length} total`
+            );
+          }
          // Update tasksCompleted count to reflect actual completed tasks
          feature.planSpec.tasksCompleted = feature.planSpec.tasks.filter(
            (t) => t.status === 'completed'
@@ -136,11 +144,26 @@ export class FeatureStateManager {
        // Also finalize in_progress tasks when moving directly to verified (skipTests=false)
        // Do NOT mark pending tasks as completed - they were never started
        if (feature.planSpec?.tasks) {
+          let tasksFinalized = 0;
+          let tasksPending = 0;
          for (const task of feature.planSpec.tasks) {
            if (task.status === 'in_progress') {
              task.status = 'completed';
+              tasksFinalized++;
+            } else if (task.status === 'pending') {
+              tasksPending++;
            }
          }
+          if (tasksFinalized > 0) {
+            logger.info(
+              `[updateFeatureStatus] Finalized ${tasksFinalized} in_progress tasks for feature ${featureId} moving to verified`
+            );
+          }
+          if (tasksPending > 0) {
+            logger.warn(
+              `[updateFeatureStatus] Feature ${featureId} moving to verified with ${tasksPending} pending (never started) tasks out of ${feature.planSpec.tasks.length} total`
+            );
+          }
          feature.planSpec.tasksCompleted = feature.planSpec.tasks.filter(
            (t) => t.status === 'completed'
          ).length;
--- a/apps/server/tests/unit/services/execution-service.test.ts
+++ b/apps/server/tests/unit/services/execution-service.test.ts
@@ -677,6 +677,302 @@ describe('execution-service.ts', () => {
    });
  });

+  describe('executeFeature - incomplete task retry', () => {
+    const createServiceWithMocks = () => {
+      return new ExecutionService(
+        mockEventBus,
+        mockConcurrencyManager,
+        mockWorktreeResolver,
+        mockSettingsService,
+        mockRunAgentFn,
+        mockExecutePipelineFn,
+        mockUpdateFeatureStatusFn,
+        mockLoadFeatureFn,
+        mockGetPlanningPromptPrefixFn,
+        mockSaveFeatureSummaryFn,
+        mockRecordLearningsFn,
+        mockContextExistsFn,
+        mockResumeFeatureFn,
+        mockTrackFailureFn,
+        mockSignalPauseFn,
+        mockRecordSuccessFn,
+        mockSaveExecutionStateFn,
+        mockLoadContextFilesFn
+      );
+    };
+
+    it('does not re-run agent when feature has no tasks', async () => {
+      // Feature with no planSpec/tasks - should complete normally with 1 agent call
+      mockLoadFeatureFn = vi.fn().mockResolvedValue(testFeature);
+      const svc = createServiceWithMocks();
+
+      await svc.executeFeature('/test/project', 'feature-1');
+
+      expect(mockRunAgentFn).toHaveBeenCalledTimes(1);
+    });
+
+    it('does not re-run agent when all tasks are completed', async () => {
+      const featureWithCompletedTasks: Feature = {
+        ...testFeature,
+        planSpec: {
+          status: 'approved',
+          content: 'Plan',
+          tasks: [
+            { id: 'T001', title: 'Task 1', status: 'completed', description: 'First task' },
+            { id: 'T002', title: 'Task 2', status: 'completed', description: 'Second task' },
+          ],
+          tasksCompleted: 2,
+        },
+      };
+      mockLoadFeatureFn = vi.fn().mockResolvedValue(featureWithCompletedTasks);
+      const svc = createServiceWithMocks();
+
+      await svc.executeFeature('/test/project', 'feature-1');
+
+      // All tasks are already completed, so the retry loop must not add any agent runs.
+      // The approved plan triggers a recursive executeFeature; runAgentFn is called once, in that inner call.
+      expect(mockRunAgentFn).toHaveBeenCalledTimes(1);
+    });
+
+    it('re-runs agent when there are pending tasks after initial execution', async () => {
+      const featureWithPendingTasks: Feature = {
+        ...testFeature,
+        planSpec: {
+          status: 'approved',
+          content: 'Plan',
+          tasks: [
+            { id: 'T001', title: 'Task 1', status: 'completed', description: 'First task' },
+            { id: 'T002', title: 'Task 2', status: 'pending', description: 'Second task' },
+            { id: 'T003', title: 'Task 3', status: 'pending', description: 'Third task' },
+          ],
+          tasksCompleted: 1,
+        },
+      };
+
+      // After first agent run, loadFeature returns feature with pending tasks
+      // After second agent run, loadFeature returns feature with all tasks completed
+      const featureAllDone: Feature = {
+        ...testFeature,
+        planSpec: {
+          status: 'approved',
+          content: 'Plan',
+          tasks: [
+            { id: 'T001', title: 'Task 1', status: 'completed', description: 'First task' },
+            { id: 'T002', title: 'Task 2', status: 'completed', description: 'Second task' },
+            { id: 'T003', title: 'Task 3', status: 'completed', description: 'Third task' },
+          ],
+          tasksCompleted: 3,
+        },
+      };
+
+      let loadCallCount = 0;
+      mockLoadFeatureFn = vi.fn().mockImplementation(() => {
+        loadCallCount++;
+        // First call: initial feature load at the top of executeFeature
+        // Second call: after first agent run (check for incomplete tasks) - has pending tasks
+        // Third call: after second agent run (check for incomplete tasks) - all done
+        if (loadCallCount <= 2) return featureWithPendingTasks;
+        return featureAllDone;
+      });
+
+      const svc = createServiceWithMocks();
+      await svc.executeFeature('/test/project', 'feature-1', false, false, undefined, {
+        continuationPrompt: 'Continue',
+        _calledInternally: true,
+      });
+
+      // Should have called runAgentFn twice: initial + one retry
+      expect(mockRunAgentFn).toHaveBeenCalledTimes(2);
+
+      // The retry call should contain continuation prompt about incomplete tasks
+      const retryCallArgs = mockRunAgentFn.mock.calls[1];
+      expect(retryCallArgs[2]).toContain('Continue Implementation - Incomplete Tasks');
+      expect(retryCallArgs[2]).toContain('T002');
+      expect(retryCallArgs[2]).toContain('T003');
+
+      // Should have emitted a progress event about retrying
+      expect(mockEventBus.emitAutoModeEvent).toHaveBeenCalledWith(
+        'auto_mode_progress',
+        expect.objectContaining({
+          featureId: 'feature-1',
+          content: expect.stringContaining('Re-running to complete tasks'),
+        })
+      );
+    });
+
+    it('respects maximum retry attempts', async () => {
+      const featureAlwaysPending: Feature = {
+        ...testFeature,
+        planSpec: {
+          status: 'approved',
+          content: 'Plan',
+          tasks: [
+            { id: 'T001', title: 'Task 1', status: 'completed', description: 'First task' },
+            { id: 'T002', title: 'Task 2', status: 'pending', description: 'Second task' },
+          ],
+          tasksCompleted: 1,
+        },
+      };
+
+      // Always return feature with pending tasks (agent never completes T002)
+      mockLoadFeatureFn = vi.fn().mockResolvedValue(featureAlwaysPending);
+
+      const svc = createServiceWithMocks();
+      await svc.executeFeature('/test/project', 'feature-1', false, false, undefined, {
+        continuationPrompt: 'Continue',
+        _calledInternally: true,
+      });
+
+      // Initial run + 3 retry attempts = 4 total
+      expect(mockRunAgentFn).toHaveBeenCalledTimes(4);
+
+      // Should still set final status even with incomplete tasks
+      expect(mockUpdateFeatureStatusFn).toHaveBeenCalledWith(
+        '/test/project',
+        'feature-1',
+        'verified'
+      );
+    });
+
+    it('stops retrying when abort signal is triggered', async () => {
+      const featureWithPendingTasks: Feature = {
+        ...testFeature,
+        planSpec: {
+          status: 'approved',
+          content: 'Plan',
+          tasks: [
+            { id: 'T001', title: 'Task 1', status: 'completed', description: 'First task' },
+            { id: 'T002', title: 'Task 2', status: 'pending', description: 'Second task' },
+          ],
+          tasksCompleted: 1,
+        },
+      };
+
+      mockLoadFeatureFn = vi.fn().mockResolvedValue(featureWithPendingTasks);
+
+      // Simulate an abort that fires during the first agent run
+      let runCount = 0;
+      const capturedAbortController = { current: null as AbortController | null };
+      mockRunAgentFn = vi.fn().mockImplementation((_wd, _fid, _prompt, abortCtrl) => {
+        capturedAbortController.current = abortCtrl;
+        runCount++;
+        if (runCount >= 1) {
+          // Always true after the increment above, so the very first run aborts
+          abortCtrl.abort();
+        }
+        return Promise.resolve();
+      });
+
+      const svc = createServiceWithMocks();
+      await svc.executeFeature('/test/project', 'feature-1', false, false, undefined, {
+        continuationPrompt: 'Continue',
+        _calledInternally: true,
+      });
+
+      // Should only have the initial run, then abort prevents retries
+      expect(mockRunAgentFn).toHaveBeenCalledTimes(1);
+    });
+
+    it('re-runs agent for in_progress tasks (not just pending)', async () => {
+      const featureWithInProgressTask: Feature = {
+        ...testFeature,
+        planSpec: {
+          status: 'approved',
+          content: 'Plan',
+          tasks: [
+            { id: 'T001', title: 'Task 1', status: 'completed', description: 'First task' },
+            { id: 'T002', title: 'Task 2', status: 'in_progress', description: 'Second task' },
+          ],
+          tasksCompleted: 1,
+          currentTaskId: 'T002',
+        },
+      };
+
+      const featureAllDone: Feature = {
+        ...testFeature,
+        planSpec: {
+          status: 'approved',
+          content: 'Plan',
+          tasks: [
+            { id: 'T001', title: 'Task 1', status: 'completed', description: 'First task' },
+            { id: 'T002', title: 'Task 2', status: 'completed', description: 'Second task' },
+          ],
+          tasksCompleted: 2,
+        },
+      };
+
+      let loadCallCount = 0;
+      mockLoadFeatureFn = vi.fn().mockImplementation(() => {
+        loadCallCount++;
+        if (loadCallCount <= 2) return featureWithInProgressTask;
+        return featureAllDone;
+      });
+
+      const svc = createServiceWithMocks();
+      await svc.executeFeature('/test/project', 'feature-1', false, false, undefined, {
+        continuationPrompt: 'Continue',
+        _calledInternally: true,
+      });
+
+      // Should have retried for the in_progress task
+      expect(mockRunAgentFn).toHaveBeenCalledTimes(2);
+
+      // The retry prompt should mention the in_progress task
+      const retryCallArgs = mockRunAgentFn.mock.calls[1];
+      expect(retryCallArgs[2]).toContain('T002');
+      expect(retryCallArgs[2]).toContain('in_progress');
+    });
+
+    it('uses planningMode skip and no plan approval for retry runs', async () => {
+      const featureWithPendingTasks: Feature = {
+        ...testFeature,
+        planningMode: 'full',
+        requirePlanApproval: true,
+        planSpec: {
+          status: 'approved',
+          content: 'Plan',
+          tasks: [
+            { id: 'T001', title: 'Task 1', status: 'completed', description: 'First task' },
+            { id: 'T002', title: 'Task 2', status: 'pending', description: 'Second task' },
+          ],
+          tasksCompleted: 1,
+        },
+      };
+
+      const featureAllDone: Feature = {
+        ...testFeature,
+        planSpec: {
+          status: 'approved',
+          content: 'Plan',
+          tasks: [
+            { id: 'T001', title: 'Task 1', status: 'completed', description: 'First task' },
+            { id: 'T002', title: 'Task 2', status: 'completed', description: 'Second task' },
+          ],
+          tasksCompleted: 2,
+        },
+      };
+
+      let loadCallCount = 0;
+      mockLoadFeatureFn = vi.fn().mockImplementation(() => {
+        loadCallCount++;
+        if (loadCallCount <= 2) return featureWithPendingTasks;
+        return featureAllDone;
+      });
+
+      const svc = createServiceWithMocks();
+      await svc.executeFeature('/test/project', 'feature-1', false, false, undefined, {
+        continuationPrompt: 'Continue',
+        _calledInternally: true,
+      });
+
+      // The retry agent call should use planningMode: 'skip' and requirePlanApproval: false
+      const retryCallArgs = mockRunAgentFn.mock.calls[1];
+      const retryOptions = retryCallArgs[7]; // options object
+      expect(retryOptions.planningMode).toBe('skip');
+      expect(retryOptions.requirePlanApproval).toBe(false);
+    });
+  });
+
  describe('executeFeature - error handling', () => {
    it('classifies and emits error event', async () => {
      const testError = new Error('Test error');
--- a/apps/ui/src/components/views/board-view/components/kanban-card/agent-info-panel.tsx
+++ b/apps/ui/src/components/views/board-view/components/kanban-card/agent-info-panel.tsx
@@ -164,13 +164,16 @@ export const AgentInfoPanel = memo(function AgentInfoPanel({
      const currentTaskId = planSpec.currentTaskId;

      return planSpec.tasks.map((task: ParsedTask, index: number) => {
-        // If the feature is done (waiting_approval/verified), all tasks are completed
-        // This is a defensive UI-side check: the server should have already finalized
-        // task statuses, but stale data from before the fix could still show spinners
+        // When feature is finished (waiting_approval/verified), finalize task display:
+        // - in_progress tasks → completed (agent was working on them when it finished)
+        // - pending tasks stay pending (they were never started)
+        // - completed tasks stay completed
+        // This matches server-side behavior in feature-state-manager.ts
        if (isFeatureFinished) {
+          const finalStatus = task.status === 'in_progress' ? 'completed' : task.status;
          return {
            content: task.description,
-            status: 'completed' as const,
+            status: (finalStatus || 'completed') as 'pending' | 'in_progress' | 'completed',
          };
        }