Merge pull request #778 from gsxdsm/fix/features-completed-too-soon

feat: Add task retry logic and improve max turns limit
2026-03-21 23:33:07 +00:00 · 2026-02-16 23:11:32 -08:00
parent 381698b048 f06088a062
commit 57446b4fba
7 changed files with 423 additions and 9 deletions
--- a/apps/server/src/providers/claude-provider.ts
+++ b/apps/server/src/providers/claude-provider.ts
@@ -204,7 +204,7 @@ export class ClaudeProvider extends BaseProvider {
      model,
      cwd,
      systemPrompt,
-      maxTurns = 20,
+      maxTurns = 100,
      allowedTools,
      abortController,
      conversationHistory,
--- a/apps/server/src/services/agent-executor.ts
+++ b/apps/server/src/services/agent-executor.ts
@@ -331,7 +331,7 @@ export class AgentExecutor {
        userFeedback
      );
      const taskStream = provider.executeQuery(
-        this.buildExecOpts(options, taskPrompt, Math.min(sdkOptions?.maxTurns ?? 50, 50))
+        this.buildExecOpts(options, taskPrompt, Math.min(sdkOptions?.maxTurns ?? 100, 100))
      );
      let taskOutput = '',
        taskStartDetected = false,
--- a/apps/server/src/services/execution-service.ts
+++ b/apps/server/src/services/execution-service.ts
@@ -270,6 +270,84 @@ ${feature.spec}
        }
      );
      // Check for incomplete tasks after agent execution.
      // The agent may have finished early (hit max turns, decided it was done, etc.)
      // while tasks are still pending. If so, re-run the agent to complete remaining tasks.
      // The number of re-runs is capped so a task the agent never completes cannot loop forever.
      const MAX_TASK_RETRY_ATTEMPTS = 3;
      let taskRetryAttempts = 0;
      while (!abortController.signal.aborted && taskRetryAttempts < MAX_TASK_RETRY_ATTEMPTS) {
        // Reload on every iteration so the check reflects task state written by the previous run.
        const currentFeature = await this.loadFeatureFn(projectPath, featureId);
        // No feature / planSpec / task list: there is nothing to track, so nothing to retry.
        if (!currentFeature?.planSpec?.tasks) break;
        // Both never-started ('pending') and unfinished ('in_progress') tasks count as incomplete.
        const pendingTasks = currentFeature.planSpec.tasks.filter(
          (t) => t.status === 'pending' || t.status === 'in_progress'
        );
        if (pendingTasks.length === 0) break;
        // Incremented before logging so the reported attempt number is 1-based.
        taskRetryAttempts++;
        const totalTasks = currentFeature.planSpec.tasks.length;
        const completedTasks = currentFeature.planSpec.tasks.filter(
          (t) => t.status === 'completed'
        ).length;
        logger.info(
          `[executeFeature] Feature ${featureId} has ${pendingTasks.length} incomplete tasks (${completedTasks}/${totalTasks} completed). Re-running agent (attempt ${taskRetryAttempts}/${MAX_TASK_RETRY_ATTEMPTS})`
        );
        // Emit a progress event carrying the remaining-task count and attempt number.
        this.eventBus.emitAutoModeEvent('auto_mode_progress', {
          featureId,
          branchName: feature.branchName ?? null,
          content: `Agent finished with ${pendingTasks.length} tasks remaining. Re-running to complete tasks (attempt ${taskRetryAttempts}/${MAX_TASK_RETRY_ATTEMPTS})...`,
          projectPath,
        });
        // Build a continuation prompt that tells the agent to finish remaining tasks
        // Each remaining task is listed as "- <id>: <description> (<status>)".
        const remainingTasksList = pendingTasks
          .map((t) => `- ${t.id}: ${t.description} (${t.status})`)
          .join('\n');
        const continuationPrompt = `## Continue Implementation - Incomplete Tasks
 The previous agent session ended before all tasks were completed. Please continue implementing the remaining tasks.
 **Completed:** ${completedTasks}/${totalTasks} tasks
 **Remaining tasks:**
 ${remainingTasksList}
 Please continue from where you left off and complete all remaining tasks. Use the same [TASK_START:ID] and [TASK_COMPLETE:ID] markers for each task.`;
        // Re-run the agent with the continuation prompt. The options request no planning
        // phase and no plan approval for the retry run. An error thrown here is not caught
        // in this block and propagates to the caller.
        await this.runAgentFn(
          workDir,
          featureId,
          continuationPrompt,
          abortController,
          projectPath,
          undefined,
          model,
          {
            projectPath,
            planningMode: 'skip',
            requirePlanApproval: false,
            systemPrompt: combinedSystemPrompt || undefined,
            autoLoadClaudeMd,
            thinkingLevel: feature.thinkingLevel,
            branchName: feature.branchName ?? null,
          }
        );
      }
      // Log if tasks are still incomplete after retry attempts
      // The loop exits on the attempt cap without re-checking task state, so the feature is
      // reloaded here: the final retry may have completed the remaining tasks.
      if (taskRetryAttempts >= MAX_TASK_RETRY_ATTEMPTS) {
        const finalFeature = await this.loadFeatureFn(projectPath, featureId);
        const stillPending = finalFeature?.planSpec?.tasks?.filter(
          (t) => t.status === 'pending' || t.status === 'in_progress'
        );
        if (stillPending && stillPending.length > 0) {
          // Warning only: execution continues past this block regardless.
          logger.warn(
            `[executeFeature] Feature ${featureId} still has ${stillPending.length} incomplete tasks after ${MAX_TASK_RETRY_ATTEMPTS} retry attempts. Moving to final status.`
          );
        }
      }
      const pipelineConfig = await pipelineService.getPipelineConfig(projectPath);
      const excludedStepIds = new Set(feature.excludedPipelineSteps || []);
      const sortedSteps = [...(pipelineConfig?.steps || [])]
@@ -300,6 +378,13 @@ ${feature.spec}
      await this.updateFeatureStatusFn(projectPath, featureId, finalStatus);
      this.recordSuccessFn();
      // Check final task completion state for accurate reporting
      const completedFeature = await this.loadFeatureFn(projectPath, featureId);
      const totalTasks = completedFeature?.planSpec?.tasks?.length ?? 0;
      const completedTasks =
        completedFeature?.planSpec?.tasks?.filter((t) => t.status === 'completed').length ?? 0;
      const hasIncompleteTasks = totalTasks > 0 && completedTasks < totalTasks;
      try {
        const outputPath = path.join(getFeatureDir(projectPath, featureId), 'agent-output.md');
        let agentOutput = '';
@@ -326,12 +411,18 @@ ${feature.spec}
        /* learnings recording failed */
      }
      const elapsedSeconds = Math.round((Date.now() - tempRunningFeature.startTime) / 1000);
      let completionMessage = `Feature completed in ${elapsedSeconds}s`;
      if (finalStatus === 'verified') completionMessage += ' - auto-verified';
      if (hasIncompleteTasks)
        completionMessage += ` (${completedTasks}/${totalTasks} tasks completed)`;
      this.eventBus.emitAutoModeEvent('auto_mode_feature_complete', {
        featureId,
        featureName: feature.title,
        branchName: feature.branchName ?? null,
        passes: true,
-        message: `Feature completed in ${Math.round((Date.now() - tempRunningFeature.startTime) / 1000)}s${finalStatus === 'verified' ? ' - auto-verified' : ''}`,
+        message: completionMessage,
        projectPath,
        model: tempRunningFeature.model,
        provider: tempRunningFeature.provider,
--- a/apps/server/src/services/feature-state-manager.ts
+++ b/apps/server/src/services/feature-state-manager.ts
@@ -115,10 +115,13 @@ export class FeatureStateManager {
        // This prevents cards in "waiting for review" from appearing to still have running tasks
        if (feature.planSpec?.tasks) {
          let tasksFinalized = 0;
          let tasksPending = 0;
          for (const task of feature.planSpec.tasks) {
            if (task.status === 'in_progress') {
              task.status = 'completed';
              tasksFinalized++;
            } else if (task.status === 'pending') {
              tasksPending++;
            }
          }
          if (tasksFinalized > 0) {
@@ -126,6 +129,11 @@ export class FeatureStateManager {
              `[updateFeatureStatus] Finalized ${tasksFinalized} in_progress tasks for feature ${featureId} moving to waiting_approval`
            );
          }
          if (tasksPending > 0) {
            logger.warn(
              `[updateFeatureStatus] Feature ${featureId} moving to waiting_approval with ${tasksPending} pending (never started) tasks out of ${feature.planSpec.tasks.length} total`
            );
          }
          // Update tasksCompleted count to reflect actual completed tasks
          feature.planSpec.tasksCompleted = feature.planSpec.tasks.filter(
            (t) => t.status === 'completed'
@@ -136,11 +144,26 @@ export class FeatureStateManager {
        // Also finalize in_progress tasks when moving directly to verified (skipTests=false)
        // Do NOT mark pending tasks as completed - they were never started
        if (feature.planSpec?.tasks) {
          let tasksFinalized = 0;
          let tasksPending = 0;
          for (const task of feature.planSpec.tasks) {
            if (task.status === 'in_progress') {
              task.status = 'completed';
              tasksFinalized++;
            } else if (task.status === 'pending') {
              tasksPending++;
            }
          }
          if (tasksFinalized > 0) {
            logger.info(
              `[updateFeatureStatus] Finalized ${tasksFinalized} in_progress tasks for feature ${featureId} moving to verified`
            );
          }
          if (tasksPending > 0) {
            logger.warn(
              `[updateFeatureStatus] Feature ${featureId} moving to verified with ${tasksPending} pending (never started) tasks out of ${feature.planSpec.tasks.length} total`
            );
          }
          feature.planSpec.tasksCompleted = feature.planSpec.tasks.filter(
            (t) => t.status === 'completed'
          ).length;
--- a/apps/server/tests/unit/providers/claude-provider.test.ts
+++ b/apps/server/tests/unit/providers/claude-provider.test.ts
@@ -187,7 +187,7 @@ describe('claude-provider.ts', () => {
      expect(typeof callArgs.prompt).not.toBe('string');
    });
-    it('should use maxTurns default of 20', async () => {
+    it('should use maxTurns default of 100', async () => {
      vi.mocked(sdk.query).mockReturnValue(
        (async function* () {
          yield { type: 'text', text: 'test' };
@@ -205,7 +205,7 @@ describe('claude-provider.ts', () => {
      expect(sdk.query).toHaveBeenCalledWith({
        prompt: 'Test',
        options: expect.objectContaining({
-          maxTurns: 20,
+          maxTurns: 100,
        }),
      });
    });
--- a/apps/server/tests/unit/services/execution-service.test.ts
+++ b/apps/server/tests/unit/services/execution-service.test.ts
@@ -677,6 +677,302 @@ describe('execution-service.ts', () => {
    });
  });
  // Exercises the retry loop that executeFeature runs after the initial agent execution:
  // while the reloaded feature still has 'pending' or 'in_progress' tasks, the agent is
  // re-run with a continuation prompt, up to three times, unless the run was aborted.
  describe('executeFeature - incomplete task retry', () => {
    type TaskStatus = 'pending' | 'in_progress' | 'completed';

    // Descriptions of the generated fixture tasks, indexed by task position.
    const TASK_ORDINALS = ['First', 'Second', 'Third'];

    // Builds the service from the shared mocks. Call this AFTER a test reassigns a mock
    // (e.g. mockLoadFeatureFn) so the service receives the replacement.
    const createServiceWithMocks = () =>
      new ExecutionService(
        mockEventBus,
        mockConcurrencyManager,
        mockWorktreeResolver,
        mockSettingsService,
        mockRunAgentFn,
        mockExecutePipelineFn,
        mockUpdateFeatureStatusFn,
        mockLoadFeatureFn,
        mockGetPlanningPromptPrefixFn,
        mockSaveFeatureSummaryFn,
        mockRecordLearningsFn,
        mockContextExistsFn,
        mockResumeFeatureFn,
        mockTrackFailureFn,
        mockSignalPauseFn,
        mockRecordSuccessFn,
        mockSaveExecutionStateFn,
        mockLoadContextFilesFn
      );

    // Creates a feature with an approved plan whose tasks T001, T002, ... carry the given
    // statuses (at most three). tasksCompleted is derived from the statuses so the fixture
    // cannot drift out of sync with its task list.
    const featureWithTasks = (
      statuses: TaskStatus[],
      featureOverrides: Partial<Feature> = {},
      planSpecOverrides: { currentTaskId?: string } = {}
    ): Feature => ({
      ...testFeature,
      ...featureOverrides,
      planSpec: {
        status: 'approved',
        content: 'Plan',
        tasks: statuses.map((status, index) => ({
          id: `T00${index + 1}`,
          title: `Task ${index + 1}`,
          status,
          description: `${TASK_ORDINALS[index]} task`,
        })),
        tasksCompleted: statuses.filter((status) => status === 'completed').length,
        ...planSpecOverrides,
      },
    });

    // Makes loadFeature return `beforeRetry` for its first two calls and `afterRetry` for
    // every later call:
    //   call 1 - initial feature load at the top of executeFeature
    //   call 2 - incomplete-task check after the first agent run (still incomplete)
    //   call 3+ - checks after the retry run (all done)
    const mockLoadFeatureSequence = (beforeRetry: Feature, afterRetry: Feature) => {
      mockLoadFeatureFn = vi
        .fn()
        .mockResolvedValueOnce(beforeRetry)
        .mockResolvedValueOnce(beforeRetry)
        .mockResolvedValue(afterRetry);
    };

    // Runs executeFeature the way the retry scenarios need it: as an internal continuation call.
    const executeAsContinuation = (svc: ReturnType<typeof createServiceWithMocks>) =>
      svc.executeFeature('/test/project', 'feature-1', false, false, undefined, {
        continuationPrompt: 'Continue',
        _calledInternally: true,
      });

    it('does not re-run agent when feature has no tasks', async () => {
      // Feature with no planSpec/tasks - should complete normally with 1 agent call
      mockLoadFeatureFn = vi.fn().mockResolvedValue(testFeature);
      const svc = createServiceWithMocks();

      await svc.executeFeature('/test/project', 'feature-1');

      expect(mockRunAgentFn).toHaveBeenCalledTimes(1);
    });

    it('does not re-run agent when all tasks are completed', async () => {
      mockLoadFeatureFn = vi.fn().mockResolvedValue(featureWithTasks(['completed', 'completed']));
      const svc = createServiceWithMocks();

      await svc.executeFeature('/test/project', 'feature-1');

      // Every task is already completed, so the retry loop must not schedule another
      // agent run: exactly one runAgentFn call is expected.
      expect(mockRunAgentFn).toHaveBeenCalledTimes(1);
    });

    it('re-runs agent when there are pending tasks after initial execution', async () => {
      mockLoadFeatureSequence(
        featureWithTasks(['completed', 'pending', 'pending']),
        featureWithTasks(['completed', 'completed', 'completed'])
      );
      const svc = createServiceWithMocks();

      await executeAsContinuation(svc);

      // Should have called runAgentFn twice: initial + one retry
      expect(mockRunAgentFn).toHaveBeenCalledTimes(2);

      // The retry call should contain continuation prompt about incomplete tasks
      const retryCallArgs = mockRunAgentFn.mock.calls[1];
      expect(retryCallArgs[2]).toContain('Continue Implementation - Incomplete Tasks');
      expect(retryCallArgs[2]).toContain('T002');
      expect(retryCallArgs[2]).toContain('T003');

      // Should have emitted a progress event about retrying
      expect(mockEventBus.emitAutoModeEvent).toHaveBeenCalledWith(
        'auto_mode_progress',
        expect.objectContaining({
          featureId: 'feature-1',
          content: expect.stringContaining('Re-running to complete tasks'),
        })
      );
    });

    it('respects maximum retry attempts', async () => {
      // Always return feature with pending tasks (agent never completes T002)
      mockLoadFeatureFn = vi.fn().mockResolvedValue(featureWithTasks(['completed', 'pending']));
      const svc = createServiceWithMocks();

      await executeAsContinuation(svc);

      // Initial run + 3 retry attempts = 4 total
      expect(mockRunAgentFn).toHaveBeenCalledTimes(4);

      // Should still set final status even with incomplete tasks
      expect(mockUpdateFeatureStatusFn).toHaveBeenCalledWith(
        '/test/project',
        'feature-1',
        'verified'
      );
    });

    it('stops retrying when abort signal is triggered', async () => {
      mockLoadFeatureFn = vi.fn().mockResolvedValue(featureWithTasks(['completed', 'pending']));

      // Simulate an abort raised during the very first agent run: the agent mock aborts
      // the controller it is handed (4th positional argument) before resolving.
      mockRunAgentFn = vi.fn().mockImplementation((_wd, _fid, _prompt, abortCtrl) => {
        abortCtrl.abort();
        return Promise.resolve();
      });
      const svc = createServiceWithMocks();

      await executeAsContinuation(svc);

      // Should only have the initial run, then abort prevents retries
      expect(mockRunAgentFn).toHaveBeenCalledTimes(1);
    });

    it('re-runs agent for in_progress tasks (not just pending)', async () => {
      mockLoadFeatureSequence(
        featureWithTasks(['completed', 'in_progress'], {}, { currentTaskId: 'T002' }),
        featureWithTasks(['completed', 'completed'])
      );
      const svc = createServiceWithMocks();

      await executeAsContinuation(svc);

      // Should have retried for the in_progress task
      expect(mockRunAgentFn).toHaveBeenCalledTimes(2);

      // The retry prompt should mention the in_progress task
      const retryCallArgs = mockRunAgentFn.mock.calls[1];
      expect(retryCallArgs[2]).toContain('T002');
      expect(retryCallArgs[2]).toContain('in_progress');
    });

    it('uses planningMode skip and no plan approval for retry runs', async () => {
      // The feature itself asks for full planning with approval; the retry run must
      // override both rather than inherit them.
      mockLoadFeatureSequence(
        featureWithTasks(['completed', 'pending'], {
          planningMode: 'full',
          requirePlanApproval: true,
        }),
        featureWithTasks(['completed', 'completed'])
      );
      const svc = createServiceWithMocks();

      await executeAsContinuation(svc);

      // The retry agent call should use planningMode: 'skip' and requirePlanApproval: false
      const retryCallArgs = mockRunAgentFn.mock.calls[1];
      const retryOptions = retryCallArgs[7]; // options object
      expect(retryOptions.planningMode).toBe('skip');
      expect(retryOptions.requirePlanApproval).toBe(false);
    });
  });
  describe('executeFeature - error handling', () => {
    it('classifies and emits error event', async () => {
      const testError = new Error('Test error');
--- a/apps/ui/src/components/views/board-view/components/kanban-card/agent-info-panel.tsx
+++ b/apps/ui/src/components/views/board-view/components/kanban-card/agent-info-panel.tsx
@@ -164,13 +164,17 @@ export const AgentInfoPanel = memo(function AgentInfoPanel({
      const currentTaskId = planSpec.currentTaskId;
      return planSpec.tasks.map((task: ParsedTask, index: number) => {
-        // If the feature is done (waiting_approval/verified), all tasks are completed
+        // When feature is finished (waiting_approval/verified), finalize task display:
-        // This is a defensive UI-side check: the server should have already finalized
+        // - in_progress tasks → completed (agent was working on them when it finished)
-        // task statuses, but stale data from before the fix could still show spinners
+        // - pending tasks stay pending (they were never started)
        // - completed tasks stay completed
        // - failed tasks are also shown as completed (see finalStatus below)
        // The in_progress/pending handling matches server-side behavior in feature-state-manager.ts
        if (isFeatureFinished) {
          const finalStatus =
            task.status === 'in_progress' || task.status === 'failed' ? 'completed' : task.status;
          return {
            content: task.description,
-            status: 'completed' as const,
+            status: (finalStatus || 'completed') as 'pending' | 'in_progress' | 'completed',
          };
        }