Fix agent output validation to prevent false verified status (#807)

* Changes from fix/cursor-fix

* feat: Enhance provider error messages with diagnostic context, address test failure, fix port change, move playwright tests to different port

* Update apps/ui/src/components/views/board-view/dialogs/add-feature-dialog.tsx

Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>

* ci: Update test server port from 3008 to 3108 and add environment configuration

* fix: Correct typo in health endpoint URL and standardize port env vars

---------

Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
This commit is contained in:
gsxdsm
2026-02-24 20:18:40 -08:00
committed by GitHub
parent 0330c70261
commit 51e9a23ba1
36 changed files with 1610 additions and 104 deletions

View File

@@ -52,6 +52,12 @@ HOST=0.0.0.0
# Port to run the server on
PORT=3008
# Port to run the server on for testing
TEST_SERVER_PORT=3108
# Port to run the UI on for testing
TEST_PORT=3107
# Data directory for sessions and metadata
DATA_DIR=./data

View File

@@ -389,9 +389,14 @@ export class CopilotProvider extends CliProvider {
case 'session.error': {
const errorEvent = sdkEvent as SdkSessionErrorEvent;
const enrichedError =
errorEvent.data.message ||
(errorEvent.data.code
? `Copilot agent error (code: ${errorEvent.data.code})`
: 'Copilot agent error');
return {
type: 'error',
error: errorEvent.data.message || 'Unknown error',
error: enrichedError,
};
}

View File

@@ -562,10 +562,14 @@ export class CursorProvider extends CliProvider {
const resultEvent = cursorEvent as CursorResultEvent;
if (resultEvent.is_error) {
const errorText = resultEvent.error || resultEvent.result || '';
const enrichedError =
errorText ||
`Cursor agent failed (duration: ${resultEvent.duration_ms}ms, subtype: ${resultEvent.subtype}, session: ${resultEvent.session_id ?? 'none'})`;
return {
type: 'error',
session_id: resultEvent.session_id,
error: resultEvent.error || resultEvent.result || 'Unknown error',
error: enrichedError,
};
}

View File

@@ -381,10 +381,13 @@ export class GeminiProvider extends CliProvider {
const resultEvent = geminiEvent as GeminiResultEvent;
if (resultEvent.status === 'error') {
const enrichedError =
resultEvent.error ||
`Gemini agent failed (duration: ${resultEvent.stats?.duration_ms ?? 'unknown'}ms, session: ${resultEvent.session_id ?? 'none'})`;
return {
type: 'error',
session_id: resultEvent.session_id,
error: resultEvent.error || 'Unknown error',
error: enrichedError,
};
}
@@ -401,10 +404,12 @@ export class GeminiProvider extends CliProvider {
case 'error': {
const errorEvent = geminiEvent as GeminiResultEvent;
const enrichedError =
errorEvent.error || `Gemini agent failed (session: ${errorEvent.session_id ?? 'none'})`;
return {
type: 'error',
session_id: errorEvent.session_id,
error: errorEvent.error || 'Unknown error',
error: enrichedError,
};
}

View File

@@ -296,8 +296,28 @@ export class AgentExecutor {
}
}
} else if (msg.type === 'error') {
throw new Error(AgentExecutor.sanitizeProviderError(msg.error));
} else if (msg.type === 'result' && msg.subtype === 'success') scheduleWrite();
const sanitized = AgentExecutor.sanitizeProviderError(msg.error);
logger.error(
`[execute] Feature ${featureId} received error from provider. ` +
`raw="${msg.error}", sanitized="${sanitized}", session_id=${msg.session_id ?? 'none'}`
);
throw new Error(sanitized);
} else if (msg.type === 'result') {
if (msg.subtype === 'success') {
scheduleWrite();
} else if (msg.subtype?.startsWith('error')) {
// Non-success result subtypes from the SDK (error_max_turns, error_during_execution, etc.)
logger.error(
`[execute] Feature ${featureId} ended with error subtype: ${msg.subtype}. ` +
`session_id=${msg.session_id ?? 'none'}`
);
throw new Error(`Agent execution ended with: ${msg.subtype}`);
} else {
logger.warn(
`[execute] Feature ${featureId} received unhandled result subtype: ${msg.subtype}`
);
}
}
}
} finally {
clearInterval(streamHeartbeat);
@@ -447,16 +467,28 @@ export class AgentExecutor {
});
}
} else if (msg.type === 'error') {
// Clean the error: strip ANSI codes and redundant "Error: " prefix
const cleanedError =
(msg.error || `Error during task ${task.id}`)
.replace(/\x1b\[[0-9;]*m/g, '')
.replace(/^Error:\s*/i, '')
.trim() || `Error during task ${task.id}`;
throw new Error(cleanedError);
} else if (msg.type === 'result' && msg.subtype === 'success') {
taskOutput += msg.result || '';
responseText += msg.result || '';
const fallback = `Error during task ${task.id}`;
const sanitized = AgentExecutor.sanitizeProviderError(msg.error || fallback);
logger.error(
`[executeTasksLoop] Feature ${featureId} task ${task.id} received error from provider. ` +
`raw="${msg.error}", sanitized="${sanitized}", session_id=${msg.session_id ?? 'none'}`
);
throw new Error(sanitized);
} else if (msg.type === 'result') {
if (msg.subtype === 'success') {
taskOutput += msg.result || '';
responseText += msg.result || '';
} else if (msg.subtype?.startsWith('error')) {
logger.error(
`[executeTasksLoop] Feature ${featureId} task ${task.id} ended with error subtype: ${msg.subtype}. ` +
`session_id=${msg.session_id ?? 'none'}`
);
throw new Error(`Agent execution ended with: ${msg.subtype}`);
} else {
logger.warn(
`[executeTasksLoop] Feature ${featureId} task ${task.id} received unhandled result subtype: ${msg.subtype}`
);
}
}
}
if (!taskCompleteDetected)

View File

@@ -60,6 +60,12 @@ import type {
const logger = createLogger('ExecutionService');
/** Marker written by agent-executor for each tool invocation. */
const TOOL_USE_MARKER = '🔧 Tool:';
/** Minimum trimmed output length to consider agent work meaningful. */
const MIN_MEANINGFUL_OUTPUT_LENGTH = 200;
export class ExecutionService {
constructor(
private eventBus: TypedEventBus,
@@ -409,7 +415,41 @@ Please continue from where you left off and complete all remaining tasks. Use th
}
}
const finalStatus = feature.skipTests ? 'waiting_approval' : 'verified';
// Read agent output before determining final status.
// CLI-based providers (Cursor, Codex, etc.) may exit quickly without doing
// meaningful work. Check output to avoid prematurely marking as 'verified'.
const outputPath = path.join(getFeatureDir(projectPath, featureId), 'agent-output.md');
let agentOutput = '';
try {
agentOutput = (await secureFs.readFile(outputPath, 'utf-8')) as string;
} catch {
/* */
}
// Determine if the agent did meaningful work by checking for tool usage
// indicators in the output. The agent executor writes "🔧 Tool:" markers
// each time a tool is invoked. No tool usage suggests the CLI exited
// without performing implementation work.
const hasToolUsage = agentOutput.includes(TOOL_USE_MARKER);
const isOutputTooShort = agentOutput.trim().length < MIN_MEANINGFUL_OUTPUT_LENGTH;
const agentDidWork = hasToolUsage && !isOutputTooShort;
let finalStatus: 'verified' | 'waiting_approval';
if (feature.skipTests) {
finalStatus = 'waiting_approval';
} else if (!agentDidWork) {
// Agent didn't produce meaningful output (e.g., CLI exited quickly).
// Route to waiting_approval so the user can review and re-run.
finalStatus = 'waiting_approval';
logger.warn(
`[executeFeature] Feature ${featureId}: agent produced insufficient output ` +
`(${agentOutput.trim().length}/${MIN_MEANINGFUL_OUTPUT_LENGTH} chars, toolUsage=${hasToolUsage}). ` +
`Setting status to waiting_approval instead of verified.`
);
} else {
finalStatus = 'verified';
}
await this.updateFeatureStatusFn(projectPath, featureId, finalStatus);
this.recordSuccessFn();
@@ -421,13 +461,6 @@ Please continue from where you left off and complete all remaining tasks. Use th
const hasIncompleteTasks = totalTasks > 0 && completedTasks < totalTasks;
try {
const outputPath = path.join(getFeatureDir(projectPath, featureId), 'agent-output.md');
let agentOutput = '';
try {
agentOutput = (await secureFs.readFile(outputPath, 'utf-8')) as string;
} catch {
/* */
}
if (agentOutput) {
const summary = extractSummary(agentOutput);
if (summary) await this.saveFeatureSummaryFn(projectPath, featureId, summary);

View File

@@ -0,0 +1,20 @@
import { describe, it, expect } from 'vitest';
import { normalizeThinkingLevelForModel } from '@automaker/types';
describe('normalizeThinkingLevelForModel', () => {
it('preserves explicitly selected none for Opus models', () => {
expect(normalizeThinkingLevelForModel('claude-opus', 'none')).toBe('none');
});
it('falls back to none when Opus receives an unsupported manual thinking level', () => {
expect(normalizeThinkingLevelForModel('claude-opus', 'medium')).toBe('none');
});
it('keeps adaptive for Opus when adaptive is selected', () => {
expect(normalizeThinkingLevelForModel('claude-opus', 'adaptive')).toBe('adaptive');
});
it('preserves supported manual levels for non-Opus models', () => {
expect(normalizeThinkingLevelForModel('claude-sonnet', 'high')).toBe('high');
});
});

View File

@@ -397,6 +397,45 @@ describe('copilot-provider.ts', () => {
});
});
it('should use error code in fallback when session.error message is empty', () => {
const event = {
type: 'session.error',
data: { message: '', code: 'RATE_LIMIT_EXCEEDED' },
};
const result = provider.normalizeEvent(event);
expect(result).not.toBeNull();
expect(result!.type).toBe('error');
expect(result!.error).toContain('RATE_LIMIT_EXCEEDED');
expect(result!.error).not.toBe('Unknown error');
});
it('should return generic "Copilot agent error" fallback when both message and code are empty', () => {
const event = {
type: 'session.error',
data: { message: '', code: '' },
};
const result = provider.normalizeEvent(event);
expect(result).not.toBeNull();
expect(result!.type).toBe('error');
expect(result!.error).toBe('Copilot agent error');
// Must NOT be the old opaque 'Unknown error'
expect(result!.error).not.toBe('Unknown error');
});
it('should return generic "Copilot agent error" fallback when data has no code field', () => {
const event = {
type: 'session.error',
data: { message: '' },
};
const result = provider.normalizeEvent(event);
expect(result).not.toBeNull();
expect(result!.type).toBe('error');
expect(result!.error).toBe('Copilot agent error');
});
it('should return null for unknown event types', () => {
const event = { type: 'unknown.event' };

View File

@@ -1,4 +1,4 @@
import { describe, it, expect } from 'vitest';
import { describe, it, expect, beforeEach } from 'vitest';
import { CursorProvider } from '@/providers/cursor-provider.js';
describe('cursor-provider.ts', () => {
@@ -36,4 +36,122 @@ describe('cursor-provider.ts', () => {
expect(args).not.toContain('--resume');
});
});
describe('normalizeEvent - result error handling', () => {
let provider: CursorProvider;
beforeEach(() => {
provider = Object.create(CursorProvider.prototype) as CursorProvider;
});
it('returns error message from resultEvent.error when is_error=true', () => {
const event = {
type: 'result',
is_error: true,
error: 'Rate limit exceeded',
result: '',
subtype: 'error',
duration_ms: 3000,
session_id: 'sess-123',
};
const msg = provider.normalizeEvent(event);
expect(msg).not.toBeNull();
expect(msg!.type).toBe('error');
expect(msg!.error).toBe('Rate limit exceeded');
});
it('falls back to resultEvent.result when error field is empty and is_error=true', () => {
const event = {
type: 'result',
is_error: true,
error: '',
result: 'Process terminated unexpectedly',
subtype: 'error',
duration_ms: 5000,
session_id: 'sess-456',
};
const msg = provider.normalizeEvent(event);
expect(msg).not.toBeNull();
expect(msg!.type).toBe('error');
expect(msg!.error).toBe('Process terminated unexpectedly');
});
it('builds diagnostic fallback when both error and result are empty and is_error=true', () => {
const event = {
type: 'result',
is_error: true,
error: '',
result: '',
subtype: 'error',
duration_ms: 5000,
session_id: 'sess-789',
};
const msg = provider.normalizeEvent(event);
expect(msg).not.toBeNull();
expect(msg!.type).toBe('error');
// Should contain diagnostic info rather than 'Unknown error'
expect(msg!.error).toContain('5000ms');
expect(msg!.error).toContain('sess-789');
expect(msg!.error).not.toBe('Unknown error');
});
it('preserves session_id in error message', () => {
const event = {
type: 'result',
is_error: true,
error: 'Timeout occurred',
result: '',
subtype: 'error',
duration_ms: 30000,
session_id: 'my-session-id',
};
const msg = provider.normalizeEvent(event);
expect(msg!.session_id).toBe('my-session-id');
});
it('uses "none" when session_id is missing from diagnostic fallback', () => {
const event = {
type: 'result',
is_error: true,
error: '',
result: '',
subtype: 'error',
duration_ms: 5000,
// session_id intentionally omitted
};
const msg = provider.normalizeEvent(event);
expect(msg).not.toBeNull();
expect(msg!.type).toBe('error');
expect(msg!.error).toContain('none');
expect(msg!.error).not.toContain('undefined');
});
it('returns success result when is_error=false', () => {
const event = {
type: 'result',
is_error: false,
error: '',
result: 'Completed successfully',
subtype: 'success',
duration_ms: 2000,
session_id: 'sess-ok',
};
const msg = provider.normalizeEvent(event);
expect(msg).not.toBeNull();
expect(msg!.type).toBe('result');
expect(msg!.subtype).toBe('success');
});
});
});

View File

@@ -1,5 +1,6 @@
import { describe, it, expect, beforeEach } from 'vitest';
import { GeminiProvider } from '@/providers/gemini-provider.js';
import type { ProviderMessage } from '@automaker/types';
describe('gemini-provider.ts', () => {
let provider: GeminiProvider;
@@ -116,4 +117,140 @@ describe('gemini-provider.ts', () => {
expect(args[modelIndex + 1]).toBe('gemini-2.5-pro');
});
});
describe('normalizeEvent - error handling', () => {
it('returns error from result event when status=error and error field is set', () => {
const event = {
type: 'result',
status: 'error',
error: 'Model overloaded',
session_id: 'sess-gemini-1',
stats: { duration_ms: 4000, total_tokens: 0 },
};
const msg = provider.normalizeEvent(event) as ProviderMessage;
expect(msg).not.toBeNull();
expect(msg.type).toBe('error');
expect(msg.error).toBe('Model overloaded');
expect(msg.session_id).toBe('sess-gemini-1');
});
it('builds diagnostic fallback when result event has status=error but empty error field', () => {
const event = {
type: 'result',
status: 'error',
error: '',
session_id: 'sess-gemini-2',
stats: { duration_ms: 7500, total_tokens: 0 },
};
const msg = provider.normalizeEvent(event) as ProviderMessage;
expect(msg).not.toBeNull();
expect(msg.type).toBe('error');
// Diagnostic info should be present instead of 'Unknown error'
expect(msg.error).toContain('7500ms');
expect(msg.error).toContain('sess-gemini-2');
expect(msg.error).not.toBe('Unknown error');
});
it('builds fallback with "unknown" duration when stats are missing', () => {
const event = {
type: 'result',
status: 'error',
error: '',
session_id: 'sess-gemini-nostats',
// no stats field
};
const msg = provider.normalizeEvent(event) as ProviderMessage;
expect(msg).not.toBeNull();
expect(msg.type).toBe('error');
expect(msg.error).toContain('unknown');
});
it('returns error from standalone error event with error field set', () => {
const event = {
type: 'error',
error: 'API key invalid',
session_id: 'sess-gemini-3',
};
const msg = provider.normalizeEvent(event) as ProviderMessage;
expect(msg).not.toBeNull();
expect(msg.type).toBe('error');
expect(msg.error).toBe('API key invalid');
});
it('builds diagnostic fallback when standalone error event has empty error field', () => {
const event = {
type: 'error',
error: '',
session_id: 'sess-gemini-empty',
};
const msg = provider.normalizeEvent(event) as ProviderMessage;
expect(msg).not.toBeNull();
expect(msg.type).toBe('error');
// Should include session_id, not just 'Unknown error'
expect(msg.error).toContain('sess-gemini-empty');
expect(msg.error).not.toBe('Unknown error');
});
it('builds fallback mentioning "none" when session_id is missing from error event', () => {
const event = {
type: 'error',
error: '',
// no session_id
};
const msg = provider.normalizeEvent(event) as ProviderMessage;
expect(msg).not.toBeNull();
expect(msg.type).toBe('error');
expect(msg.error).toContain('none');
});
it('uses consistent "Gemini agent failed" label for both result and error event fallbacks', () => {
const resultEvent = {
type: 'result',
status: 'error',
error: '',
session_id: 'sess-r',
stats: { duration_ms: 1000 },
};
const errorEvent = {
type: 'error',
error: '',
session_id: 'sess-e',
};
const resultMsg = provider.normalizeEvent(resultEvent) as ProviderMessage;
const errorMsg = provider.normalizeEvent(errorEvent) as ProviderMessage;
// Both fallback messages should use the same "Gemini agent failed" prefix
expect(resultMsg.error).toContain('Gemini agent failed');
expect(errorMsg.error).toContain('Gemini agent failed');
});
it('returns success result when result event has status=success', () => {
const event = {
type: 'result',
status: 'success',
error: '',
session_id: 'sess-gemini-ok',
stats: { duration_ms: 1200, total_tokens: 500 },
};
const msg = provider.normalizeEvent(event) as ProviderMessage;
expect(msg).not.toBeNull();
expect(msg.type).toBe('result');
expect(msg.subtype).toBe('success');
});
});
});

View File

@@ -685,6 +685,309 @@ describe('AgentExecutor', () => {
await expect(executor.execute(options, callbacks)).rejects.toThrow('API rate limit exceeded');
});
it('should throw "Unknown error" when provider stream yields error with empty message', async () => {
const executor = new AgentExecutor(
mockEventBus,
mockFeatureStateManager,
mockPlanApprovalService,
mockSettingsService
);
const mockProvider = {
getName: () => 'mock',
executeQuery: vi.fn().mockImplementation(function* () {
yield {
type: 'error',
error: '',
session_id: 'sess-123',
};
}),
} as unknown as BaseProvider;
const options: AgentExecutionOptions = {
workDir: '/test',
featureId: 'test-feature',
prompt: 'Test prompt',
projectPath: '/project',
abortController: new AbortController(),
provider: mockProvider,
effectiveBareModel: 'claude-sonnet-4-6',
planningMode: 'skip',
};
const callbacks = {
waitForApproval: vi.fn().mockResolvedValue({ approved: true }),
saveFeatureSummary: vi.fn(),
updateFeatureSummary: vi.fn(),
buildTaskPrompt: vi.fn().mockReturnValue('task prompt'),
};
await expect(executor.execute(options, callbacks)).rejects.toThrow('Unknown error');
});
it('should throw with sanitized error when provider yields ANSI-decorated error', async () => {
const executor = new AgentExecutor(
mockEventBus,
mockFeatureStateManager,
mockPlanApprovalService,
mockSettingsService
);
const mockProvider = {
getName: () => 'mock',
executeQuery: vi.fn().mockImplementation(function* () {
yield {
type: 'error',
// ANSI color codes + "Error: " prefix that should be stripped
error: '\x1b[31mError: Connection refused\x1b[0m',
};
}),
} as unknown as BaseProvider;
const options: AgentExecutionOptions = {
workDir: '/test',
featureId: 'test-feature',
prompt: 'Test prompt',
projectPath: '/project',
abortController: new AbortController(),
provider: mockProvider,
effectiveBareModel: 'claude-sonnet-4-6',
planningMode: 'skip',
};
const callbacks = {
waitForApproval: vi.fn().mockResolvedValue({ approved: true }),
saveFeatureSummary: vi.fn(),
updateFeatureSummary: vi.fn(),
buildTaskPrompt: vi.fn().mockReturnValue('task prompt'),
};
// Should strip ANSI codes and "Error: " prefix
await expect(executor.execute(options, callbacks)).rejects.toThrow('Connection refused');
});
it('should throw when result subtype is error_max_turns', async () => {
const executor = new AgentExecutor(
mockEventBus,
mockFeatureStateManager,
mockPlanApprovalService,
mockSettingsService
);
const mockProvider = {
getName: () => 'mock',
executeQuery: vi.fn().mockImplementation(function* () {
yield {
type: 'assistant',
message: {
content: [{ type: 'text', text: 'Working on it...' }],
},
};
yield {
type: 'result',
subtype: 'error_max_turns',
session_id: 'sess-456',
};
}),
} as unknown as BaseProvider;
const options: AgentExecutionOptions = {
workDir: '/test',
featureId: 'test-feature',
prompt: 'Test prompt',
projectPath: '/project',
abortController: new AbortController(),
provider: mockProvider,
effectiveBareModel: 'claude-sonnet-4-6',
planningMode: 'skip',
};
const callbacks = {
waitForApproval: vi.fn().mockResolvedValue({ approved: true }),
saveFeatureSummary: vi.fn(),
updateFeatureSummary: vi.fn(),
buildTaskPrompt: vi.fn().mockReturnValue('task prompt'),
};
await expect(executor.execute(options, callbacks)).rejects.toThrow(
'Agent execution ended with: error_max_turns'
);
});
it('should throw when result subtype is error_during_execution', async () => {
const executor = new AgentExecutor(
mockEventBus,
mockFeatureStateManager,
mockPlanApprovalService,
mockSettingsService
);
const mockProvider = {
getName: () => 'mock',
executeQuery: vi.fn().mockImplementation(function* () {
yield {
type: 'result',
subtype: 'error_during_execution',
session_id: 'sess-789',
};
}),
} as unknown as BaseProvider;
const options: AgentExecutionOptions = {
workDir: '/test',
featureId: 'test-feature',
prompt: 'Test prompt',
projectPath: '/project',
abortController: new AbortController(),
provider: mockProvider,
effectiveBareModel: 'claude-sonnet-4-6',
planningMode: 'skip',
};
const callbacks = {
waitForApproval: vi.fn().mockResolvedValue({ approved: true }),
saveFeatureSummary: vi.fn(),
updateFeatureSummary: vi.fn(),
buildTaskPrompt: vi.fn().mockReturnValue('task prompt'),
};
await expect(executor.execute(options, callbacks)).rejects.toThrow(
'Agent execution ended with: error_during_execution'
);
});
it('should throw when result subtype is error_max_structured_output_retries', async () => {
const executor = new AgentExecutor(
mockEventBus,
mockFeatureStateManager,
mockPlanApprovalService,
mockSettingsService
);
const mockProvider = {
getName: () => 'mock',
executeQuery: vi.fn().mockImplementation(function* () {
yield {
type: 'result',
subtype: 'error_max_structured_output_retries',
};
}),
} as unknown as BaseProvider;
const options: AgentExecutionOptions = {
workDir: '/test',
featureId: 'test-feature',
prompt: 'Test prompt',
projectPath: '/project',
abortController: new AbortController(),
provider: mockProvider,
effectiveBareModel: 'claude-sonnet-4-6',
planningMode: 'skip',
};
const callbacks = {
waitForApproval: vi.fn().mockResolvedValue({ approved: true }),
saveFeatureSummary: vi.fn(),
updateFeatureSummary: vi.fn(),
buildTaskPrompt: vi.fn().mockReturnValue('task prompt'),
};
await expect(executor.execute(options, callbacks)).rejects.toThrow(
'Agent execution ended with: error_max_structured_output_retries'
);
});
it('should throw when result subtype is error_max_budget_usd', async () => {
const executor = new AgentExecutor(
mockEventBus,
mockFeatureStateManager,
mockPlanApprovalService,
mockSettingsService
);
const mockProvider = {
getName: () => 'mock',
executeQuery: vi.fn().mockImplementation(function* () {
yield {
type: 'result',
subtype: 'error_max_budget_usd',
session_id: 'sess-budget',
};
}),
} as unknown as BaseProvider;
const options: AgentExecutionOptions = {
workDir: '/test',
featureId: 'test-feature',
prompt: 'Test prompt',
projectPath: '/project',
abortController: new AbortController(),
provider: mockProvider,
effectiveBareModel: 'claude-sonnet-4-6',
planningMode: 'skip',
};
const callbacks = {
waitForApproval: vi.fn().mockResolvedValue({ approved: true }),
saveFeatureSummary: vi.fn(),
updateFeatureSummary: vi.fn(),
buildTaskPrompt: vi.fn().mockReturnValue('task prompt'),
};
await expect(executor.execute(options, callbacks)).rejects.toThrow(
'Agent execution ended with: error_max_budget_usd'
);
});
it('should NOT throw when result subtype is success', async () => {
const executor = new AgentExecutor(
mockEventBus,
mockFeatureStateManager,
mockPlanApprovalService,
mockSettingsService
);
const mockProvider = {
getName: () => 'mock',
executeQuery: vi.fn().mockImplementation(function* () {
yield {
type: 'assistant',
message: {
content: [{ type: 'text', text: 'Done!' }],
},
};
yield {
type: 'result',
subtype: 'success',
session_id: 'sess-ok',
};
}),
} as unknown as BaseProvider;
const options: AgentExecutionOptions = {
workDir: '/test',
featureId: 'test-feature',
prompt: 'Test prompt',
projectPath: '/project',
abortController: new AbortController(),
provider: mockProvider,
effectiveBareModel: 'claude-sonnet-4-6',
planningMode: 'skip',
};
const callbacks = {
waitForApproval: vi.fn().mockResolvedValue({ approved: true }),
saveFeatureSummary: vi.fn(),
updateFeatureSummary: vi.fn(),
buildTaskPrompt: vi.fn().mockReturnValue('task prompt'),
};
// Should resolve without throwing
const result = await executor.execute(options, callbacks);
expect(result.aborted).toBe(false);
expect(result.responseText).toContain('Done!');
});
it('should throw error when authentication fails in response', async () => {
const executor = new AgentExecutor(
mockEventBus,

View File

@@ -0,0 +1,192 @@
import { describe, it, expect } from 'vitest';
/**
* Contract tests verifying the tool marker format used by agent-executor
* (which writes agent output) and execution-service (which reads it to
* determine if the agent did meaningful work).
*
* The agent-executor writes: `\n🔧 Tool: ${block.name}\n`
* The execution-service checks: `agentOutput.includes('🔧 Tool:')`
*
* These tests ensure the marker format contract stays consistent and
* document the exact detection logic used for status determination.
*/
// The exact marker prefix that execution-service searches for
const TOOL_MARKER = '🔧 Tool:';
// Minimum output length threshold for "meaningful work"
const MIN_OUTPUT_LENGTH = 200;
/**
* Simulates the agent-executor's tool_use output format.
* See: agent-executor.ts line ~293
*/
function formatToolUseBlock(toolName: string, input?: Record<string, unknown>): string {
let output = `\n${TOOL_MARKER} ${toolName}\n`;
if (input) output += `Input: ${JSON.stringify(input, null, 2)}\n`;
return output;
}
/**
* Simulates the execution-service's output validation logic.
* See: execution-service.ts lines ~427-429
*/
function validateAgentOutput(
agentOutput: string,
skipTests: boolean
): 'verified' | 'waiting_approval' {
const hasToolUsage = agentOutput.includes(TOOL_MARKER);
const hasMinimalOutput = agentOutput.trim().length < MIN_OUTPUT_LENGTH;
const agentDidWork = hasToolUsage && !hasMinimalOutput;
if (skipTests) return 'waiting_approval';
if (!agentDidWork) return 'waiting_approval';
return 'verified';
}
describe('Agent Output Validation - Contract Tests', () => {
describe('tool marker format contract', () => {
it('agent-executor tool format contains the expected marker', () => {
const toolOutput = formatToolUseBlock('Read', { file_path: '/src/index.ts' });
expect(toolOutput).toContain(TOOL_MARKER);
});
it('agent-executor tool format includes tool name after marker', () => {
const toolOutput = formatToolUseBlock('Edit', {
file_path: '/src/app.ts',
old_string: 'foo',
new_string: 'bar',
});
expect(toolOutput).toContain('🔧 Tool: Edit');
});
it('agent-executor tool format includes JSON input', () => {
const input = { file_path: '/src/index.ts' };
const toolOutput = formatToolUseBlock('Read', input);
expect(toolOutput).toContain('Input: ');
expect(toolOutput).toContain('"file_path": "/src/index.ts"');
});
it('agent-executor tool format works without input', () => {
const toolOutput = formatToolUseBlock('Bash');
expect(toolOutput).toContain('🔧 Tool: Bash');
expect(toolOutput).not.toContain('Input:');
});
it('marker includes colon and space to avoid false positives', () => {
// Ensure the marker is specific enough to avoid matching other emoji patterns
expect(TOOL_MARKER).toBe('🔧 Tool:');
expect(TOOL_MARKER).toContain(':');
});
});
describe('output validation logic', () => {
it('verified: tool usage + sufficient output', () => {
const output =
'Starting implementation of the new feature...\n' +
formatToolUseBlock('Read', { file_path: '/src/index.ts' }) +
'I can see the existing code. Let me make the needed changes.\n' +
formatToolUseBlock('Edit', { file_path: '/src/index.ts' }) +
'Changes complete. The implementation adds new validation logic and tests.';
expect(output.trim().length).toBeGreaterThanOrEqual(MIN_OUTPUT_LENGTH);
expect(validateAgentOutput(output, false)).toBe('verified');
});
it('waiting_approval: no tool markers regardless of length', () => {
const longOutput = 'I analyzed the codebase. '.repeat(50);
expect(longOutput.trim().length).toBeGreaterThan(MIN_OUTPUT_LENGTH);
expect(validateAgentOutput(longOutput, false)).toBe('waiting_approval');
});
it('waiting_approval: tool markers but insufficient length', () => {
const shortOutput = formatToolUseBlock('Read', { file_path: '/src/a.ts' });
expect(shortOutput.trim().length).toBeLessThan(MIN_OUTPUT_LENGTH);
expect(validateAgentOutput(shortOutput, false)).toBe('waiting_approval');
});
it('waiting_approval: empty output', () => {
expect(validateAgentOutput('', false)).toBe('waiting_approval');
});
it('waiting_approval: skipTests always overrides', () => {
const goodOutput =
'Starting...\n' +
formatToolUseBlock('Read', { file_path: '/src/index.ts' }) +
formatToolUseBlock('Edit', { file_path: '/src/index.ts' }) +
'Done implementing. '.repeat(15);
expect(goodOutput.trim().length).toBeGreaterThanOrEqual(MIN_OUTPUT_LENGTH);
expect(validateAgentOutput(goodOutput, true)).toBe('waiting_approval');
});
it('boundary: exactly MIN_OUTPUT_LENGTH chars with tool is verified', () => {
const tool = formatToolUseBlock('Read');
const padding = 'x'.repeat(MIN_OUTPUT_LENGTH - tool.trim().length);
const output = tool + padding;
expect(output.trim().length).toBeGreaterThanOrEqual(MIN_OUTPUT_LENGTH);
expect(validateAgentOutput(output, false)).toBe('verified');
});
it('boundary: MIN_OUTPUT_LENGTH - 1 chars with tool is waiting_approval', () => {
const marker = `${TOOL_MARKER} Read\n`;
const padding = 'x'.repeat(MIN_OUTPUT_LENGTH - 1 - marker.length);
const output = marker + padding;
expect(output.trim().length).toBe(MIN_OUTPUT_LENGTH - 1);
expect(validateAgentOutput(output, false)).toBe('waiting_approval');
});
});
describe('realistic provider scenarios', () => {
it('Claude SDK agent with multiple tools → verified', () => {
let output = "I'll implement the feature.\n\n";
output += formatToolUseBlock('Read', { file_path: '/src/components/App.tsx' });
output += 'I see the component. Let me update it.\n\n';
output += formatToolUseBlock('Edit', {
file_path: '/src/components/App.tsx',
old_string: 'const App = () => {',
new_string: 'const App: React.FC = () => {',
});
output += 'Done. The component is now typed correctly.\n';
expect(validateAgentOutput(output, false)).toBe('verified');
});
it('Cursor CLI quick exit (no tools) → waiting_approval', () => {
const output = 'Task received. Processing...\nResult: completed successfully.';
expect(validateAgentOutput(output, false)).toBe('waiting_approval');
});
it('Codex CLI with brief acknowledgment → waiting_approval', () => {
const output = 'Understood the task. Starting implementation.\nDone.';
expect(validateAgentOutput(output, false)).toBe('waiting_approval');
});
it('Agent that only reads but makes no edits (single Read tool, short output) → waiting_approval', () => {
const output = formatToolUseBlock('Read', { file_path: '/src/index.ts' }) + 'File read.';
expect(output.trim().length).toBeLessThan(MIN_OUTPUT_LENGTH);
expect(validateAgentOutput(output, false)).toBe('waiting_approval');
});
it('Agent with extensive tool usage and explanation → verified', () => {
let output = 'Analyzing the codebase for the authentication feature.\n\n';
for (let i = 0; i < 5; i++) {
output += formatToolUseBlock('Read', { file_path: `/src/auth/handler${i}.ts` });
output += `Found handler ${i}. `;
}
output += formatToolUseBlock('Edit', {
file_path: '/src/auth/handler0.ts',
old_string: 'function login() {}',
new_string: 'async function login(creds: Credentials) { ... }',
});
output += 'Implementation complete with all authentication changes applied.\n';
expect(validateAgentOutput(output, false)).toBe('verified');
});
});
});

View File

@@ -211,7 +211,14 @@ describe('execution-service.ts', () => {
});
// Default mocks for secureFs
vi.mocked(secureFs.readFile).mockResolvedValue('Agent output content');
// Include tool usage markers to simulate meaningful agent output.
// The execution service checks for '🔧 Tool:' markers and minimum
// output length to determine if the agent did real work.
vi.mocked(secureFs.readFile).mockResolvedValue(
'Starting implementation...\n\n🔧 Tool: Read\nInput: {"file_path": "/src/index.ts"}\n\n' +
'🔧 Tool: Edit\nInput: {"file_path": "/src/index.ts", "old_string": "foo", "new_string": "bar"}\n\n' +
'Implementation complete. Updated the code as requested.'
);
vi.mocked(secureFs.access).mockResolvedValue(undefined);
// Re-setup platform mocks
@@ -1433,4 +1440,439 @@ describe('execution-service.ts', () => {
);
});
});
describe('executeFeature - agent output validation', () => {
// Helper to generate realistic agent output with tool markers
const makeAgentOutput = (toolCount: number, extraText = ''): string => {
let output = 'Starting implementation...\n\n';
for (let i = 0; i < toolCount; i++) {
output += `🔧 Tool: Edit\nInput: {"file_path": "/src/file${i}.ts", "old_string": "old${i}", "new_string": "new${i}"}\n\n`;
}
output += `Implementation complete. ${extraText}`;
return output;
};
const createServiceWithMocks = () => {
return new ExecutionService(
mockEventBus,
mockConcurrencyManager,
mockWorktreeResolver,
mockSettingsService,
mockRunAgentFn,
mockExecutePipelineFn,
mockUpdateFeatureStatusFn,
mockLoadFeatureFn,
mockGetPlanningPromptPrefixFn,
mockSaveFeatureSummaryFn,
mockRecordLearningsFn,
mockContextExistsFn,
mockResumeFeatureFn,
mockTrackFailureFn,
mockSignalPauseFn,
mockRecordSuccessFn,
mockSaveExecutionStateFn,
mockLoadContextFilesFn
);
};
it('sets verified when agent output has tool usage and sufficient length', async () => {
const output = makeAgentOutput(3, 'Updated authentication module with new login flow.');
vi.mocked(secureFs.readFile).mockResolvedValue(output);
await service.executeFeature('/test/project', 'feature-1');
expect(mockUpdateFeatureStatusFn).toHaveBeenCalledWith(
'/test/project',
'feature-1',
'verified'
);
});
it('sets waiting_approval when agent output is empty', async () => {
vi.mocked(secureFs.readFile).mockResolvedValue('');
const svc = createServiceWithMocks();
await svc.executeFeature('/test/project', 'feature-1');
expect(mockUpdateFeatureStatusFn).toHaveBeenCalledWith(
'/test/project',
'feature-1',
'waiting_approval'
);
});
it('sets waiting_approval when agent output has no tool usage markers', async () => {
// Long output but no tool markers - agent printed text but didn't use tools
const longOutputNoTools = 'I analyzed the codebase and found several issues. '.repeat(20);
vi.mocked(secureFs.readFile).mockResolvedValue(longOutputNoTools);
const svc = createServiceWithMocks();
await svc.executeFeature('/test/project', 'feature-1');
expect(mockUpdateFeatureStatusFn).toHaveBeenCalledWith(
'/test/project',
'feature-1',
'waiting_approval'
);
});
it('sets waiting_approval when agent output has tool markers but is too short', async () => {
// Has a tool marker but total output is under 200 chars
const shortWithTool = '🔧 Tool: Read\nInput: {"file_path": "/src/index.ts"}\nDone.';
expect(shortWithTool.trim().length).toBeLessThan(200);
vi.mocked(secureFs.readFile).mockResolvedValue(shortWithTool);
const svc = createServiceWithMocks();
await svc.executeFeature('/test/project', 'feature-1');
expect(mockUpdateFeatureStatusFn).toHaveBeenCalledWith(
'/test/project',
'feature-1',
'waiting_approval'
);
});
it('sets waiting_approval when agent output file is missing (ENOENT)', async () => {
vi.mocked(secureFs.readFile).mockRejectedValue(new Error('ENOENT'));
const svc = createServiceWithMocks();
await svc.executeFeature('/test/project', 'feature-1');
expect(mockUpdateFeatureStatusFn).toHaveBeenCalledWith(
'/test/project',
'feature-1',
'waiting_approval'
);
});
it('sets waiting_approval when agent output is only whitespace', async () => {
vi.mocked(secureFs.readFile).mockResolvedValue(' \n\n\t \n ');
const svc = createServiceWithMocks();
await svc.executeFeature('/test/project', 'feature-1');
expect(mockUpdateFeatureStatusFn).toHaveBeenCalledWith(
'/test/project',
'feature-1',
'waiting_approval'
);
});
it('sets verified when output is exactly at the 200 char threshold with tool usage', async () => {
// Create output that's exactly 200 chars trimmed with tool markers
const toolMarker = '🔧 Tool: Edit\nInput: {"file_path": "/src/index.ts"}\n';
const padding = 'x'.repeat(200 - toolMarker.length);
const output = toolMarker + padding;
expect(output.trim().length).toBeGreaterThanOrEqual(200);
vi.mocked(secureFs.readFile).mockResolvedValue(output);
const svc = createServiceWithMocks();
await svc.executeFeature('/test/project', 'feature-1');
expect(mockUpdateFeatureStatusFn).toHaveBeenCalledWith(
'/test/project',
'feature-1',
'verified'
);
});
it('sets waiting_approval when output is 199 chars with tool usage (below threshold)', async () => {
const toolMarker = '🔧 Tool: Read\n';
const padding = 'x'.repeat(199 - toolMarker.length);
const output = toolMarker + padding;
expect(output.trim().length).toBe(199);
vi.mocked(secureFs.readFile).mockResolvedValue(output);
const svc = createServiceWithMocks();
await svc.executeFeature('/test/project', 'feature-1');
expect(mockUpdateFeatureStatusFn).toHaveBeenCalledWith(
'/test/project',
'feature-1',
'waiting_approval'
);
});
it('skipTests always takes priority over output validation', async () => {
// Meaningful output with tool usage - would normally be 'verified'
const output = makeAgentOutput(5, 'All changes applied successfully.');
vi.mocked(secureFs.readFile).mockResolvedValue(output);
mockLoadFeatureFn = vi.fn().mockResolvedValue({ ...testFeature, skipTests: true });
const svc = createServiceWithMocks();
await svc.executeFeature('/test/project', 'feature-1');
// skipTests=true always means waiting_approval regardless of output quality
expect(mockUpdateFeatureStatusFn).toHaveBeenCalledWith(
'/test/project',
'feature-1',
'waiting_approval'
);
});
it('skipTests with empty output still results in waiting_approval', async () => {
vi.mocked(secureFs.readFile).mockResolvedValue('');
mockLoadFeatureFn = vi.fn().mockResolvedValue({ ...testFeature, skipTests: true });
const svc = createServiceWithMocks();
await svc.executeFeature('/test/project', 'feature-1');
expect(mockUpdateFeatureStatusFn).toHaveBeenCalledWith(
'/test/project',
'feature-1',
'waiting_approval'
);
});
it('still records success even when output validation fails', async () => {
vi.mocked(secureFs.readFile).mockResolvedValue('');
const svc = createServiceWithMocks();
await svc.executeFeature('/test/project', 'feature-1');
// recordSuccess should still be called - the agent ran without errors
expect(mockRecordSuccessFn).toHaveBeenCalled();
});
it('still extracts summary when output has content but no tool markers', async () => {
const outputNoTools = 'A '.repeat(150); // > 200 chars but no tool markers
vi.mocked(secureFs.readFile).mockResolvedValue(outputNoTools);
const svc = createServiceWithMocks();
await svc.executeFeature('/test/project', 'feature-1');
// Summary extraction still runs even though status is waiting_approval
expect(extractSummary).toHaveBeenCalledWith(outputNoTools);
expect(mockSaveFeatureSummaryFn).toHaveBeenCalledWith(
'/test/project',
'feature-1',
'Test summary'
);
});
it('emits feature_complete with passes=true even when output validation routes to waiting_approval', async () => {
vi.mocked(secureFs.readFile).mockResolvedValue('');
const svc = createServiceWithMocks();
await svc.executeFeature('/test/project', 'feature-1', false, true);
// The agent ran without error - it's still a "pass" from the execution perspective
expect(mockEventBus.emitAutoModeEvent).toHaveBeenCalledWith(
'auto_mode_feature_complete',
expect.objectContaining({ passes: true })
);
});
it('handles realistic Cursor CLI output that exits quickly', async () => {
// Simulates a Cursor CLI that prints a brief message and exits
const cursorQuickExit = 'Task received. Processing...\nResult: completed successfully.';
expect(cursorQuickExit.includes('🔧 Tool:')).toBe(false);
vi.mocked(secureFs.readFile).mockResolvedValue(cursorQuickExit);
const svc = createServiceWithMocks();
await svc.executeFeature('/test/project', 'feature-1');
// No tool usage = waiting_approval
expect(mockUpdateFeatureStatusFn).toHaveBeenCalledWith(
'/test/project',
'feature-1',
'waiting_approval'
);
});
it('handles realistic Claude SDK output with multiple tool uses', async () => {
// Simulates a Claude SDK agent that does real work
const claudeOutput =
"I'll implement the requested feature.\n\n" +
'🔧 Tool: Read\nInput: {"file_path": "/src/components/App.tsx"}\n\n' +
'I can see the existing component structure. Let me modify it.\n\n' +
'🔧 Tool: Edit\nInput: {"file_path": "/src/components/App.tsx", "old_string": "const App = () => {", "new_string": "const App: React.FC = () => {"}\n\n' +
'🔧 Tool: Write\nInput: {"file_path": "/src/components/NewFeature.tsx"}\n\n' +
"I've created the new component and updated the existing one. The feature is now implemented with proper TypeScript types.";
vi.mocked(secureFs.readFile).mockResolvedValue(claudeOutput);
const svc = createServiceWithMocks();
await svc.executeFeature('/test/project', 'feature-1');
// Real work = verified
expect(mockUpdateFeatureStatusFn).toHaveBeenCalledWith(
'/test/project',
'feature-1',
'verified'
);
});
it('reads agent output from the correct path with utf-8 encoding', async () => {
const output = makeAgentOutput(2, 'Done with changes.');
vi.mocked(secureFs.readFile).mockResolvedValue(output);
const svc = createServiceWithMocks();
await svc.executeFeature('/test/project', 'feature-1');
// Verify readFile was called with the correct path derived from getFeatureDir
expect(secureFs.readFile).toHaveBeenCalledWith(
'/test/project/.automaker/features/feature-1/agent-output.md',
'utf-8'
);
});
it('completion message includes auto-verified when status is verified', async () => {
const output = makeAgentOutput(3, 'All changes applied.');
vi.mocked(secureFs.readFile).mockResolvedValue(output);
const svc = createServiceWithMocks();
await svc.executeFeature('/test/project', 'feature-1', false, true);
expect(mockEventBus.emitAutoModeEvent).toHaveBeenCalledWith(
'auto_mode_feature_complete',
expect.objectContaining({
message: expect.stringContaining('auto-verified'),
})
);
});
it('completion message does NOT include auto-verified when status is waiting_approval', async () => {
// Empty output → waiting_approval
vi.mocked(secureFs.readFile).mockResolvedValue('');
const svc = createServiceWithMocks();
await svc.executeFeature('/test/project', 'feature-1', false, true);
const completeCall = vi
.mocked(mockEventBus.emitAutoModeEvent)
.mock.calls.find((call) => call[0] === 'auto_mode_feature_complete');
expect(completeCall).toBeDefined();
expect((completeCall![1] as { message: string }).message).not.toContain('auto-verified');
});
it('uses same agentOutput for both status determination and summary extraction', async () => {
// Specific output that is long enough with tool markers (verified path)
// AND has content for summary extraction
const specificOutput =
'🔧 Tool: Read\nReading file...\n🔧 Tool: Edit\nEditing file...\n' +
'The implementation is complete. Here is a detailed description of what was done. '.repeat(
3
);
vi.mocked(secureFs.readFile).mockResolvedValue(specificOutput);
const svc = createServiceWithMocks();
await svc.executeFeature('/test/project', 'feature-1');
// Status should be verified (has tools + long enough)
expect(mockUpdateFeatureStatusFn).toHaveBeenCalledWith(
'/test/project',
'feature-1',
'verified'
);
// extractSummary should receive the exact same output
expect(extractSummary).toHaveBeenCalledWith(specificOutput);
// recordLearnings should also receive the same output
expect(mockRecordLearningsFn).toHaveBeenCalledWith(
'/test/project',
testFeature,
specificOutput
);
});
it('does not call recordMemoryUsage when output is empty and memoryFiles is empty', async () => {
vi.mocked(secureFs.readFile).mockResolvedValue('');
const { recordMemoryUsage } = await import('@automaker/utils');
const svc = createServiceWithMocks();
await svc.executeFeature('/test/project', 'feature-1');
// With empty output and empty memoryFiles, recordMemoryUsage should not be called
expect(recordMemoryUsage).not.toHaveBeenCalled();
});
it('handles output with special unicode characters correctly', async () => {
// Output with various unicode but includes tool markers
const unicodeOutput =
'🔧 Tool: Read\n' +
'🔧 Tool: Edit\n' +
'Añadiendo función de búsqueda con caracteres especiales: ñ, ü, ö, é, 日本語テスト. ' +
'Die Änderungen wurden erfolgreich implementiert. '.repeat(3);
vi.mocked(secureFs.readFile).mockResolvedValue(unicodeOutput);
const svc = createServiceWithMocks();
await svc.executeFeature('/test/project', 'feature-1');
// Should still detect tool markers and sufficient length
expect(mockUpdateFeatureStatusFn).toHaveBeenCalledWith(
'/test/project',
'feature-1',
'verified'
);
});
it('treats output with only newlines and spaces around tool marker as insufficient', async () => {
// Has tool marker but surrounded by whitespace, total trimmed < 200
const sparseOutput = '\n\n 🔧 Tool: Read \n\n';
expect(sparseOutput.trim().length).toBeLessThan(200);
vi.mocked(secureFs.readFile).mockResolvedValue(sparseOutput);
const svc = createServiceWithMocks();
await svc.executeFeature('/test/project', 'feature-1');
expect(mockUpdateFeatureStatusFn).toHaveBeenCalledWith(
'/test/project',
'feature-1',
'waiting_approval'
);
});
it('detects tool marker substring correctly (partial match like "🔧 Tools:" does not count)', async () => {
// Output with a similar but not exact marker - "🔧 Tools:" instead of "🔧 Tool:"
const wrongMarker = '🔧 Tools: Read\n🔧 Tools: Edit\n' + 'Implementation done. '.repeat(20);
expect(wrongMarker.includes('🔧 Tool:')).toBe(false);
vi.mocked(secureFs.readFile).mockResolvedValue(wrongMarker);
const svc = createServiceWithMocks();
await svc.executeFeature('/test/project', 'feature-1');
// "🔧 Tools:" is not the same as "🔧 Tool:" - should be waiting_approval
expect(mockUpdateFeatureStatusFn).toHaveBeenCalledWith(
'/test/project',
'feature-1',
'waiting_approval'
);
});
it('pipeline merge_conflict status short-circuits before output validation', async () => {
// Set up pipeline that results in merge_conflict
vi.mocked(pipelineService.getPipelineConfig).mockResolvedValue({
version: 1,
steps: [{ id: 'step-1', name: 'Step 1', order: 1, instructions: 'Do step 1' }] as any,
});
// After pipeline, loadFeature returns merge_conflict status
let loadCallCount = 0;
mockLoadFeatureFn = vi.fn().mockImplementation(() => {
loadCallCount++;
if (loadCallCount === 1) return testFeature; // initial load
// All subsequent loads (task check + pipeline refresh) return merge_conflict
return { ...testFeature, status: 'merge_conflict' };
});
const svc = createServiceWithMocks();
await svc.executeFeature('/test/project', 'feature-1');
// Should NOT have called updateFeatureStatusFn with 'verified' or 'waiting_approval'
// because pipeline merge_conflict short-circuits the method
const statusCalls = vi
.mocked(mockUpdateFeatureStatusFn)
.mock.calls.filter((call) => call[2] === 'verified' || call[2] === 'waiting_approval');
// The only non-in_progress status call should be absent since merge_conflict returns early
expect(statusCalls.length).toBe(0);
});
});
});